Coverage Report

Created: 2026-02-14 09:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libreoffice/svl/source/numbers/zforfind.hxx
Line
Count
Source
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/*
3
 * This file is part of the LibreOffice project.
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 *
9
 * This file incorporates work covered by the following license notice:
10
 *
11
 *   Licensed to the Apache Software Foundation (ASF) under one or more
12
 *   contributor license agreements. See the NOTICE file distributed
13
 *   with this work for additional information regarding copyright
14
 *   ownership. The ASF licenses this file to you under the Apache
15
 *   License, Version 2.0 (the "License"); you may not use this file
16
 *   except in compliance with the License. You may obtain a copy of
17
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18
 */
19
20
#ifndef INCLUDED_SVL_SOURCE_NUMBERS_ZFORFIND_HXX
21
#define INCLUDED_SVL_SOURCE_NUMBERS_ZFORFIND_HXX
22
23
#include <com/sun/star/uno/Sequence.hxx>
24
#include <rtl/ustring.hxx>
25
#include <svl/zforlist.hxx>
26
#include <tools/date.hxx>
27
#include <unotools/nativenumberwrapper.hxx>
28
#include <memory>
29
#include <optional>
30
31
class SvNumberformat;
32
class SvNFLanguageData;
33
enum class SvNumFormatType : sal_Int16;
34
35
141M
#define SV_MAX_COUNT_INPUT_STRINGS  20    // max count of substrings in input scanner
36
37
class ImpSvNumberInputScan
38
{
39
public:
40
    explicit ImpSvNumberInputScan(SvNFLanguageData& rCurrentLanguage);
41
    ~ImpSvNumberInputScan();
42
43
/*!*/   void ChangeIntl();                      // MUST be called if language changes
44
45
    /// set reference date for offset calculation
46
    void ChangeNullDate( const sal_uInt16 nDay,
47
                         const sal_uInt16 nMonth,
48
                         const sal_Int16 nYear );
49
50
    /// Convert input string to number
51
    bool IsNumberFormat( const OUString& rString,            /// input string
52
                         SvNumFormatType& F_Type,            /// format type (in + out)
53
                         double& fOutNumber,                 /// value determined (out)
54
                         const SvNumberformat* pFormat,      /// number format to compare against
55
                         const NativeNumberWrapper& rNatNum,
56
                         SvNumInputOptions eInputOptions);
57
58
    /// after IsNumberFormat: get decimal position
59
162k
    short   GetDecPos() const { return nDecPos; }
60
    /// after IsNumberFormat: get count of numeric substrings in input string
61
16.4k
    sal_uInt16  GetNumericsCount() const { return nNumericsCnt; }
62
63
    /// set threshold of two-digit year input
64
185k
    void    SetYear2000( sal_uInt16 nVal ) { nYear2000 = nVal; }
65
    /// get threshold of two-digit year input
66
129
    sal_uInt16  GetYear2000() const { return nYear2000; }
67
68
    /** Whether input can be forced to ISO 8601 format.
69
70
        Depends on locale's date separator and a specific date format order.
71
     */
72
    bool CanForceToIso8601( DateOrder eDateOrder );
73
74
    void InvalidateDateAcceptancePatterns();
75
76
    /** Whether 'T' separator was detected in an ISO 8601 date+time format.
77
     */
78
23.9k
    bool HasIso8601Tsep() const { return bIso8601Tsep; }
79
80
private:
81
    SvNFLanguageData& mrCurrentLanguageData;
82
    const SvNumberformat* mpFormat;                            //* The format to compare against, if any
83
    std::unique_ptr<OUString[]> pUpperMonthText;               //* Array of month names, uppercase
84
    std::unique_ptr<OUString[]> pUpperAbbrevMonthText;         //* Array of month names, abbreviated, uppercase
85
    std::unique_ptr<OUString[]> pUpperGenitiveMonthText;       //* Array of genitive month names, uppercase
86
    std::unique_ptr<OUString[]> pUpperGenitiveAbbrevMonthText; //* Array of genitive month names, abbreviated, uppercase
87
    std::unique_ptr<OUString[]> pUpperPartitiveMonthText;      //* Array of partitive month names, uppercase
88
    std::unique_ptr<OUString[]> pUpperPartitiveAbbrevMonthText;//* Array of partitive month names, abbreviated, uppercase
89
    std::unique_ptr<OUString[]> pUpperDayText;                 //* Array of day of week names, uppercase
90
    std::unique_ptr<OUString[]> pUpperAbbrevDayText;           //* Array of day of week names, abbreviated, uppercase
91
    OUString  aUpperCurrSymbol;                 //* Currency symbol, uppercase
92
    Date    maNullDate;                 //* 30Dec1899
93
    bool    bTextInitialized;                   //* Whether days and months are initialized
94
    bool    bScanGenitiveMonths;                //* Whether to scan an input for genitive months
95
    bool    bScanPartitiveMonths;               //* Whether to scan an input for partitive months
96
    // Variables for provisional results:
97
    OUString   sStrArray[SV_MAX_COUNT_INPUT_STRINGS];//* Array of scanned substrings
98
    bool       IsNum[SV_MAX_COUNT_INPUT_STRINGS];    //* Whether a substring is numeric
99
    sal_uInt16 nNums[SV_MAX_COUNT_INPUT_STRINGS];    //* Sequence of offsets to numeric strings
100
    sal_uInt16 nStringsCnt;                          //* Total count of scanned substrings
101
    sal_uInt16 nNumericsCnt;                         //* Count of numeric substrings
102
    bool       bDecSepInDateSeps;                    //* True <=> DecSep in {.,-,/,DateSep}
103
    sal_uInt8  nMatchedAllStrings;                   //* Scan...String() matched all substrings,
104
105
    // bit mask of nMatched... constants
106
    static const sal_uInt8 nMatchedEndString;        // 0x01
107
    static const sal_uInt8 nMatchedMidString;        // 0x02
108
    static const sal_uInt8 nMatchedStartString;      // 0x04
109
    static const sal_uInt8 nMatchedVirgin;           // 0x08
110
    static const sal_uInt8 nMatchedUsedAsReturn;     // 0x10
111
112
    int    nSign;                               // Sign of number
113
    int    nMonth;                              // Month (1..x) if date
114
                                                // negative => short format
115
    short  nMonthPos;                           // 1 = front, 2 = middle
116
                                                // 3 = end
117
    int    nDayOfWeek;                          // Temporary (!) day of week (1..7,-1..-7) if date
118
    sal_uInt16 nTimePos;                        // Index of first time separator (+1)
119
    short  nDecPos;                             // Index of substring containing "," (+1)
120
    bool   bNegCheck;                           // '( )' for negative
121
    short  nESign;                              // Sign of exponent
122
    short  nAmPm;                               // +1 AM, -1 PM, 0 if none
123
    short  nLogical;                            // -1 => False, 1 => True
124
    bool   mbEraCE;                            // Era if date, 0 => BCE, 1 => CE (currently only Gregorian)
125
    sal_uInt16 nThousand;                       // Count of group (AKA thousand) separators
126
    sal_uInt16 nPosThousandString;              // Position of concatenated 000,000,000 string
127
    SvNumFormatType eScannedType;               // Scanned type
128
    SvNumFormatType eSetType;                   // Preset Type
129
130
    sal_uInt16 nStringScanNumFor;               // Fixed strings recognized in
131
                                                // pFormat->NumFor[nStringScanNumFor]
132
    short  nStringScanSign;                     // Sign resulting of FixString
133
    sal_uInt16 nYear2000;                       // Two-digit threshold
134
                                                // Year as 20xx
135
                                                // default 18
136
                                                // number <= nYear2000 => 20xx
137
                                                // number >  nYear2000 => 19xx
138
139
    /** State of ISO 8601 detection.
140
141
        0:= don't know yet
142
        1:= no
143
        2:= yes, <=2 digits in year
144
        3:= yes,   3 digits in year
145
        4:= yes, >=4 digits in year
146
147
        @see MayBeIso8601()
148
     */
149
    sal_uInt8    nMayBeIso8601;
150
151
    /** Whether the 'T' time separator was detected in an ISO 8601 string. */
152
    bool        bIso8601Tsep;
153
154
    /** State of dd-month-yy or yy-month-dd detection, with month name.
155
156
        0:= don't know yet
157
        1:= no
158
        2:= yes, dd-month-yy
159
        3:= yes, yy-month-dd
160
161
        @see MayBeMonthDate()
162
     */
163
    sal_uInt8   nMayBeMonthDate;
164
165
    /** Input matched this locale dependent date acceptance pattern.
166
        -2 if not checked yet, -1 if no match, >=0 matched pattern.
167
168
        @see IsAcceptedDatePattern()
169
     */
170
    sal_Int32   nAcceptedDatePattern;
171
    css::uno::Sequence< OUString >  sDateAcceptancePatterns;
172
173
    /** If input matched a date acceptance pattern that starts at input
174
        particle sStrArray[nDatePatternStart].
175
176
        @see IsAcceptedDatePattern()
177
     */
178
    sal_uInt16  nDatePatternStart;
179
180
    /** Count of numbers that matched the accepted pattern, if any, else 0.
181
182
        @see GetDatePatternNumbers()
183
     */
184
    sal_uInt16  nDatePatternNumbers;
185
186
    // Copy construction and copy assignment are forbidden (deleted).
187
    ImpSvNumberInputScan (const ImpSvNumberInputScan &) = delete;
188
    ImpSvNumberInputScan & operator= (const ImpSvNumberInputScan &) = delete;
189
190
    void Reset();                               // Reset all variables before start of analysis
191
192
    void InitText();                            // Init of months and days of week
193
194
    // Next number/string symbol
195
    static bool NextNumberStringSymbol( const sal_Unicode*& pStr,
196
                                        OUString& rSymbol );
197
198
    // Concatenate ,000,23 blocks
199
    // in input to 000123
200
    bool SkipThousands( const sal_Unicode*& pStr, OUString& rSymbol ) const;
201
202
    // Divide numbers/strings into
203
    // arrays and variables above.
204
    // Leading blanks and blanks
205
    // after numbers are thrown away
206
    void NumberStringDivision( const OUString& rString );
207
208
209
    /** Whether rString contains word (!) rWhat at nPos.
210
        rWhat will not be matched if it is a substring of a word.
211
     */
212
    bool StringContainsWord( const OUString& rWhat,
213
                             const OUString& rString,
214
                             sal_Int32 nPos ) const;
215
216
    // optimized substring versions
217
218
    // Whether rString contains rWhat at nPos; always false if rWhat is empty or nPos is at or beyond the end of rString
219
    static bool StringContains( const OUString& rWhat,
220
                                       const OUString& rString,
221
                                       sal_Int32 nPos )
222
6.22M
        {
223
6.22M
            if (rWhat.isEmpty() || rString.getLength() <= nPos) // nothing to look for, or no character at nPos
224
1.59M
            {
225
1.59M
                return false;
226
1.59M
            }
227
            // mostly used with one character, so reject on a first-character mismatch before calling StringContainsImpl()
228
4.62M
            if ( rWhat[ 0 ] != rString[ nPos ] )
229
3.91M
            {
230
3.91M
                return false;
231
3.91M
            }
232
706k
            return StringContainsImpl( rWhat, rString, nPos );
233
4.62M
        }
234
235
    // Whether pString contains rWhat at nPos. Unlike StringContains(), no emptiness or length check is done here.
236
    static bool StringPtrContains( const OUString& rWhat,
237
                                          const sal_Unicode* pString,
238
                                          sal_Int32 nPos ) // nPos MUST be a valid offset from pString
239
88.8k
        {
240
            // mostly used with one character, so reject on a first-character mismatch before calling StringPtrContainsImpl()
241
88.8k
            if ( rWhat[ 0 ] != pString[ nPos ] ) // NOTE(review): rWhat[ 0 ] is read without an isEmpty() check - presumably callers pass a non-empty rWhat; confirm
242
56.4k
            {
243
56.4k
                return false;
244
56.4k
            }
245
32.4k
            return StringPtrContainsImpl( rWhat, pString, nPos );
246
88.8k
        }
247
248
    //! DO NOT use directly
249
    static bool StringContainsImpl( const OUString& rWhat,
250
                                    const OUString& rString,
251
                                    sal_Int32 nPos );
252
    //! DO NOT use directly
253
    static bool StringPtrContainsImpl( const OUString& rWhat,
254
                                       const sal_Unicode* pString,
255
                                       sal_Int32 nPos );
256
257
    // Skip a special character
258
    static inline bool SkipChar( sal_Unicode c,
259
                                 std::u16string_view rString,
260
                                 sal_Int32& nPos );
261
262
    // Skip blank
263
    static inline bool SkipBlanks( const OUString& rString,
264
                                   sal_Int32& nPos );
265
266
    // Jump over rWhat in rString at nPos
267
    static inline bool SkipString( const OUString& rWhat,
268
                                   const OUString& rString,
269
                                   sal_Int32& nPos );
270
271
    // Recognizes exactly ,111 as group separator
272
    inline bool GetThousandSep( std::u16string_view rString,
273
                                sal_Int32& nPos,
274
                                sal_uInt16 nStringPos ) const;
275
    // Get boolean value
276
    short GetLogical( std::u16string_view rString ) const;
277
278
    // Get month and advance string position
279
    short GetMonth( const OUString& rString,
280
                    sal_Int32& nPos );
281
282
    // Get day of week and advance string position
283
    int GetDayOfWeek( const OUString& rString,
284
                      sal_Int32& nPos );
285
286
    // Get currency symbol and advance string position
287
    bool GetCurrency( const OUString& rString,
288
                      sal_Int32& nPos );
289
290
    // Get symbol AM or PM and advance string position
291
    bool GetTimeAmPm( const OUString& rString,
292
                      sal_Int32& nPos );
293
294
    // Get decimal separator and advance string position
295
    inline bool GetDecSep( std::u16string_view rString,
296
                           sal_Int32& nPos ) const;
297
298
    // Get hundredth seconds separator and advance string position
299
    inline bool GetTime100SecSep( std::u16string_view rString,
300
                                  sal_Int32& nPos ) const;
301
302
    // Get sign  and advance string position
303
    // Including special case '('
304
    int GetSign( std::u16string_view rString,
305
                 sal_Int32& nPos );
306
307
    // Get sign of exponent and advance string position
308
    static short GetESign( std::u16string_view rString,
309
                           sal_Int32& nPos );
310
311
    // Get next number as array offset
312
    inline bool GetNextNumber( sal_uInt16& i,
313
                               sal_uInt16& j ) const;
314
315
    /** Converts time -> double (only decimals)
316
317
        @return TRUE if time, FALSE if not (e.g. hours >12 with AM/PM)
318
     */
319
    bool GetTimeRef( double& fOutNumber,                     // result as double
320
                     sal_uInt16 nIndex,                      // Index of hour in input
321
                     sal_uInt16 nCnt,                        // Count of time substrings in input
322
                     SvNumInputOptions eInputOptions ) const;
323
    sal_uInt16 ImplGetDay  ( sal_uInt16 nIndex ) const;      // Day input, 0 if no match
324
    sal_uInt16 ImplGetMonth( sal_uInt16 nIndex ) const;      // Month input, zero based return, NumberOfMonths if no match
325
    sal_uInt16 ImplGetYear ( sal_uInt16 nIndex );   // Year input, 0 if no match
326
327
    // Conversion of date to number
328
    bool GetDateRef( double& fDays,                          // OUT: days diff to null date
329
                     sal_uInt16& nCounter );                 // Count of date substrings
330
331
    // Analyze start of string
332
    bool ScanStartString( const OUString& rString );
333
334
    // Analyze middle substring
335
    bool ScanMidString( const OUString& rString,
336
                        sal_uInt16 nStringPos,
337
                        sal_uInt16 nCurNumCount );
338
339
340
    // Analyze end of string
341
    bool ScanEndString( const OUString& rString );
342
343
    // Compare rString to substring of array indexed by nString
344
    // nString == 0xFFFF => last substring
345
    bool ScanStringNumFor( const OUString& rString,
346
                           sal_Int32 nPos,
347
                           sal_uInt16 nString,
348
                           bool bDontDetectNegation = false );
349
350
    // if nMatchedAllStrings set nMatchedUsedAsReturn and return true,
351
    // else do nothing and return false
352
    bool MatchedReturn();
353
354
    //! Be sure that the string to be analyzed is already converted to upper
355
    //! case and if it contained native number digits that they are already
356
    //! converted to ASCII.
357
358
    // Main analyzing function
359
    bool IsNumberFormatMain( const OUString& rString,
360
                             const SvNumberformat* pFormat);    // number format to match against
361
362
    /** Whether input matches locale dependent date acceptance pattern.
363
364
        @param nStartPatternAt
365
               The pattern matching starts at input particle
366
               sStrArray[nStartPatternAt].
367
368
        NOTE: once called the result is remembered, subsequent calls with
369
        different parameters do not check for a match and do not lead to a
370
        different result.
371
     */
372
    bool IsAcceptedDatePattern( sal_uInt16 nStartPatternAt );
373
374
    /** Sets (not advances!) rPos to sStrArray[nParticle].getLength() if string
375
        matches separator in pattern at nParticle.
376
377
        Also detects a signed year case like M/D/-Y
378
379
        @returns TRUE if separator matched.
380
     */
381
    bool SkipDatePatternSeparator( sal_uInt16 nParticle, sal_Int32 & rPos, bool & rSignedYear );
382
383
    /** Returns count of numbers in accepted date pattern.
384
     */
385
    sal_uInt16 GetDatePatternNumbers();
386
387
    /** Whether numeric string nNumber is of type cType in accepted date
388
        pattern, 'Y', 'M' or 'D'.
389
     */
390
    bool IsDatePatternNumberOfType( sal_uInt16 nNumber, sal_Unicode cType );
391
392
    /** Obtain order of accepted date pattern coded as, for example,
393
        ('D'<<16)|('M'<<8)|'Y'
394
    */
395
    sal_uInt32 GetDatePatternOrder();
396
397
    /** Obtain date format order, from accepted date pattern if available or
398
        otherwise the locale's default order.
399
400
        @param  bFromFormatIfNoPattern
401
                If <TRUE/> and no pattern was matched, obtain date order from
402
                format if available, instead of from format's or current locale.
403
     */
404
    DateOrder GetDateOrder( bool bFromFormatIfNoPattern = false );
405
406
    /** Whether input may be an ISO 8601 date format, yyyy-mm-dd...
407
408
        Checks if input has at least 3 numbers for yyyy-mm-dd and the separator
409
        is '-', and 1<=mm<=12 and 1<=dd<=31.
410
411
        @see nMayBeIso8601
412
     */
413
    bool MayBeIso8601();
414
415
    /** Whether input may be a dd-month-yy format, with month name, not
416
        number.
417
418
        @see nMayBeMonthDate
419
     */
420
    bool MayBeMonthDate();
421
422
    /** Whether input is acceptable as ISO 8601 date format in the current
423
        NfEvalDateFormat setting.
424
     */
425
    bool IsAcceptableIso8601();
426
427
    /** If month name in the middle was parsed, get the corresponding
428
        LongDateOrder in GetDateRef().
429
     */
430
    LongDateOrder GetMiddleMonthLongDateOrder( bool bFormatTurn,
431
                                               const LocaleDataWrapper* pLoc,
432
                                               DateOrder eDateOrder );
433
};
434
435
#endif // INCLUDED_SVL_SOURCE_NUMBERS_ZFORFIND_HXX
436
437
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */