/src/libreoffice/svl/source/numbers/zforfind.hxx
Line | Count | Source |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* |
3 | | * This file is part of the LibreOffice project. |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | * |
9 | | * This file incorporates work covered by the following license notice: |
10 | | * |
11 | | * Licensed to the Apache Software Foundation (ASF) under one or more |
12 | | * contributor license agreements. See the NOTICE file distributed |
13 | | * with this work for additional information regarding copyright |
14 | | * ownership. The ASF licenses this file to you under the Apache |
15 | | * License, Version 2.0 (the "License"); you may not use this file |
16 | | * except in compliance with the License. You may obtain a copy of |
17 | | * the License at http://www.apache.org/licenses/LICENSE-2.0 . |
18 | | */ |
19 | | |
20 | | #ifndef INCLUDED_SVL_SOURCE_NUMBERS_ZFORFIND_HXX |
21 | | #define INCLUDED_SVL_SOURCE_NUMBERS_ZFORFIND_HXX |
22 | | |
23 | | #include <com/sun/star/uno/Sequence.hxx> |
24 | | #include <rtl/ustring.hxx> |
25 | | #include <svl/zforlist.hxx> |
26 | | #include <tools/date.hxx> |
27 | | #include <unotools/nativenumberwrapper.hxx> |
28 | | #include <memory> |
29 | | #include <optional> |
30 | | |
31 | | class SvNumberformat; |
32 | | class SvNFLanguageData; |
33 | | enum class SvNumFormatType : sal_Int16; |
34 | | |
35 | 141M | #define SV_MAX_COUNT_INPUT_STRINGS 20 // max count of substrings in input scanner; dimension of the sStrArray, IsNum and nNums arrays of ImpSvNumberInputScan |
36 | | |
37 | | class ImpSvNumberInputScan /* Input scanner: divides an input string into numeric and non-numeric substrings and tries to convert it to a number, see IsNumberFormat() */ |
38 | | { |
39 | | public: |
40 | | explicit ImpSvNumberInputScan(SvNFLanguageData& rCurrentLanguage); |
41 | | ~ImpSvNumberInputScan(); |
42 | | |
43 | | /*!*/ void ChangeIntl(); // MUST be called if language changes |
44 | | |
45 | | /// set reference date (null date) for offset calculation |
46 | | void ChangeNullDate( const sal_uInt16 nDay, |
47 | | const sal_uInt16 nMonth, |
48 | | const sal_Int16 nYear ); |
49 | | |
50 | | /// convert input string to number |
51 | | bool IsNumberFormat( const OUString& rString, /// input string |
52 | | SvNumFormatType& F_Type, /// format type (in + out) |
53 | | double& fOutNumber, /// value determined (out) |
54 | | const SvNumberformat* pFormat, /// number format to compare against |
55 | | const NativeNumberWrapper& rNatNum, |
56 | | SvNumInputOptions eInputOptions); |
57 | | |
58 | | /// after IsNumberFormat: get decimal position (nDecPos) |
59 | 162k | short GetDecPos() const { return nDecPos; } |
60 | | /// after IsNumberFormat: get count of numeric substrings in input string |
61 | 16.4k | sal_uInt16 GetNumericsCount() const { return nNumericsCnt; } |
62 | | |
63 | | /// set threshold of two-digit year input |
64 | 185k | void SetYear2000( sal_uInt16 nVal ) { nYear2000 = nVal; } |
65 | | /// get threshold of two-digit year input |
66 | 129 | sal_uInt16 GetYear2000() const { return nYear2000; } |
67 | | |
68 | | /** Whether input can be forced to ISO 8601 format. |
69 | | |
70 | | Depends on locale's date separator and a specific date format order. |
71 | | */ |
72 | | bool CanForceToIso8601( DateOrder eDateOrder ); |
73 | | |
74 | | void InvalidateDateAcceptancePatterns(); |
75 | | |
76 | | /** Whether 'T' separator was detected in an ISO 8601 date+time format. |
77 | | */ |
78 | 23.9k | bool HasIso8601Tsep() const { return bIso8601Tsep; } |
79 | | |
80 | | private: |
81 | | SvNFLanguageData& mrCurrentLanguageData; |
82 | | const SvNumberformat* mpFormat; //* The format to compare against, if any |
83 | | std::unique_ptr<OUString[]> pUpperMonthText; //* Array of month names, uppercase |
84 | | std::unique_ptr<OUString[]> pUpperAbbrevMonthText; //* Array of month names, abbreviated, uppercase |
85 | | std::unique_ptr<OUString[]> pUpperGenitiveMonthText; //* Array of genitive month names, uppercase |
86 | | std::unique_ptr<OUString[]> pUpperGenitiveAbbrevMonthText; //* Array of genitive month names, abbreviated, uppercase |
87 | | std::unique_ptr<OUString[]> pUpperPartitiveMonthText; //* Array of partitive month names, uppercase |
88 | | std::unique_ptr<OUString[]> pUpperPartitiveAbbrevMonthText;//* Array of partitive month names, abbreviated, uppercase |
89 | | std::unique_ptr<OUString[]> pUpperDayText; //* Array of day of week names, uppercase |
90 | | std::unique_ptr<OUString[]> pUpperAbbrevDayText; //* Array of day of week names, abbreviated, uppercase |
91 | | OUString aUpperCurrSymbol; //* Currency symbol, uppercase |
92 | | Date maNullDate; //* Reference (null) date for offset calculation, e.g. 30Dec1899 |
93 | | bool bTextInitialized; //* Whether days and months are initialized |
94 | | bool bScanGenitiveMonths; //* Whether to scan an input for genitive months |
95 | | bool bScanPartitiveMonths; //* Whether to scan an input for partitive months |
96 | | // Variables for provisional results: |
97 | | OUString sStrArray[SV_MAX_COUNT_INPUT_STRINGS];//* Array of scanned substrings |
98 | | bool IsNum[SV_MAX_COUNT_INPUT_STRINGS]; //* Whether a substring is numeric |
99 | | sal_uInt16 nNums[SV_MAX_COUNT_INPUT_STRINGS]; //* Sequence of offsets to numeric strings |
100 | | sal_uInt16 nStringsCnt; //* Total count of scanned substrings |
101 | | sal_uInt16 nNumericsCnt; //* Count of numeric substrings |
102 | | bool bDecSepInDateSeps; //* True <=> DecSep in {.,-,/,DateSep} |
103 | | sal_uInt8 nMatchedAllStrings; //* Scan...String() matched all substrings, bit mask of the nMatched... constants |
104 | | |
105 | | // nMatched... bit mask constants used in nMatchedAllStrings |
106 | | static const sal_uInt8 nMatchedEndString; // 0x01 |
107 | | static const sal_uInt8 nMatchedMidString; // 0x02 |
108 | | static const sal_uInt8 nMatchedStartString; // 0x04 |
109 | | static const sal_uInt8 nMatchedVirgin; // 0x08 |
110 | | static const sal_uInt8 nMatchedUsedAsReturn; // 0x10 |
111 | | |
112 | | int nSign; // Sign of number |
113 | | int nMonth; // Month (1..x) if date |
114 | | // negative => short format |
115 | | short nMonthPos; // 1 = front, 2 = middle |
116 | | // 3 = end |
117 | | int nDayOfWeek; // Temporary (!) day of week (1..7,-1..-7) if date |
118 | | sal_uInt16 nTimePos; // Index of first time separator (+1) |
119 | | short nDecPos; // Index of substring containing "," (+1), see GetDecPos() |
120 | | bool bNegCheck; // '( )' for negative |
121 | | short nESign; // Sign of exponent |
122 | | short nAmPm; // +1 AM, -1 PM, 0 if none |
123 | | short nLogical; // -1 => False, 1 => True |
124 | | bool mbEraCE; // Era if date, false => BCE, true => CE (currently only Gregorian) |
125 | | sal_uInt16 nThousand; // Count of group (AKA thousand) separators |
126 | | sal_uInt16 nPosThousandString; // Position of concatenated 000,000,000 string |
127 | | SvNumFormatType eScannedType; // Scanned type |
128 | | SvNumFormatType eSetType; // Preset Type |
129 | | |
130 | | sal_uInt16 nStringScanNumFor; // Fixed strings recognized in |
131 | | // pFormat->NumFor[nStringScanNumFor] |
132 | | short nStringScanSign; // Sign resulting of FixString |
133 | | sal_uInt16 nYear2000; // Two-digit threshold |
134 | | // Year as 20xx |
135 | | // default 18 |
136 | | // number <= nYear2000 => 20xx |
137 | | // number > nYear2000 => 19xx |
138 | | |
139 | | /** State of ISO 8601 detection. |
140 | | |
141 | | 0:= don't know yet |
142 | | 1:= no |
143 | | 2:= yes, <=2 digits in year |
144 | | 3:= yes, 3 digits in year |
145 | | 4:= yes, >=4 digits in year |
146 | | |
147 | | @see MayBeIso8601() |
148 | | */ |
149 | | sal_uInt8 nMayBeIso8601; |
150 | | |
151 | | /** Whether the 'T' time separator was detected in an ISO 8601 string. */ |
152 | | bool bIso8601Tsep; |
153 | | |
154 | | /** State of dd-month-yy or yy-month-dd detection, with month name. |
155 | | |
156 | | 0:= don't know yet |
157 | | 1:= no |
158 | | 2:= yes, dd-month-yy |
159 | | 3:= yes, yy-month-dd |
160 | | |
161 | | @see MayBeMonthDate() |
162 | | */ |
163 | | sal_uInt8 nMayBeMonthDate; |
164 | | |
165 | | /** Input matched this locale dependent date acceptance pattern. |
166 | | -2 if not checked yet, -1 if no match, >=0 matched pattern. |
167 | | |
168 | | @see IsAcceptedDatePattern() |
169 | | */ |
170 | | sal_Int32 nAcceptedDatePattern; |
171 | | css::uno::Sequence< OUString > sDateAcceptancePatterns; |
172 | | |
173 | | /** If input matched a date acceptance pattern that starts at input |
174 | | particle sStrArray[nDatePatternStart]. |
175 | | |
176 | | @see IsAcceptedDatePattern() |
177 | | */ |
178 | | sal_uInt16 nDatePatternStart; |
179 | | |
180 | | /** Count of numbers that matched the accepted pattern, if any, else 0. |
181 | | |
182 | | @see GetDatePatternNumbers() |
183 | | */ |
184 | | sal_uInt16 nDatePatternNumbers; |
185 | | |
186 | | // Copy construction and copy assignment are forbidden (deleted). |
187 | | ImpSvNumberInputScan (const ImpSvNumberInputScan &) = delete; |
188 | | ImpSvNumberInputScan & operator= (const ImpSvNumberInputScan &) = delete; |
189 | | |
190 | | void Reset(); // Reset all variables before start of analysis |
191 | | |
192 | | void InitText(); // Init of months and days of week |
193 | | |
194 | | // Next number/string symbol |
195 | | static bool NextNumberStringSymbol( const sal_Unicode*& pStr, |
196 | | OUString& rSymbol ); |
197 | | |
198 | | // Concatenate ,000,23 blocks |
199 | | // in input to 000123 |
200 | | bool SkipThousands( const sal_Unicode*& pStr, OUString& rSymbol ) const; |
201 | | |
202 | | // Divide numbers/strings into |
203 | | // arrays and variables above. |
204 | | // Leading blanks and blanks |
205 | | // after numbers are thrown away |
206 | | void NumberStringDivision( const OUString& rString ); |
207 | | |
208 | | |
209 | | /** Whether rString contains word (!) rWhat at nPos. |
210 | | rWhat will not be matched if it is a substring of a word. |
211 | | */ |
212 | | bool StringContainsWord( const OUString& rWhat, |
213 | | const OUString& rString, |
214 | | sal_Int32 nPos ) const; |
215 | | |
216 | | // optimized substring versions |
217 | | |
218 | | // Whether rString contains rWhat at nPos; always false if rWhat is empty or nPos is at or beyond the end of rString |
219 | | static bool StringContains( const OUString& rWhat, |
220 | | const OUString& rString, |
221 | | sal_Int32 nPos ) |
222 | 6.22M | { |
223 | 6.22M | if (rWhat.isEmpty() || rString.getLength() <= nPos) |
224 | 1.59M | { |
225 | 1.59M | return false; |
226 | 1.59M | } |
227 | | // mostly used with one character, so compare the first character before calling StringContainsImpl() |
228 | 4.62M | if ( rWhat[ 0 ] != rString[ nPos ] ) |
229 | 3.91M | { |
230 | 3.91M | return false; |
231 | 3.91M | } |
232 | 706k | return StringContainsImpl( rWhat, rString, nPos ); |
233 | 4.62M | } |
234 | | |
235 | | // Whether pString contains rWhat at nPos |
236 | | static bool StringPtrContains( const OUString& rWhat, |
237 | | const sal_Unicode* pString, |
238 | | sal_Int32 nPos ) // nPos MUST be a valid offset from pString |
239 | 88.8k | { |
240 | | // mostly used with one character, so compare the first character before calling StringPtrContainsImpl(); NOTE(review): unlike StringContains() there is no check for an empty rWhat before rWhat[ 0 ] is read - confirm callers never pass an empty string |
241 | 88.8k | if ( rWhat[ 0 ] != pString[ nPos ] ) |
242 | 56.4k | { |
243 | 56.4k | return false; |
244 | 56.4k | } |
245 | 32.4k | return StringPtrContainsImpl( rWhat, pString, nPos ); |
246 | 88.8k | } |
247 | | |
248 | | //! DO NOT use directly, call StringContains() instead |
249 | | static bool StringContainsImpl( const OUString& rWhat, |
250 | | const OUString& rString, |
251 | | sal_Int32 nPos ); |
252 | | //! DO NOT use directly, call StringPtrContains() instead |
253 | | static bool StringPtrContainsImpl( const OUString& rWhat, |
254 | | const sal_Unicode* pString, |
255 | | sal_Int32 nPos ); |
256 | | |
257 | | // Skip a special character |
258 | | static inline bool SkipChar( sal_Unicode c, |
259 | | std::u16string_view rString, |
260 | | sal_Int32& nPos ); |
261 | | |
262 | | // Skip blank |
263 | | static inline bool SkipBlanks( const OUString& rString, |
264 | | sal_Int32& nPos ); |
265 | | |
266 | | // Jump over rWhat in rString at nPos |
267 | | static inline bool SkipString( const OUString& rWhat, |
268 | | const OUString& rString, |
269 | | sal_Int32& nPos ); |
270 | | |
271 | | // Recognizes exactly ,111 as group separator |
272 | | inline bool GetThousandSep( std::u16string_view rString, |
273 | | sal_Int32& nPos, |
274 | | sal_uInt16 nStringPos ) const; |
275 | | // Get boolean value |
276 | | short GetLogical( std::u16string_view rString ) const; |
277 | | |
278 | | // Get month and advance string position |
279 | | short GetMonth( const OUString& rString, |
280 | | sal_Int32& nPos ); |
281 | | |
282 | | // Get day of week and advance string position |
283 | | int GetDayOfWeek( const OUString& rString, |
284 | | sal_Int32& nPos ); |
285 | | |
286 | | // Get currency symbol and advance string position |
287 | | bool GetCurrency( const OUString& rString, |
288 | | sal_Int32& nPos ); |
289 | | |
290 | | // Get symbol AM or PM and advance string position |
291 | | bool GetTimeAmPm( const OUString& rString, |
292 | | sal_Int32& nPos ); |
293 | | |
294 | | // Get decimal separator and advance string position |
295 | | inline bool GetDecSep( std::u16string_view rString, |
296 | | sal_Int32& nPos ) const; |
297 | | |
298 | | // Get hundredth seconds separator and advance string position |
299 | | inline bool GetTime100SecSep( std::u16string_view rString, |
300 | | sal_Int32& nPos ) const; |
301 | | |
302 | | // Get sign and advance string position |
303 | | // Including special case '(' |
304 | | int GetSign( std::u16string_view rString, |
305 | | sal_Int32& nPos ); |
306 | | |
307 | | // Get sign of exponent and advance string position |
308 | | static short GetESign( std::u16string_view rString, |
309 | | sal_Int32& nPos ); |
310 | | |
311 | | // Get next number as array offset |
312 | | inline bool GetNextNumber( sal_uInt16& i, |
313 | | sal_uInt16& j ) const; |
314 | | |
315 | | /** Converts time -> double (only decimals) |
316 | | |
317 | | @return TRUE if time, FALSE if not (e.g. hours >12 with AM/PM) |
318 | | */ |
319 | | bool GetTimeRef( double& fOutNumber, // result as double |
320 | | sal_uInt16 nIndex, // Index of hour in input |
321 | | sal_uInt16 nCnt, // Count of time substrings in input |
322 | | SvNumInputOptions eInputOptions ) const; |
323 | | sal_uInt16 ImplGetDay ( sal_uInt16 nIndex ) const; // Day input, 0 if no match |
324 | | sal_uInt16 ImplGetMonth( sal_uInt16 nIndex ) const; // Month input, zero based return, NumberOfMonths if no match |
325 | | sal_uInt16 ImplGetYear ( sal_uInt16 nIndex ); // Year input, 0 if no match |
326 | | |
327 | | // Conversion of date to number |
328 | | bool GetDateRef( double& fDays, // OUT: days diff to null date |
329 | | sal_uInt16& nCounter ); // Count of date substrings |
330 | | |
331 | | // Analyze start of string |
332 | | bool ScanStartString( const OUString& rString ); |
333 | | |
334 | | // Analyze middle substring |
335 | | bool ScanMidString( const OUString& rString, |
336 | | sal_uInt16 nStringPos, |
337 | | sal_uInt16 nCurNumCount ); |
338 | | |
339 | | |
340 | | // Analyze end of string |
341 | | bool ScanEndString( const OUString& rString ); |
342 | | |
343 | | // Compare rString to substring of array indexed by nString |
344 | | // nString == 0xFFFF => last substring |
345 | | bool ScanStringNumFor( const OUString& rString, |
346 | | sal_Int32 nPos, |
347 | | sal_uInt16 nString, |
348 | | bool bDontDetectNegation = false ); |
349 | | |
350 | | // if nMatchedAllStrings set nMatchedUsedAsReturn and return true, |
351 | | // else do nothing and return false |
352 | | bool MatchedReturn(); |
353 | | |
354 | | //! Be sure that the string to be analyzed is already converted to upper |
355 | | //! case and if it contained native number digits that they are already |
356 | | //! converted to ASCII. |
357 | | |
358 | | // Main analyzing function |
359 | | bool IsNumberFormatMain( const OUString& rString, |
360 | | const SvNumberformat* pFormat); // number format to match against |
361 | | |
362 | | /** Whether input matches locale dependent date acceptance pattern. |
363 | | |
364 | | @param nStartPatternAt |
365 | | The pattern matching starts at input particle |
366 | | sStrArray[nStartPatternAt]. |
367 | | |
368 | | NOTE: once called the result is remembered, subsequent calls with |
369 | | different parameters do not check for a match and do not lead to a |
370 | | different result. |
371 | | */ |
372 | | bool IsAcceptedDatePattern( sal_uInt16 nStartPatternAt ); |
373 | | |
374 | | /** Sets (not advances!) rPos to sStrArray[nParticle].getLength() if string |
375 | | matches separator in pattern at nParticle. |
376 | | |
377 | | Also detects a signed year case like M/D/-Y |
378 | | |
379 | | @returns TRUE if separator matched. |
380 | | */ |
381 | | bool SkipDatePatternSeparator( sal_uInt16 nParticle, sal_Int32 & rPos, bool & rSignedYear ); |
382 | | |
383 | | /** Returns count of numbers in accepted date pattern. |
384 | | */ |
385 | | sal_uInt16 GetDatePatternNumbers(); |
386 | | |
387 | | /** Whether numeric string nNumber is of type cType in accepted date |
388 | | pattern, 'Y', 'M' or 'D'. |
389 | | */ |
390 | | bool IsDatePatternNumberOfType( sal_uInt16 nNumber, sal_Unicode cType ); |
391 | | |
392 | | /** Obtain order of accepted date pattern coded as, for example, |
393 | | ('D'<<16)|('M'<<8)|'Y' |
394 | | */ |
395 | | sal_uInt32 GetDatePatternOrder(); |
396 | | |
397 | | /** Obtain date format order, from accepted date pattern if available or |
398 | | otherwise the locale's default order. |
399 | | |
400 | | @param bFromFormatIfNoPattern |
401 | | If <TRUE/> and no pattern was matched, obtain date order from |
402 | | format if available, instead of from format's or current locale. |
403 | | */ |
404 | | DateOrder GetDateOrder( bool bFromFormatIfNoPattern = false ); |
405 | | |
406 | | /** Whether input may be an ISO 8601 date format, yyyy-mm-dd... |
407 | | |
408 | | Checks if input has at least 3 numbers for yyyy-mm-dd and the separator |
409 | | is '-', and 1<=mm<=12 and 1<=dd<=31. |
410 | | |
411 | | @see nMayBeIso8601 |
412 | | */ |
413 | | bool MayBeIso8601(); |
414 | | |
415 | | /** Whether input may be a dd-month-yy format, with month name, not |
416 | | number. |
417 | | |
418 | | @see nMayBeMonthDate |
419 | | */ |
420 | | bool MayBeMonthDate(); |
421 | | |
422 | | /** Whether input is acceptable as ISO 8601 date format in the current |
423 | | NfEvalDateFormat setting. |
424 | | */ |
425 | | bool IsAcceptableIso8601(); |
426 | | |
427 | | /** If month name in the middle was parsed, get the corresponding |
428 | | LongDateOrder in GetDateRef(). |
429 | | */ |
430 | | LongDateOrder GetMiddleMonthLongDateOrder( bool bFormatTurn, |
431 | | const LocaleDataWrapper* pLoc, |
432 | | DateOrder eDateOrder ); |
433 | | }; |
434 | | |
435 | | #endif // INCLUDED_SVL_SOURCE_NUMBERS_ZFORFIND_HXX |
436 | | |
437 | | /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |