/src/libunistring/lib/unicase/ignorable.c
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /* Test whether a Unicode character is case-ignorable.  | 
2  |  |    Copyright (C) 2002, 2006-2007, 2009-2024 Free Software Foundation, Inc.  | 
3  |  |    Written by Bruno Haible <bruno@clisp.org>, 2009.  | 
4  |  |  | 
5  |  |    This file is free software.  | 
6  |  |    It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".  | 
7  |  |    You can redistribute it and/or modify it under either  | 
8  |  |      - the terms of the GNU Lesser General Public License as published  | 
9  |  |        by the Free Software Foundation, either version 3, or (at your  | 
10  |  |        option) any later version, or  | 
11  |  |      - the terms of the GNU General Public License as published by the  | 
12  |  |        Free Software Foundation; either version 2, or (at your option)  | 
13  |  |        any later version, or  | 
14  |  |      - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".  | 
15  |  |  | 
16  |  |    This file is distributed in the hope that it will be useful,  | 
17  |  |    but WITHOUT ANY WARRANTY; without even the implied warranty of  | 
18  |  |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU  | 
19  |  |    Lesser General Public License and the GNU General Public License  | 
20  |  |    for more details.  | 
21  |  |  | 
22  |  |    You should have received a copy of the GNU Lesser General Public  | 
23  |  |    License and of the GNU General Public License along with this  | 
24  |  |    program.  If not, see <https://www.gnu.org/licenses/>.  */  | 
25  |  |  | 
26  |  | #include <config.h>  | 
27  |  |  | 
28  |  | /* Specification.  */  | 
29  |  | #include "caseprop.h"  | 
30  |  |  | 
31  |  | /* Quoting the Unicode standard:  | 
32  |  |      Definition: A character is defined to be "case-ignorable" if it has the  | 
33  |  |      value MidLetter {or the value MidNumLet} for the Word_Break property or | 
34  |  |      its General_Category is one of Nonspacing_Mark (Mn), Enclosing_Mark (Me),  | 
35  |  |      Format (Cf), Modifier_Letter (Lm), or Modifier_Symbol (Sk).  | 
36  |  |    The text marked in braces was added in Unicode 5.1.0, see  | 
37  |  |    <https://www.unicode.org/versions/Unicode5.1.0/> section "Update of  | 
38  |  |    Definition of case-ignorable".   */  | 
39  |  | /* Since this predicate is only used for the "Before C" and "After C"  | 
40  |  |    conditions of FINAL_SIGMA, we exclude the "cased" characters here.  | 
41  |  |    This simplifies the evaluation of the regular expressions  | 
42  |  |      \p{cased} (\p{case-ignorable})* C | 
43  |  |    and  | 
44  |  |      C (\p{case-ignorable})* \p{cased} | 
45  |  |  */  | 
46  |  |  | 
47  |  | #if 0  | 
48  |  |  | 
49  |  | #include "unictype.h"  | 
50  |  | #include "uniwbrk.h"  | 
51  |  |  | 
52  |  | bool  | 
53  |  | uc_is_case_ignorable (ucs4_t uc)  /* Variant compiled out by the enclosing #if 0: evaluates the case-ignorable predicate for UC directly from property lookups.  */  | 
54  |  | { | 
55  |  |   int wbp = uc_wordbreak_property (uc);  /* Word_Break property value of UC.  */  | 
56  |  |   /* True if Word_Break is MidLetter or MidNumLet, or the general category is one of Mn, Me, Cf, Lm, Sk -- and UC is not cased.  */  | 
57  |  |   return (wbp == WBP_MIDLETTER || wbp == WBP_MIDNUMLET  | 
58  |  |           || uc_is_general_category_withtable (uc, UC_CATEGORY_MASK_Mn  | 
59  |  |                                                    | UC_CATEGORY_MASK_Me  | 
60  |  |                                                    | UC_CATEGORY_MASK_Cf  | 
61  |  |                                                    | UC_CATEGORY_MASK_Lm  | 
62  |  |                                                    | UC_CATEGORY_MASK_Sk))  | 
63  |  |          && !uc_is_cased (uc);  /* Cased characters are deliberately excluded, which keeps the FINAL_SIGMA context matching simple.  */  | 
64  |  | }  | 
65  |  |  | 
66  |  | #else  | 
67  |  |  | 
68  |  | #include "unictype/bitmap.h"  | 
69  |  |  | 
70  |  | /* Define u_casing_property_case_ignorable table.  */  | 
71  |  | #include "ignorable.h"  | 
72  |  |  | 
73  |  | bool  | 
74  |  | uc_is_case_ignorable (ucs4_t uc)  /* Test whether UC is case-ignorable; the answer is read from the u_casing_property_case_ignorable table.  */  | 
75  | 0  | { | 
76  | 0  |   return bitmap_lookup (&u_casing_property_case_ignorable, uc);  /* NOTE(review): the table presumably already omits cased characters, as the file comment describes -- confirm against ignorable.h.  */  | 
77  | 0  | }  | 
78  |  |  | 
79  |  | #endif  |