/src/libunistring/lib/unicase/ignorable.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Test whether a Unicode character is case-ignorable. |
2 | | Copyright (C) 2002, 2006-2007, 2009-2022 Free Software Foundation, Inc. |
3 | | Written by Bruno Haible <bruno@clisp.org>, 2009. |
4 | | |
5 | | This file is free software. |
6 | | It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+". |
7 | | You can redistribute it and/or modify it under either |
8 | | - the terms of the GNU Lesser General Public License as published |
9 | | by the Free Software Foundation; either version 3, or (at your |
10 | | option) any later version, or |
11 | | - the terms of the GNU General Public License as published by the |
12 | | Free Software Foundation; either version 2, or (at your option) |
13 | | any later version, or |
14 | | - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+". |
15 | | |
16 | | This file is distributed in the hope that it will be useful, |
17 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
18 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
19 | | Lesser General Public License and the GNU General Public License |
20 | | for more details. |
21 | | |
22 | | You should have received a copy of the GNU Lesser General Public |
23 | | License and of the GNU General Public License along with this |
24 | | program. If not, see <https://www.gnu.org/licenses/>. */ |
25 | | |
26 | | #include <config.h> |
27 | | |
28 | | /* Specification. */ |
29 | | #include "caseprop.h" |
30 | | |
31 | | /* Quoting the Unicode standard: |
32 | | Definition: A character is defined to be "case-ignorable" if it has the |
33 | | value MidLetter {or the value MidNumLet} for the Word_Break property or |
34 | | its General_Category is one of Nonspacing_Mark (Mn), Enclosing_Mark (Me), |
35 | | Format (Cf), Modifier_Letter (Lm), or Modifier_Symbol (Sk). |
36 | | The text marked in braces was added in Unicode 5.1.0, see |
37 | | <https://www.unicode.org/versions/Unicode5.1.0/> section "Update of |
38 | | Definition of case-ignorable". */ |
39 | | /* Since this predicate is only used for the "Before C" and "After C" |
40 | | conditions of FINAL_SIGMA, we exclude the "cased" characters here. |
41 | | This simplifies the evaluation of the regular expressions |
42 | | \p{cased} (\p{case-ignorable})* C |
43 | | and |
44 | | C (\p{case-ignorable})* \p{cased} |
45 | | */ |
46 | | |
47 | | #if 0 |
48 | | |
49 | | #include "unictype.h" |
50 | | #include "uniwbrk.h" |
51 | | |
52 | | bool |
53 | | uc_is_case_ignorable (ucs4_t uc) |
54 | | { |
55 | | int wbp = uc_wordbreak_property (uc); |
56 | | |
57 | | return (wbp == WBP_MIDLETTER || wbp == WBP_MIDNUMLET |
58 | | || uc_is_general_category_withtable (uc, UC_CATEGORY_MASK_Mn |
59 | | | UC_CATEGORY_MASK_Me |
60 | | | UC_CATEGORY_MASK_Cf |
61 | | | UC_CATEGORY_MASK_Lm |
62 | | | UC_CATEGORY_MASK_Sk)) |
63 | | && !uc_is_cased (uc); |
64 | | } |
65 | | |
66 | | #else |
67 | | |
68 | | #include "unictype/bitmap.h" |
69 | | |
70 | | /* Define u_casing_property_case_ignorable table. */ |
71 | | #include "ignorable.h" |
72 | | |
73 | | bool |
74 | | uc_is_case_ignorable (ucs4_t uc) |
75 | 0 | { |
76 | 0 | return bitmap_lookup (&u_casing_property_case_ignorable, uc); |
77 | 0 | } |
78 | | |
79 | | #endif |