/src/libunistring/lib/unicase/ignorable.c
| Line | Count | Source (jump to first uncovered line) | 
| 1 |  | /* Test whether a Unicode character is case-ignorable. | 
| 2 |  |    Copyright (C) 2002, 2006-2007, 2009-2022 Free Software Foundation, Inc. | 
| 3 |  |    Written by Bruno Haible <bruno@clisp.org>, 2009. | 
| 4 |  |  | 
| 5 |  |    This file is free software. | 
| 6 |  |    It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+". | 
| 7 |  |    You can redistribute it and/or modify it under either | 
| 8 |  |      - the terms of the GNU Lesser General Public License as published | 
| 9 |  |        by the Free Software Foundation; either version 3, or (at your | 
| 10 |  |        option) any later version, or | 
| 11 |  |      - the terms of the GNU General Public License as published by the | 
| 12 |  |        Free Software Foundation; either version 2, or (at your option) | 
| 13 |  |        any later version, or | 
| 14 |  |      - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+". | 
| 15 |  |  | 
| 16 |  |    This file is distributed in the hope that it will be useful, | 
| 17 |  |    but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 18 |  |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
| 19 |  |    Lesser General Public License and the GNU General Public License | 
| 20 |  |    for more details. | 
| 21 |  |  | 
| 22 |  |    You should have received a copy of the GNU Lesser General Public | 
| 23 |  |    License and of the GNU General Public License along with this | 
| 24 |  |    program.  If not, see <https://www.gnu.org/licenses/>.  */ | 
| 25 |  |  | 
| 26 |  | #include <config.h> | 
| 27 |  |  | 
| 28 |  | /* Specification.  */ | 
| 29 |  | #include "caseprop.h" | 
| 30 |  |  | 
| 31 |  | /* Quoting the Unicode standard: | 
| 32 |  |      Definition: A character is defined to be "case-ignorable" if it has the | 
| 33 |  |      value MidLetter {or the value MidNumLet} for the Word_Break property or | 
| 34 |  |      its General_Category is one of Nonspacing_Mark (Mn), Enclosing_Mark (Me), | 
| 35 |  |      Format (Cf), Modifier_Letter (Lm), or Modifier_Symbol (Sk). | 
| 36 |  |    The text marked in braces was added in Unicode 5.1.0, see | 
| 37 |  |    <https://www.unicode.org/versions/Unicode5.1.0/> section "Update of | 
| 38 |  |    Definition of case-ignorable".   */ | 
| 39 |  | /* Since this predicate is only used for the "Before C" and "After C" | 
| 40 |  |    conditions of FINAL_SIGMA, we exclude the "cased" characters here. | 
| 41 |  |    This simplifies the evaluation of the regular expressions | 
| 42 |  |      \p{cased} (\p{case-ignorable})* C | 
| 43 |  |    and | 
| 44 |  |      C (\p{case-ignorable})* \p{cased} | 
| 45 |  |  */ | 
| 46 |  |  | 
| 47 |  | #if 0 | 
| 48 |  |  | 
| 49 |  | #include "unictype.h" | 
| 50 |  | #include "uniwbrk.h" | 
| 51 |  | /* Determine whether UC is case-ignorable directly from its word-break property and its general category, with cased characters excluded.  This variant is compiled out by the enclosing "#if 0".  */ | 
| 52 |  | bool | 
| 53 |  | uc_is_case_ignorable (ucs4_t uc) | 
| 54 |  | { | 
| 55 |  |   int wbp = uc_wordbreak_property (uc); | 
| 56 |  |  | 
| 57 |  |   return (wbp == WBP_MIDLETTER || wbp == WBP_MIDNUMLET | 
| 58 |  |           || uc_is_general_category_withtable (uc, UC_CATEGORY_MASK_Mn | 
| 59 |  |                                                    | UC_CATEGORY_MASK_Me | 
| 60 |  |                                                    | UC_CATEGORY_MASK_Cf | 
| 61 |  |                                                    | UC_CATEGORY_MASK_Lm | 
| 62 |  |                                                    | UC_CATEGORY_MASK_Sk)) | 
| 63 |  |          && !uc_is_cased (uc);  /* Cased characters are deliberately excluded; see the FINAL_SIGMA note above.  */ | 
| 64 |  | } | 
| 65 |  |  | 
| 66 |  | #else | 
| 67 |  |  | 
| 68 |  | #include "unictype/bitmap.h" | 
| 69 |  |  | 
| 70 |  | /* Define u_casing_property_case_ignorable table.  */ | 
| 71 |  | #include "ignorable.h" | 
| 72 |  | /* Return the result of looking up UC in the u_casing_property_case_ignorable bitmap table, which "ignorable.h" defines.  */ | 
| 73 |  | bool | 
| 74 |  | uc_is_case_ignorable (ucs4_t uc) | 
| 75 | 0 | { | 
| 76 | 0 |   return bitmap_lookup (&u_casing_property_case_ignorable, uc); | 
| 77 | 0 | } | 
| 78 |  |  | 
| 79 |  | #endif |