Coverage Report

Created: 2023-03-26 08:33

/src/libunistring/lib/unicase/ignorable.c
Line
Count
Source (jump to first uncovered line)
1
/* Test whether a Unicode character is case-ignorable.
2
   Copyright (C) 2002, 2006-2007, 2009-2022 Free Software Foundation, Inc.
3
   Written by Bruno Haible <bruno@clisp.org>, 2009.
4
5
   This file is free software.
6
   It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
7
   You can redistribute it and/or modify it under either
8
     - the terms of the GNU Lesser General Public License as published
9
       by the Free Software Foundation; either version 3, or (at your
10
       option) any later version, or
11
     - the terms of the GNU General Public License as published by the
12
       Free Software Foundation; either version 2, or (at your option)
13
       any later version, or
14
     - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
15
16
   This file is distributed in the hope that it will be useful,
17
   but WITHOUT ANY WARRANTY; without even the implied warranty of
18
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19
   Lesser General Public License and the GNU General Public License
20
   for more details.
21
22
   You should have received a copy of the GNU Lesser General Public
23
   License and of the GNU General Public License along with this
24
   program.  If not, see <https://www.gnu.org/licenses/>.  */
25
26
#include <config.h>
27
28
/* Specification.  */
29
#include "caseprop.h"
30
31
/* Quoting the Unicode standard:
32
     Definition: A character is defined to be "case-ignorable" if it has the
33
     value MidLetter {or the value MidNumLet} for the Word_Break property or
34
     its General_Category is one of Nonspacing_Mark (Mn), Enclosing_Mark (Me),
35
     Format (Cf), Modifier_Letter (Lm), or Modifier_Symbol (Sk).
36
   The text marked in braces was added in Unicode 5.1.0, see
37
   <https://www.unicode.org/versions/Unicode5.1.0/> section "Update of
38
   Definition of case-ignorable".   */
39
/* Since this predicate is only used for the "Before C" and "After C"
40
   conditions of FINAL_SIGMA, we exclude the "cased" characters here.
41
   This simplifies the evaluation of the regular expressions
42
     \p{cased} (\p{case-ignorable})* C
43
   and
44
     C (\p{case-ignorable})* \p{cased}
45
 */
46
47
#if 0
48
49
#include "unictype.h"
50
#include "uniwbrk.h"
51
52
/* Reference implementation of the case-ignorable test, computed directly
   from the character properties.  It is compiled out by the enclosing
   '#if 0'; the '#else' branch provides the definition that is built.

   UC is the Unicode character to classify.

   Returns true when both of the following hold:
     - UC's word-break property is WBP_MIDLETTER or WBP_MIDNUMLET, or UC's
       general category is one of Mn, Me, Cf, Lm, Sk;
     - UC is not cased according to uc_is_cased.
   Returns false otherwise.  */
bool
53
uc_is_case_ignorable (ucs4_t uc)
54
{
55
  int wbp = uc_wordbreak_property (uc);
56
57
  return (wbp == WBP_MIDLETTER || wbp == WBP_MIDNUMLET
58
          || uc_is_general_category_withtable (uc, UC_CATEGORY_MASK_Mn
59
                                                   | UC_CATEGORY_MASK_Me
60
                                                   | UC_CATEGORY_MASK_Cf
61
                                                   | UC_CATEGORY_MASK_Lm
62
                                                   | UC_CATEGORY_MASK_Sk))
63
         /* Cased characters are deliberately excluded; see the FINAL_SIGMA
            remark in the comment preceding this '#if' block.  */
         && !uc_is_cased (uc);
64
}
65
66
#else
67
68
#include "unictype/bitmap.h"
69
70
/* Define u_casing_property_case_ignorable table.  */
71
#include "ignorable.h"
72
73
/* Table-driven case-ignorable test; this is the definition that is
   actually compiled (the '#else' branch of the '#if 0' above).

   UC is the Unicode character to classify.

   Returns the result of looking UC up in u_casing_property_case_ignorable,
   the table brought in by "ignorable.h".
   NOTE(review): the table is presumably generated so that it encodes the
   same predicate as the disabled reference implementation (case-ignorable
   and not cased) -- confirm against the table generator.  */
bool
74
uc_is_case_ignorable (ucs4_t uc)
75
0
{
76
0
  return bitmap_lookup (&u_casing_property_case_ignorable, uc);
77
0
}
78
79
#endif