/src/glib/subprojects/pcre2-10.44/src/pcre2_extuni.c
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /*************************************************  | 
2  |  | *      Perl-Compatible Regular Expressions       *  | 
3  |  | *************************************************/  | 
4  |  |  | 
5  |  | /* PCRE is a library of functions to support regular expressions whose syntax  | 
6  |  | and semantics are as close as possible to those of the Perl 5 language.  | 
7  |  |  | 
8  |  |                        Written by Philip Hazel  | 
9  |  |      Original API code Copyright (c) 1997-2012 University of Cambridge  | 
10  |  |           New API code Copyright (c) 2016-2024 University of Cambridge  | 
11  |  |  | 
12  |  | -----------------------------------------------------------------------------  | 
13  |  | Redistribution and use in source and binary forms, with or without  | 
14  |  | modification, are permitted provided that the following conditions are met:  | 
15  |  |  | 
16  |  |     * Redistributions of source code must retain the above copyright notice,  | 
17  |  |       this list of conditions and the following disclaimer.  | 
18  |  |  | 
19  |  |     * Redistributions in binary form must reproduce the above copyright  | 
20  |  |       notice, this list of conditions and the following disclaimer in the  | 
21  |  |       documentation and/or other materials provided with the distribution.  | 
22  |  |  | 
23  |  |     * Neither the name of the University of Cambridge nor the names of its  | 
24  |  |       contributors may be used to endorse or promote products derived from  | 
25  |  |       this software without specific prior written permission.  | 
26  |  |  | 
27  |  | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"  | 
28  |  | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE  | 
29  |  | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE  | 
30  |  | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE  | 
31  |  | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR  | 
32  |  | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF  | 
33  |  | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS  | 
34  |  | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN  | 
35  |  | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)  | 
36  |  | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE  | 
37  |  | POSSIBILITY OF SUCH DAMAGE.  | 
38  |  | -----------------------------------------------------------------------------  | 
39  |  | */  | 
40  |  |  | 
41  |  | /* This module contains an internal function that is used to match a Unicode  | 
42  |  | extended grapheme sequence. It is used by both pcre2_match() and  | 
43  |  | pcre2_dfa_match(). However, it is called only when Unicode support is being  | 
44  |  | compiled. Nevertheless, we provide a dummy function when there is no Unicode  | 
45  |  | support, because some compilers do not like functionless source files. */  | 
46  |  |  | 
47  |  |  | 
48  |  | #ifdef HAVE_CONFIG_H  | 
49  |  | #include "config.h"  | 
50  |  | #endif  | 
51  |  |  | 
52  |  |  | 
53  |  | #include "pcre2_internal.h"  | 
54  |  |  | 
55  |  |  | 
56  |  | /* Dummy function */  | 
57  |  |  | 
58  |  | #ifndef SUPPORT_UNICODE  | 
59  |  | PCRE2_SPTR  | 
60  |  | PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject,  | 
61  |  |   PCRE2_SPTR end_subject, BOOL utf, int *xcount)  | 
62  |  | { | 
63  |  | (void)c;  | 
64  |  | (void)eptr;  | 
65  |  | (void)start_subject;  | 
66  |  | (void)end_subject;  | 
67  |  | (void)utf;  | 
68  |  | (void)xcount;  | 
69  |  | return NULL;  | 
70  |  | }  | 
71  |  | #else  | 
72  |  |  | 
73  |  |  | 
74  |  | /*************************************************  | 
75  |  | *      Match an extended grapheme sequence       *  | 
76  |  | *************************************************/  | 
77  |  |  | 
78  |  | /* NOTE: The logic contained in this function is replicated in three special-  | 
79  |  | purpose functions in the pcre2_jit_compile.c module. If the logic below is  | 
80  |  | changed, they must be kept in step so that the interpreter and the JIT have the  | 
81  |  | same behaviour.  | 
82  |  |  | 
83  |  | Arguments:  | 
84  |  |   c              the first character  | 
85  |  |   eptr           pointer to next character  | 
86  |  |   start_subject  pointer to start of subject  | 
87  |  |   end_subject    pointer to end of subject  | 
88  |  |   utf            TRUE if in UTF mode  | 
89  |  |   xcount         pointer to count of additional characters,  | 
90  |  |                    or NULL if count not needed  | 
91  |  |  | 
92  |  | Returns:         pointer after the end of the sequence  | 
93  |  | */  | 
94  |  |  | 
95  |  | PCRE2_SPTR  | 
96  |  | PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject,  | 
97  |  |   PCRE2_SPTR end_subject, BOOL utf, int *xcount)  | 
98  | 0  | { | 
99  | 0  | BOOL was_ep_ZWJ = FALSE;  | 
100  | 0  | int lgb = UCD_GRAPHBREAK(c);  | 
101  |  | 
  | 
102  | 0  | while (eptr < end_subject)  | 
103  | 0  |   { | 
104  | 0  |   int rgb;  | 
105  | 0  |   int len = 1;  | 
106  | 0  |   if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); } | 
107  | 0  |   rgb = UCD_GRAPHBREAK(c);  | 
108  | 0  |   if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;  | 
109  |  |  | 
110  |  |   /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was  | 
111  |  |   preceded by Extended Pictographic. */  | 
112  |  |  | 
113  | 0  |   if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ)  | 
114  | 0  |     break;  | 
115  |  |  | 
116  |  |   /* Not breaking between Regional Indicators is allowed only if there  | 
117  |  |   are an even number of preceding RIs. */  | 
118  |  |  | 
119  | 0  |   if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)  | 
120  | 0  |     { | 
121  | 0  |     int ricount = 0;  | 
122  | 0  |     PCRE2_SPTR bptr = eptr - 1;  | 
123  | 0  |     if (utf) BACKCHAR(bptr);  | 
124  |  |  | 
125  |  |     /* bptr is pointing to the left-hand character */  | 
126  |  | 
  | 
127  | 0  |     while (bptr > start_subject)  | 
128  | 0  |       { | 
129  | 0  |       bptr--;  | 
130  | 0  |       if (utf)  | 
131  | 0  |         { | 
132  | 0  |         BACKCHAR(bptr);  | 
133  | 0  |         GETCHAR(c, bptr);  | 
134  | 0  |         }  | 
135  | 0  |       else  | 
136  | 0  |       c = *bptr;  | 
137  | 0  |       if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break;  | 
138  | 0  |       ricount++;  | 
139  | 0  |       }  | 
140  | 0  |     if ((ricount & 1) != 0) break;  /* Grapheme break required */  | 
141  | 0  |     }  | 
142  |  |  | 
143  |  |   /* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in  | 
144  |  |   between; see next statement). */  | 
145  |  |  | 
146  | 0  |   was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ);  | 
147  |  |  | 
148  |  |   /* If Extend follows Extended_Pictographic, do not update lgb; this allows  | 
149  |  |   any number of them before a following ZWJ. */  | 
150  |  | 
  | 
151  | 0  |   if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic) lgb = rgb;  | 
152  |  | 
  | 
153  | 0  |   eptr += len;  | 
154  | 0  |   if (xcount != NULL) *xcount += 1;  | 
155  | 0  |   }  | 
156  |  | 
  | 
157  | 0  | return eptr;  | 
158  | 0  | }  | 
159  |  |  | 
160  |  | #endif  /* SUPPORT_UNICODE */  | 
161  |  |  | 
162  |  | /* End of pcre2_extuni.c */  |