/src/pcre2/src/pcre2_newline.c
Line | Count | Source (jump to first uncovered line) |
1 | | /************************************************* |
2 | | * Perl-Compatible Regular Expressions * |
3 | | *************************************************/ |
4 | | |
5 | | /* PCRE is a library of functions to support regular expressions whose syntax |
6 | | and semantics are as close as possible to those of the Perl 5 language. |
7 | | |
8 | | Written by Philip Hazel |
9 | | Original API code Copyright (c) 1997-2012 University of Cambridge |
10 | | New API code Copyright (c) 2016 University of Cambridge |
11 | | |
12 | | ----------------------------------------------------------------------------- |
13 | | Redistribution and use in source and binary forms, with or without |
14 | | modification, are permitted provided that the following conditions are met: |
15 | | |
16 | | * Redistributions of source code must retain the above copyright notice, |
17 | | this list of conditions and the following disclaimer. |
18 | | |
19 | | * Redistributions in binary form must reproduce the above copyright |
20 | | notice, this list of conditions and the following disclaimer in the |
21 | | documentation and/or other materials provided with the distribution. |
22 | | |
23 | | * Neither the name of the University of Cambridge nor the names of its |
24 | | contributors may be used to endorse or promote products derived from |
25 | | this software without specific prior written permission. |
26 | | |
27 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
28 | | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
29 | | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
30 | | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
31 | | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
32 | | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
33 | | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
34 | | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
35 | | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
36 | | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
37 | | POSSIBILITY OF SUCH DAMAGE. |
38 | | ----------------------------------------------------------------------------- |
39 | | */ |
40 | | |
41 | | |
42 | | /* This module contains internal functions for testing newlines when more than |
43 | | one kind of newline is to be recognized. When a newline is found, its length is |
44 | | returned. In principle, we could implement several newline "types", each |
45 | | referring to a different set of newline characters. At present, PCRE2 supports |
46 | | only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF, |
47 | | and NLTYPE_ANY. The full list of Unicode newline characters is taken from |
48 | | http://unicode.org/unicode/reports/tr18/. */ |
49 | | |
50 | | |
51 | | #include "pcre2_internal.h" |
52 | | |
53 | | |
54 | | |
55 | | /************************************************* |
56 | | * Check for newline at given position * |
57 | | *************************************************/ |
58 | | |
59 | | /* This function is called only via the IS_NEWLINE macro, which does so only |
60 | | when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed |
61 | | newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the code unit |
62 | | pointed to by ptr is less than the end of the string. |
63 | | |
64 | | Arguments: |
65 | | ptr pointer to possible newline |
66 | | type the newline type |
67 | | endptr pointer to the end of the string |
68 | | lenptr where to return the length |
69 | | utf TRUE if in utf mode |
70 | | |
71 | | Returns: TRUE or FALSE |
72 | | */ |
73 | | |
74 | | BOOL |
75 | | PRIV(is_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR endptr, |
76 | | uint32_t *lenptr, BOOL utf) |
77 | 5.29M | { |
78 | 5.29M | uint32_t c; |
79 | | |
80 | 5.29M | #ifdef SUPPORT_UNICODE |
81 | 5.29M | if (utf) { GETCHAR(c, ptr); } else c = *ptr; |
82 | | #else |
83 | | (void)utf; |
84 | | c = *ptr; |
85 | | #endif /* SUPPORT_UNICODE */ |
86 | | |
87 | 5.29M | if (type == NLTYPE_ANYCRLF) switch(c) |
88 | 576k | { |
89 | 7.16k | case CHAR_LF: |
90 | 7.16k | *lenptr = 1; |
91 | 7.16k | return TRUE; |
92 | | |
93 | 17.0k | case CHAR_CR: |
94 | 17.0k | *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1; |
95 | 17.0k | return TRUE; |
96 | | |
97 | 552k | default: |
98 | 552k | return FALSE; |
99 | 576k | } |
100 | | |
101 | | /* NLTYPE_ANY */ |
102 | | |
103 | 4.71M | else switch(c) |
104 | 4.71M | { |
105 | | #ifdef EBCDIC |
106 | | case CHAR_NEL: |
107 | | #endif |
108 | 29.1k | case CHAR_LF: |
109 | 34.1k | case CHAR_VT: |
110 | 44.3k | case CHAR_FF: |
111 | 44.3k | *lenptr = 1; |
112 | 44.3k | return TRUE; |
113 | | |
114 | 32.1k | case CHAR_CR: |
115 | 32.1k | *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1; |
116 | 32.1k | return TRUE; |
117 | | |
118 | 0 | #ifndef EBCDIC |
119 | | #if PCRE2_CODE_UNIT_WIDTH == 8 |
120 | 11.2k | case CHAR_NEL: |
121 | 11.2k | *lenptr = utf? 2 : 1; |
122 | 11.2k | return TRUE; |
123 | | |
124 | 902 | case 0x2028: /* LS */ |
125 | 1.91k | case 0x2029: /* PS */ |
126 | 1.91k | *lenptr = 3; |
127 | 1.91k | return TRUE; |
128 | | |
129 | | #else /* 16-bit or 32-bit code units */ |
130 | 0 | case CHAR_NEL: |
131 | 0 | case 0x2028: /* LS */ |
132 | 0 | case 0x2029: /* PS */ |
133 | 0 | *lenptr = 1; |
134 | 0 | return TRUE; |
135 | 0 | #endif |
136 | 0 | #endif /* Not EBCDIC */ |
137 | | |
138 | 4.62M | default: |
139 | 4.62M | return FALSE; |
140 | 4.71M | } |
141 | 5.29M | } Line | Count | Source | 77 | 5.29M | { | 78 | 5.29M | uint32_t c; | 79 | | | 80 | 5.29M | #ifdef SUPPORT_UNICODE | 81 | 5.29M | if (utf) { GETCHAR(c, ptr); } else c = *ptr; | 82 | | #else | 83 | | (void)utf; | 84 | | c = *ptr; | 85 | | #endif /* SUPPORT_UNICODE */ | 86 | | | 87 | 5.29M | if (type == NLTYPE_ANYCRLF) switch(c) | 88 | 576k | { | 89 | 7.16k | case CHAR_LF: | 90 | 7.16k | *lenptr = 1; | 91 | 7.16k | return TRUE; | 92 | | | 93 | 17.0k | case CHAR_CR: | 94 | 17.0k | *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1; | 95 | 17.0k | return TRUE; | 96 | | | 97 | 552k | default: | 98 | 552k | return FALSE; | 99 | 576k | } | 100 | | | 101 | | /* NLTYPE_ANY */ | 102 | | | 103 | 4.71M | else switch(c) | 104 | 4.71M | { | 105 | | #ifdef EBCDIC | 106 | | case CHAR_NEL: | 107 | | #endif | 108 | 29.1k | case CHAR_LF: | 109 | 34.1k | case CHAR_VT: | 110 | 44.3k | case CHAR_FF: | 111 | 44.3k | *lenptr = 1; | 112 | 44.3k | return TRUE; | 113 | | | 114 | 32.1k | case CHAR_CR: | 115 | 32.1k | *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1; | 116 | 32.1k | return TRUE; | 117 | | | 118 | 0 | #ifndef EBCDIC | 119 | 0 | #if PCRE2_CODE_UNIT_WIDTH == 8 | 120 | 11.2k | case CHAR_NEL: | 121 | 11.2k | *lenptr = utf? 2 : 1; | 122 | 11.2k | return TRUE; | 123 | | | 124 | 902 | case 0x2028: /* LS */ | 125 | 1.91k | case 0x2029: /* PS */ | 126 | 1.91k | *lenptr = 3; | 127 | 1.91k | return TRUE; | 128 | | | 129 | | #else /* 16-bit or 32-bit code units */ | 130 | | case CHAR_NEL: | 131 | | case 0x2028: /* LS */ | 132 | | case 0x2029: /* PS */ | 133 | | *lenptr = 1; | 134 | | return TRUE; | 135 | | #endif | 136 | 0 | #endif /* Not EBCDIC */ | 137 | | | 138 | 4.62M | default: | 139 | 4.62M | return FALSE; | 140 | 4.71M | } | 141 | 5.29M | } |
Unexecuted instantiation: _pcre2_is_newline_32 Unexecuted instantiation: _pcre2_is_newline_16 |
142 | | |
143 | | |
144 | | |
145 | | /************************************************* |
146 | | * Check for newline at previous position * |
147 | | *************************************************/ |
148 | | |
149 | | /* This function is called only via the WAS_NEWLINE macro, which does so only |
150 | | when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed |
151 | | newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the initial |
152 | | value of ptr is greater than the start of the string that is being processed. |
153 | | |
154 | | Arguments: |
155 | | ptr pointer to possible newline |
156 | | type the newline type |
157 | | startptr pointer to the start of the string |
158 | | lenptr where to return the length |
159 | | utf TRUE if in utf mode |
160 | | |
161 | | Returns: TRUE or FALSE |
162 | | */ |
163 | | |
164 | | BOOL |
165 | | PRIV(was_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR startptr, |
166 | | uint32_t *lenptr, BOOL utf) |
167 | 177k | { |
168 | 177k | uint32_t c; |
169 | 177k | ptr--; |
170 | | |
171 | 177k | #ifdef SUPPORT_UNICODE |
172 | 177k | if (utf) |
173 | 76.9k | { |
174 | 76.9k | BACKCHAR(ptr); |
175 | 76.9k | GETCHAR(c, ptr); |
176 | 76.9k | } |
177 | 100k | else c = *ptr; |
178 | | #else |
179 | | (void)utf; |
180 | | c = *ptr; |
181 | | #endif /* SUPPORT_UNICODE */ |
182 | | |
183 | 177k | if (type == NLTYPE_ANYCRLF) switch(c) |
184 | 67.8k | { |
185 | 6.11k | case CHAR_LF: |
186 | 6.11k | *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1; |
187 | 6.11k | return TRUE; |
188 | | |
189 | 3.72k | case CHAR_CR: |
190 | 3.72k | *lenptr = 1; |
191 | 3.72k | return TRUE; |
192 | | |
193 | 58.0k | default: |
194 | 58.0k | return FALSE; |
195 | 67.8k | } |
196 | | |
197 | | /* NLTYPE_ANY */ |
198 | | |
199 | 109k | else switch(c) |
200 | 109k | { |
201 | 3.64k | case CHAR_LF: |
202 | 3.64k | *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1; |
203 | 3.64k | return TRUE; |
204 | | |
205 | | #ifdef EBCDIC |
206 | | case CHAR_NEL: |
207 | | #endif |
208 | 679 | case CHAR_VT: |
209 | 3.69k | case CHAR_FF: |
210 | 18.9k | case CHAR_CR: |
211 | 18.9k | *lenptr = 1; |
212 | 18.9k | return TRUE; |
213 | | |
214 | 0 | #ifndef EBCDIC |
215 | | #if PCRE2_CODE_UNIT_WIDTH == 8 |
216 | 1.44k | case CHAR_NEL: |
217 | 1.44k | *lenptr = utf? 2 : 1; |
218 | 1.44k | return TRUE; |
219 | | |
220 | 766 | case 0x2028: /* LS */ |
221 | 1.72k | case 0x2029: /* PS */ |
222 | 1.72k | *lenptr = 3; |
223 | 1.72k | return TRUE; |
224 | | |
225 | | #else /* 16-bit or 32-bit code units */ |
226 | 0 | case CHAR_NEL: |
227 | 0 | case 0x2028: /* LS */ |
228 | 0 | case 0x2029: /* PS */ |
229 | 0 | *lenptr = 1; |
230 | 0 | return TRUE; |
231 | 0 | #endif |
232 | 0 | #endif /* Not EBCDIC */ |
233 | | |
234 | 83.4k | default: |
235 | 83.4k | return FALSE; |
236 | 109k | } |
237 | 177k | } Line | Count | Source | 167 | 177k | { | 168 | 177k | uint32_t c; | 169 | 177k | ptr--; | 170 | | | 171 | 177k | #ifdef SUPPORT_UNICODE | 172 | 177k | if (utf) | 173 | 76.9k | { | 174 | 76.9k | BACKCHAR(ptr); | 175 | 76.9k | GETCHAR(c, ptr); | 176 | 76.9k | } | 177 | 100k | else c = *ptr; | 178 | | #else | 179 | | (void)utf; | 180 | | c = *ptr; | 181 | | #endif /* SUPPORT_UNICODE */ | 182 | | | 183 | 177k | if (type == NLTYPE_ANYCRLF) switch(c) | 184 | 67.8k | { | 185 | 6.11k | case CHAR_LF: | 186 | 6.11k | *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1; | 187 | 6.11k | return TRUE; | 188 | | | 189 | 3.72k | case CHAR_CR: | 190 | 3.72k | *lenptr = 1; | 191 | 3.72k | return TRUE; | 192 | | | 193 | 58.0k | default: | 194 | 58.0k | return FALSE; | 195 | 67.8k | } | 196 | | | 197 | | /* NLTYPE_ANY */ | 198 | | | 199 | 109k | else switch(c) | 200 | 109k | { | 201 | 3.64k | case CHAR_LF: | 202 | 3.64k | *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1; | 203 | 3.64k | return TRUE; | 204 | | | 205 | | #ifdef EBCDIC | 206 | | case CHAR_NEL: | 207 | | #endif | 208 | 679 | case CHAR_VT: | 209 | 3.69k | case CHAR_FF: | 210 | 18.9k | case CHAR_CR: | 211 | 18.9k | *lenptr = 1; | 212 | 18.9k | return TRUE; | 213 | | | 214 | 0 | #ifndef EBCDIC | 215 | 0 | #if PCRE2_CODE_UNIT_WIDTH == 8 | 216 | 1.44k | case CHAR_NEL: | 217 | 1.44k | *lenptr = utf? 2 : 1; | 218 | 1.44k | return TRUE; | 219 | | | 220 | 766 | case 0x2028: /* LS */ | 221 | 1.72k | case 0x2029: /* PS */ | 222 | 1.72k | *lenptr = 3; | 223 | 1.72k | return TRUE; | 224 | | | 225 | | #else /* 16-bit or 32-bit code units */ | 226 | | case CHAR_NEL: | 227 | | case 0x2028: /* LS */ | 228 | | case 0x2029: /* PS */ | 229 | | *lenptr = 1; | 230 | | return TRUE; | 231 | | #endif | 232 | 0 | #endif /* Not EBCDIC */ | 233 | | | 234 | 83.4k | default: | 235 | 83.4k | return FALSE; | 236 | 109k | } | 237 | 177k | } |
Unexecuted instantiation: _pcre2_was_newline_32 Unexecuted instantiation: _pcre2_was_newline_16 |
238 | | |
239 | | /* End of pcre2_newline.c */ |