/src/llvm-project/llvm/lib/Support/regengine.inc
Line | Count | Source (jump to first uncovered line) |
1 | | /*- |
2 | | * This code is derived from OpenBSD's libc/regex, original license follows: |
3 | | * |
4 | | * Copyright (c) 1992, 1993, 1994 Henry Spencer. |
5 | | * Copyright (c) 1992, 1993, 1994 |
6 | | * The Regents of the University of California. All rights reserved. |
7 | | * |
8 | | * This code is derived from software contributed to Berkeley by |
9 | | * Henry Spencer. |
10 | | * |
11 | | * Redistribution and use in source and binary forms, with or without |
12 | | * modification, are permitted provided that the following conditions |
13 | | * are met: |
14 | | * 1. Redistributions of source code must retain the above copyright |
15 | | * notice, this list of conditions and the following disclaimer. |
16 | | * 2. Redistributions in binary form must reproduce the above copyright |
17 | | * notice, this list of conditions and the following disclaimer in the |
18 | | * documentation and/or other materials provided with the distribution. |
19 | | * 3. Neither the name of the University nor the names of its contributors |
20 | | * may be used to endorse or promote products derived from this software |
21 | | * without specific prior written permission. |
22 | | * |
23 | | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
24 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
25 | | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
26 | | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
27 | | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
28 | | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
29 | | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
30 | | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
31 | | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
32 | | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
33 | | * SUCH DAMAGE. |
34 | | * |
35 | | * @(#)engine.c 8.5 (Berkeley) 3/20/94 |
36 | | */ |
37 | | |
38 | | /* |
39 | | * The matching engine and friends. This file is #included by regexec.c |
40 | | * after suitable #defines of a variety of macros used herein, so that |
41 | | * different state representations can be used without duplicating masses |
42 | | * of code. |
43 | | */ |
44 | | |
/*
 * Per-instantiation names.  The includer (regexec.c) defines SNAMES or
 * LNAMES before pulling this file in, so that each file-local identifier
 * below is given an "s" or "l" prefix and both copies of the engine can live
 * in the same translation unit.  A new static function added to this file
 * needs an entry in BOTH lists, otherwise the second inclusion redefines it.
 */
#if defined(SNAMES)
#define	matcher		smatcher
#define	fast		sfast
#define	slow		sslow
#define	dissect		sdissect
#define	backref		sbackref
#define	step		sstep
#define	print		sprint
#define	at		sat
#define	match		smat
#define	nope		snope
#define	step_back	sstep_back
#endif
#if defined(LNAMES)
#define	matcher		lmatcher
#define	fast		lfast
#define	slow		lslow
#define	dissect		ldissect
#define	backref		lbackref
#define	step		lstep
#define	print		lprint
#define	at		lat
#define	match		lmat
#define	nope		lnope
#define	step_back	lstep_back
#endif
71 | | |
72 | | /* another structure passed up and down to avoid zillions of parameters */ |
73 | | struct match { |
74 | | struct re_guts *g; |
75 | | int eflags; |
76 | | llvm_regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ |
77 | | const char *offp; /* offsets work from here */ |
78 | | const char *beginp; /* start of string -- virtual NUL precedes */ |
79 | | const char *endp; /* end of string -- virtual NUL here */ |
80 | | const char *coldp; /* can be no match starting before here */ |
81 | | const char **lastpos; /* [nplus+1] */ |
82 | | STATEVARS; |
83 | | states st; /* current states */ |
84 | | states fresh; /* states for a fresh start */ |
85 | | states tmp; /* temporary */ |
86 | | states empty; /* empty set of states */ |
87 | | }; |
88 | | |
89 | | static int matcher(struct re_guts *, const char *, size_t, |
90 | | llvm_regmatch_t[], int); |
91 | | static const char *dissect(struct match *, const char *, const char *, sopno, |
92 | | sopno); |
93 | | static const char *backref(struct match *, const char *, const char *, sopno, |
94 | | sopno, sopno, int); |
95 | | static const char *fast(struct match *, const char *, const char *, sopno, sopno); |
96 | | static const char *slow(struct match *, const char *, const char *, sopno, sopno); |
97 | | static states step(struct re_guts *, sopno, sopno, states, int, states); |
98 | 0 | #define MAX_RECURSION 100 |
99 | 1.20G | #define BOL (OUT+1) |
100 | 307M | #define EOL (BOL+1) |
101 | 228M | #define BOLEOL (BOL+2) |
102 | 135k | #define NOTHING (BOL+3) |
103 | 272M | #define BOW (BOL+4) |
104 | 135M | #define EOW (BOL+5) |
105 | | #define CODEMAX (BOL+5) /* highest code used */ |
106 | 801M | #define NONCHAR(c) ((c) > CHAR_MAX) |
107 | | #define NNONCHAR (CODEMAX-CHAR_MAX) |
108 | | #ifdef REDEBUG |
109 | | static void print(struct match *, const char *, states, int, FILE *); |
110 | | #endif |
111 | | #ifdef REDEBUG |
112 | | static void at( |
113 | | struct match *, const char *, const char *, const char *, sopno, sopno); |
114 | | #endif |
115 | | #ifdef REDEBUG |
116 | | static char *pchar(int); |
117 | | #endif |
118 | | |
119 | | #ifdef REDEBUG |
120 | | #define SP(t, s, c) print(m, t, s, c, stdout) |
121 | | #define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2) |
122 | | #define NOTE(str) { if (m->eflags®_TRACE) (void)printf("=%s\n", (str)); } |
123 | | static int nope = 0; |
124 | | #else |
125 | | #define SP(t, s, c) /* nothing */ |
126 | | #define AT(t, p1, p2, s1, s2) /* nothing */ |
127 | | #define NOTE(s) /* nothing */ |
128 | | #endif |
129 | | |
130 | | /* |
131 | | - matcher - the actual matching engine |
132 | | */ |
133 | | static int /* 0 success, REG_NOMATCH failure */ |
134 | | matcher(struct re_guts *g, const char *string, size_t nmatch, |
135 | | llvm_regmatch_t pmatch[], |
136 | | int eflags) |
137 | 188k | { |
138 | 188k | const char *endp; |
139 | 188k | size_t i; |
140 | 188k | struct match mv; |
141 | 188k | struct match *m = &mv; |
142 | 188k | const char *dp; |
143 | 188k | const sopno gf = g->firststate+1; /* +1 for OEND */ |
144 | 188k | const sopno gl = g->laststate; |
145 | 188k | const char *start; |
146 | 188k | const char *stop; |
147 | | |
148 | | /* simplify the situation where possible */ |
149 | 188k | if (g->cflags®_NOSUB) |
150 | 0 | nmatch = 0; |
151 | 188k | if (eflags®_STARTEND) { |
152 | 188k | start = string + pmatch[0].rm_so; |
153 | 188k | stop = string + pmatch[0].rm_eo; |
154 | 188k | } else { |
155 | 0 | start = string; |
156 | 0 | stop = start + strlen(start); |
157 | 0 | } |
158 | 188k | if (stop < start) |
159 | 0 | return(REG_INVARG); |
160 | | |
161 | | /* prescreening; this does wonders for this rather slow code */ |
162 | 188k | if (g->must != NULL) { |
163 | 780M | for (dp = start; dp < stop; dp++) |
164 | 779M | if (*dp == g->must[0] && stop - dp >= g->mlen && |
165 | 779M | memcmp(dp, g->must, (size_t)g->mlen) == 0) |
166 | 16.2k | break; |
167 | 187k | if (dp == stop) /* we didn't find g->must */ |
168 | 171k | return(REG_NOMATCH); |
169 | 187k | } |
170 | | |
171 | | /* match struct setup */ |
172 | 17.3k | m->g = g; |
173 | 17.3k | m->eflags = eflags; |
174 | 17.3k | m->pmatch = NULL; |
175 | 17.3k | m->lastpos = NULL; |
176 | 17.3k | m->offp = string; |
177 | 17.3k | m->beginp = start; |
178 | 17.3k | m->endp = stop; |
179 | 17.3k | STATESETUP(m, 4); |
180 | 17.3k | SETUP(m->st); |
181 | 17.3k | SETUP(m->fresh); |
182 | 17.3k | SETUP(m->tmp); |
183 | 17.3k | SETUP(m->empty); |
184 | 17.3k | CLEAR(m->empty); |
185 | | |
186 | | /* this loop does only one repetition except for backrefs */ |
187 | 17.3k | for (;;) { |
188 | 17.3k | endp = fast(m, start, stop, gf, gl); |
189 | 17.3k | if (endp == NULL) { /* a miss */ |
190 | 14.5k | free(m->pmatch); |
191 | 14.5k | free((void*)m->lastpos); |
192 | 14.5k | STATETEARDOWN(m); |
193 | 14.5k | return(REG_NOMATCH); |
194 | 14.5k | } |
195 | 2.85k | if (nmatch == 0 && !g->backrefs) |
196 | 83 | break; /* no further info needed */ |
197 | | |
198 | | /* where? */ |
199 | 2.77k | assert(m->coldp != NULL); |
200 | 2.77k | for (;;) { |
201 | 2.77k | NOTE("finding start"); |
202 | 2.77k | endp = slow(m, m->coldp, stop, gf, gl); |
203 | 2.77k | if (endp != NULL) |
204 | 2.77k | break; |
205 | 0 | assert(m->coldp < m->endp); |
206 | 0 | m->coldp++; |
207 | 0 | } |
208 | 2.77k | if (nmatch == 1 && !g->backrefs) |
209 | 0 | break; /* no further info needed */ |
210 | | |
211 | | /* oh my, they want the subexpressions... */ |
212 | 2.77k | if (m->pmatch == NULL) |
213 | 2.77k | m->pmatch = (llvm_regmatch_t *)malloc((m->g->nsub + 1) * |
214 | 2.77k | sizeof(llvm_regmatch_t)); |
215 | 2.77k | if (m->pmatch == NULL) { |
216 | 0 | STATETEARDOWN(m); |
217 | 0 | return(REG_ESPACE); |
218 | 0 | } |
219 | 13.5k | for (i = 1; i <= m->g->nsub; i++) |
220 | 10.7k | m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; |
221 | 2.77k | if (!g->backrefs && !(m->eflags®_BACKR)) { |
222 | 2.77k | NOTE("dissecting"); |
223 | 2.77k | dp = dissect(m, m->coldp, endp, gf, gl); |
224 | 2.77k | } else { |
225 | 0 | if (g->nplus > 0 && m->lastpos == NULL) |
226 | 0 | m->lastpos = (const char **)malloc((g->nplus+1) * |
227 | 0 | sizeof(char *)); |
228 | 0 | if (g->nplus > 0 && m->lastpos == NULL) { |
229 | 0 | free(m->pmatch); |
230 | 0 | STATETEARDOWN(m); |
231 | 0 | return(REG_ESPACE); |
232 | 0 | } |
233 | 0 | NOTE("backref dissect"); |
234 | 0 | dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); |
235 | 0 | } |
236 | 2.77k | if (dp != NULL) |
237 | 2.77k | break; |
238 | | |
239 | | /* uh-oh... we couldn't find a subexpression-level match */ |
240 | 0 | assert(g->backrefs); /* must be back references doing it */ |
241 | 0 | assert(g->nplus == 0 || m->lastpos != NULL); |
242 | 0 | for (;;) { |
243 | 0 | if (dp != NULL || endp <= m->coldp) |
244 | 0 | break; /* defeat */ |
245 | 0 | NOTE("backoff"); |
246 | 0 | endp = slow(m, m->coldp, endp-1, gf, gl); |
247 | 0 | if (endp == NULL) |
248 | 0 | break; /* defeat */ |
249 | | /* try it on a shorter possibility */ |
250 | | #ifndef NDEBUG |
251 | | for (i = 1; i <= m->g->nsub; i++) { |
252 | | assert(m->pmatch[i].rm_so == -1); |
253 | | assert(m->pmatch[i].rm_eo == -1); |
254 | | } |
255 | | #endif |
256 | 0 | NOTE("backoff dissect"); |
257 | 0 | dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); |
258 | 0 | } |
259 | 0 | assert(dp == NULL || dp == endp); |
260 | 0 | if (dp != NULL) /* found a shorter one */ |
261 | 0 | break; |
262 | | |
263 | | /* despite initial appearances, there is no match here */ |
264 | 0 | NOTE("false alarm"); |
265 | 0 | if (m->coldp == stop) |
266 | 0 | break; |
267 | 0 | start = m->coldp + 1; /* recycle starting later */ |
268 | 0 | } |
269 | | |
270 | | /* fill in the details if requested */ |
271 | 2.85k | if (nmatch > 0) { |
272 | 2.77k | pmatch[0].rm_so = m->coldp - m->offp; |
273 | 2.77k | pmatch[0].rm_eo = endp - m->offp; |
274 | 2.77k | } |
275 | 2.85k | if (nmatch > 1) { |
276 | 2.77k | assert(m->pmatch != NULL); |
277 | 13.5k | for (i = 1; i < nmatch; i++) |
278 | 10.7k | if (i <= m->g->nsub) |
279 | 10.7k | pmatch[i] = m->pmatch[i]; |
280 | 0 | else { |
281 | 0 | pmatch[i].rm_so = -1; |
282 | 0 | pmatch[i].rm_eo = -1; |
283 | 0 | } |
284 | 2.77k | } |
285 | | |
286 | 2.85k | if (m->pmatch != NULL) |
287 | 2.77k | free((char *)m->pmatch); |
288 | 2.85k | if (m->lastpos != NULL) |
289 | 0 | free((char *)m->lastpos); |
290 | 2.85k | STATETEARDOWN(m); |
291 | 2.85k | return(0); |
292 | 0 | } Line | Count | Source | 137 | 188k | { | 138 | 188k | const char *endp; | 139 | 188k | size_t i; | 140 | 188k | struct match mv; | 141 | 188k | struct match *m = &mv; | 142 | 188k | const char *dp; | 143 | 188k | const sopno gf = g->firststate+1; /* +1 for OEND */ | 144 | 188k | const sopno gl = g->laststate; | 145 | 188k | const char *start; | 146 | 188k | const char *stop; | 147 | | | 148 | | /* simplify the situation where possible */ | 149 | 188k | if (g->cflags®_NOSUB) | 150 | 0 | nmatch = 0; | 151 | 188k | if (eflags®_STARTEND) { | 152 | 188k | start = string + pmatch[0].rm_so; | 153 | 188k | stop = string + pmatch[0].rm_eo; | 154 | 188k | } else { | 155 | 0 | start = string; | 156 | 0 | stop = start + strlen(start); | 157 | 0 | } | 158 | 188k | if (stop < start) | 159 | 0 | return(REG_INVARG); | 160 | | | 161 | | /* prescreening; this does wonders for this rather slow code */ | 162 | 188k | if (g->must != NULL) { | 163 | 780M | for (dp = start; dp < stop; dp++) | 164 | 779M | if (*dp == g->must[0] && stop - dp >= g->mlen && | 165 | 779M | memcmp(dp, g->must, (size_t)g->mlen) == 0) | 166 | 16.2k | break; | 167 | 187k | if (dp == stop) /* we didn't find g->must */ | 168 | 171k | return(REG_NOMATCH); | 169 | 187k | } | 170 | | | 171 | | /* match struct setup */ | 172 | 17.3k | m->g = g; | 173 | 17.3k | m->eflags = eflags; | 174 | 17.3k | m->pmatch = NULL; | 175 | 17.3k | m->lastpos = NULL; | 176 | 17.3k | m->offp = string; | 177 | 17.3k | m->beginp = start; | 178 | 17.3k | m->endp = stop; | 179 | 17.3k | STATESETUP(m, 4); | 180 | 17.3k | SETUP(m->st); | 181 | 17.3k | SETUP(m->fresh); | 182 | 17.3k | SETUP(m->tmp); | 183 | 17.3k | SETUP(m->empty); | 184 | 17.3k | CLEAR(m->empty); | 185 | | | 186 | | /* this loop does only one repetition except for backrefs */ | 187 | 17.3k | for (;;) { | 188 | 17.3k | endp = fast(m, start, stop, gf, gl); | 189 | 17.3k | if (endp == NULL) { /* a miss */ | 190 | 14.5k | free(m->pmatch); | 191 | 14.5k | 
free((void*)m->lastpos); | 192 | 14.5k | STATETEARDOWN(m); | 193 | 14.5k | return(REG_NOMATCH); | 194 | 14.5k | } | 195 | 2.85k | if (nmatch == 0 && !g->backrefs) | 196 | 83 | break; /* no further info needed */ | 197 | | | 198 | | /* where? */ | 199 | 2.77k | assert(m->coldp != NULL); | 200 | 2.77k | for (;;) { | 201 | 2.77k | NOTE("finding start"); | 202 | 2.77k | endp = slow(m, m->coldp, stop, gf, gl); | 203 | 2.77k | if (endp != NULL) | 204 | 2.77k | break; | 205 | 0 | assert(m->coldp < m->endp); | 206 | 0 | m->coldp++; | 207 | 0 | } | 208 | 2.77k | if (nmatch == 1 && !g->backrefs) | 209 | 0 | break; /* no further info needed */ | 210 | | | 211 | | /* oh my, they want the subexpressions... */ | 212 | 2.77k | if (m->pmatch == NULL) | 213 | 2.77k | m->pmatch = (llvm_regmatch_t *)malloc((m->g->nsub + 1) * | 214 | 2.77k | sizeof(llvm_regmatch_t)); | 215 | 2.77k | if (m->pmatch == NULL) { | 216 | 0 | STATETEARDOWN(m); | 217 | 0 | return(REG_ESPACE); | 218 | 0 | } | 219 | 13.5k | for (i = 1; i <= m->g->nsub; i++) | 220 | 10.7k | m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; | 221 | 2.77k | if (!g->backrefs && !(m->eflags®_BACKR)) { | 222 | 2.77k | NOTE("dissecting"); | 223 | 2.77k | dp = dissect(m, m->coldp, endp, gf, gl); | 224 | 2.77k | } else { | 225 | 0 | if (g->nplus > 0 && m->lastpos == NULL) | 226 | 0 | m->lastpos = (const char **)malloc((g->nplus+1) * | 227 | 0 | sizeof(char *)); | 228 | 0 | if (g->nplus > 0 && m->lastpos == NULL) { | 229 | 0 | free(m->pmatch); | 230 | 0 | STATETEARDOWN(m); | 231 | 0 | return(REG_ESPACE); | 232 | 0 | } | 233 | 0 | NOTE("backref dissect"); | 234 | 0 | dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); | 235 | 0 | } | 236 | 2.77k | if (dp != NULL) | 237 | 2.77k | break; | 238 | | | 239 | | /* uh-oh... 
we couldn't find a subexpression-level match */ | 240 | 0 | assert(g->backrefs); /* must be back references doing it */ | 241 | 0 | assert(g->nplus == 0 || m->lastpos != NULL); | 242 | 0 | for (;;) { | 243 | 0 | if (dp != NULL || endp <= m->coldp) | 244 | 0 | break; /* defeat */ | 245 | 0 | NOTE("backoff"); | 246 | 0 | endp = slow(m, m->coldp, endp-1, gf, gl); | 247 | 0 | if (endp == NULL) | 248 | 0 | break; /* defeat */ | 249 | | /* try it on a shorter possibility */ | 250 | | #ifndef NDEBUG | 251 | | for (i = 1; i <= m->g->nsub; i++) { | 252 | | assert(m->pmatch[i].rm_so == -1); | 253 | | assert(m->pmatch[i].rm_eo == -1); | 254 | | } | 255 | | #endif | 256 | 0 | NOTE("backoff dissect"); | 257 | 0 | dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); | 258 | 0 | } | 259 | 0 | assert(dp == NULL || dp == endp); | 260 | 0 | if (dp != NULL) /* found a shorter one */ | 261 | 0 | break; | 262 | | | 263 | | /* despite initial appearances, there is no match here */ | 264 | 0 | NOTE("false alarm"); | 265 | 0 | if (m->coldp == stop) | 266 | 0 | break; | 267 | 0 | start = m->coldp + 1; /* recycle starting later */ | 268 | 0 | } | 269 | | | 270 | | /* fill in the details if requested */ | 271 | 2.85k | if (nmatch > 0) { | 272 | 2.77k | pmatch[0].rm_so = m->coldp - m->offp; | 273 | 2.77k | pmatch[0].rm_eo = endp - m->offp; | 274 | 2.77k | } | 275 | 2.85k | if (nmatch > 1) { | 276 | 2.77k | assert(m->pmatch != NULL); | 277 | 13.5k | for (i = 1; i < nmatch; i++) | 278 | 10.7k | if (i <= m->g->nsub) | 279 | 10.7k | pmatch[i] = m->pmatch[i]; | 280 | 0 | else { | 281 | 0 | pmatch[i].rm_so = -1; | 282 | 0 | pmatch[i].rm_eo = -1; | 283 | 0 | } | 284 | 2.77k | } | 285 | | | 286 | 2.85k | if (m->pmatch != NULL) | 287 | 2.77k | free((char *)m->pmatch); | 288 | 2.85k | if (m->lastpos != NULL) | 289 | 0 | free((char *)m->lastpos); | 290 | 2.85k | STATETEARDOWN(m); | 291 | 2.85k | return(0); | 292 | 17.3k | } |
Unexecuted instantiation: regexec.c:lmatcher |
293 | | |
294 | | /* Step back from "stop" to a position where the strip startst..stopst might |
295 | | * match. This can always conservatively return "stop - 1", but may return an |
296 | | * earlier position if matches at later positions are impossible. */ |
297 | | static const char * |
298 | | step_back(struct re_guts *g, const char *start, const char *stop, sopno startst, |
299 | | sopno stopst) |
300 | 0 | { |
301 | | /* Always step back at least one character. */ |
302 | 0 | assert(stop > start); |
303 | 0 | const char *res = stop - 1; |
304 | | |
305 | | /* Check whether the strip startst..stropst starts with a fixed character, |
306 | | * ignoring any closing parentheses. If not, return a conservative result. */ |
307 | 0 | for (;;) { |
308 | 0 | if (startst >= stopst) |
309 | 0 | return res; |
310 | 0 | if (OP(g->strip[startst]) != ORPAREN) |
311 | 0 | break; |
312 | 0 | startst++; |
313 | 0 | } |
314 | 0 | if (OP(g->strip[startst]) != OCHAR) |
315 | 0 | return res; |
316 | | |
317 | | /* Find the character that starts the following match. */ |
318 | 0 | char ch = OPND(g->strip[startst]); |
319 | 0 | for (; res != start; --res) { |
320 | 0 | if (*res == ch) { |
321 | | /* Try to check the next fixed character as well. */ |
322 | 0 | sopno nextst = startst + 1; |
323 | 0 | const char *next = res + 1; |
324 | 0 | if (nextst >= stopst || OP(g->strip[nextst]) != OCHAR || next >= stop || |
325 | 0 | *next == (char)OPND(g->strip[nextst])) |
326 | 0 | break; |
327 | 0 | } |
328 | 0 | } |
329 | 0 | return res; |
330 | 0 | } Unexecuted instantiation: regexec.c:sstep_back Unexecuted instantiation: regexec.c:lstep_back |
331 | | |
332 | | /* |
333 | | - dissect - figure out what matched what, no back references |
334 | | */ |
335 | | static const char * /* == stop (success) always */ |
336 | | dissect(struct match *m, const char *start, const char *stop, sopno startst, |
337 | | sopno stopst) |
338 | 23.1k | { |
339 | 23.1k | int i; |
340 | 23.1k | sopno ss; /* start sop of current subRE */ |
341 | 23.1k | sopno es; /* end sop of current subRE */ |
342 | 23.1k | const char *sp; /* start of string matched by it */ |
343 | 23.1k | const char *stp; /* string matched by it cannot pass here */ |
344 | 23.1k | const char *rest; /* start of rest of string */ |
345 | 23.1k | const char *tail; /* string unmatched by rest of RE */ |
346 | 23.1k | sopno ssub; /* start sop of subsubRE */ |
347 | 23.1k | sopno esub; /* end sop of subsubRE */ |
348 | 23.1k | const char *ssp; /* start of string matched by subsubRE */ |
349 | 23.1k | const char *sep; /* end of string matched by subsubRE */ |
350 | 23.1k | const char *oldssp; /* previous ssp */ |
351 | | |
352 | 23.1k | AT("diss", start, stop, startst, stopst); |
353 | 23.1k | sp = start; |
354 | 90.3k | for (ss = startst; ss < stopst; ss = es) { |
355 | | /* identify end of subRE */ |
356 | 67.1k | es = ss; |
357 | 67.1k | switch (OP(m->g->strip[es])) { |
358 | 10.0k | case OPLUS_: |
359 | 20.0k | case OQUEST_: |
360 | 20.0k | es += OPND(m->g->strip[es]); |
361 | 20.0k | break; |
362 | 323 | case OCH_: |
363 | 969 | while (OP(m->g->strip[es]) != O_CH) |
364 | 646 | es += OPND(m->g->strip[es]); |
365 | 323 | break; |
366 | 67.1k | } |
367 | 67.1k | es++; |
368 | | |
369 | | /* figure out what it matched */ |
370 | 67.1k | switch (OP(m->g->strip[ss])) { |
371 | 0 | case OEND: |
372 | 0 | assert(nope); |
373 | 0 | break; |
374 | 10.0k | case OCHAR: |
375 | 10.0k | sp++; |
376 | 10.0k | break; |
377 | 2.45k | case OBOL: |
378 | 5.21k | case OEOL: |
379 | 5.21k | case OBOW: |
380 | 5.21k | case OEOW: |
381 | 5.21k | break; |
382 | 323 | case OANY: |
383 | 10.6k | case OANYOF: |
384 | 10.6k | sp++; |
385 | 10.6k | break; |
386 | 0 | case OBACK_: |
387 | 0 | case O_BACK: |
388 | 0 | assert(nope); |
389 | 0 | break; |
390 | | /* cases where length of match is hard to find */ |
391 | 10.0k | case OQUEST_: |
392 | 10.0k | stp = stop; |
393 | 10.0k | for (;;) { |
394 | | /* how long could this one be? */ |
395 | 10.0k | rest = slow(m, sp, stp, ss, es); |
396 | 10.0k | assert(rest != NULL); /* it did match */ |
397 | | /* could the rest match the rest? */ |
398 | 10.0k | tail = slow(m, rest, stop, es, stopst); |
399 | 10.0k | if (tail == stop) |
400 | 10.0k | break; /* yes! */ |
401 | | /* no -- try a shorter match for this one */ |
402 | 0 | stp = step_back(m->g, sp, rest, es, stopst); |
403 | 0 | assert(stp >= sp); /* it did work */ |
404 | 0 | } |
405 | 10.0k | ssub = ss + 1; |
406 | 10.0k | esub = es - 1; |
407 | | /* did innards match? */ |
408 | 10.0k | if (slow(m, sp, rest, ssub, esub) != NULL) { |
409 | 10.0k | const char *dp = dissect(m, sp, rest, ssub, esub); |
410 | 10.0k | (void)dp; /* avoid warning if assertions off */ |
411 | 10.0k | assert(dp == rest); |
412 | 10.0k | } else /* no */ |
413 | 40 | assert(sp == rest); |
414 | 10.0k | sp = rest; |
415 | 10.0k | break; |
416 | 10.0k | case OPLUS_: |
417 | 10.0k | stp = stop; |
418 | 10.0k | for (;;) { |
419 | | /* how long could this one be? */ |
420 | 10.0k | rest = slow(m, sp, stp, ss, es); |
421 | 10.0k | assert(rest != NULL); /* it did match */ |
422 | | /* could the rest match the rest? */ |
423 | 10.0k | tail = slow(m, rest, stop, es, stopst); |
424 | 10.0k | if (tail == stop) |
425 | 10.0k | break; /* yes! */ |
426 | | /* no -- try a shorter match for this one */ |
427 | 0 | stp = step_back(m->g, sp, rest, es, stopst); |
428 | 0 | assert(stp >= sp); /* it did work */ |
429 | 0 | } |
430 | 10.0k | ssub = ss + 1; |
431 | 10.0k | esub = es - 1; |
432 | 10.0k | ssp = sp; |
433 | 10.0k | oldssp = ssp; |
434 | 69.4k | for (;;) { /* find last match of innards */ |
435 | 69.4k | sep = slow(m, ssp, rest, ssub, esub); |
436 | 69.4k | if (sep == NULL || sep == ssp) |
437 | 10.0k | break; /* failed or matched null */ |
438 | 59.3k | oldssp = ssp; /* on to next try */ |
439 | 59.3k | ssp = sep; |
440 | 59.3k | } |
441 | 10.0k | if (sep == NULL) { |
442 | | /* last successful match */ |
443 | 10.0k | sep = ssp; |
444 | 10.0k | ssp = oldssp; |
445 | 10.0k | } |
446 | 10.0k | assert(sep == rest); /* must exhaust substring */ |
447 | 10.0k | assert(slow(m, ssp, sep, ssub, esub) == rest); |
448 | 10.0k | { |
449 | 10.0k | const char *dp = dissect(m, ssp, sep, ssub, esub); |
450 | 10.0k | (void)dp; /* avoid warning if assertions off */ |
451 | 10.0k | assert(dp == sep); |
452 | 10.0k | } |
453 | 10.0k | sp = rest; |
454 | 10.0k | break; |
455 | 323 | case OCH_: |
456 | 323 | stp = stop; |
457 | 323 | for (;;) { |
458 | | /* how long could this one be? */ |
459 | 323 | rest = slow(m, sp, stp, ss, es); |
460 | 323 | assert(rest != NULL); /* it did match */ |
461 | | /* could the rest match the rest? */ |
462 | 323 | tail = slow(m, rest, stop, es, stopst); |
463 | 323 | if (tail == stop) |
464 | 323 | break; /* yes! */ |
465 | | /* no -- try a shorter match for this one */ |
466 | 0 | stp = rest - 1; |
467 | 0 | assert(stp >= sp); /* it did work */ |
468 | 0 | } |
469 | 323 | ssub = ss + 1; |
470 | 323 | esub = ss + OPND(m->g->strip[ss]) - 1; |
471 | 323 | assert(OP(m->g->strip[esub]) == OOR1); |
472 | 620 | for (;;) { /* find first matching branch */ |
473 | 620 | if (slow(m, sp, rest, ssub, esub) == rest) |
474 | 323 | break; /* it matched all of it */ |
475 | | /* that one missed, try next one */ |
476 | 297 | assert(OP(m->g->strip[esub]) == OOR1); |
477 | 297 | esub++; |
478 | 297 | assert(OP(m->g->strip[esub]) == OOR2); |
479 | 297 | ssub = esub + 1; |
480 | 297 | esub += OPND(m->g->strip[esub]); |
481 | 297 | if (OP(m->g->strip[esub]) == OOR2) |
482 | 0 | esub--; |
483 | 297 | else |
484 | 297 | assert(OP(m->g->strip[esub]) == O_CH); |
485 | 297 | } |
486 | 323 | { |
487 | 323 | const char *dp = dissect(m, sp, rest, ssub, esub); |
488 | 323 | (void)dp; /* avoid warning if assertions off */ |
489 | 323 | assert(dp == rest); |
490 | 323 | } |
491 | 323 | sp = rest; |
492 | 323 | break; |
493 | 0 | case O_PLUS: |
494 | 0 | case O_QUEST: |
495 | 0 | case OOR1: |
496 | 0 | case OOR2: |
497 | 0 | case O_CH: |
498 | 0 | assert(nope); |
499 | 0 | break; |
500 | 10.4k | case OLPAREN: |
501 | 10.4k | i = OPND(m->g->strip[ss]); |
502 | 10.4k | assert(0 < i && i <= m->g->nsub); |
503 | 10.4k | m->pmatch[i].rm_so = sp - m->offp; |
504 | 10.4k | break; |
505 | 10.4k | case ORPAREN: |
506 | 10.4k | i = OPND(m->g->strip[ss]); |
507 | 10.4k | assert(0 < i && i <= m->g->nsub); |
508 | 10.4k | m->pmatch[i].rm_eo = sp - m->offp; |
509 | 10.4k | break; |
510 | 0 | default: /* uh oh */ |
511 | 0 | assert(nope); |
512 | 0 | break; |
513 | 67.1k | } |
514 | 67.1k | } |
515 | | |
516 | 23.1k | assert(sp == stop); |
517 | 23.1k | return(sp); |
518 | 23.1k | } Line | Count | Source | 338 | 23.1k | { | 339 | 23.1k | int i; | 340 | 23.1k | sopno ss; /* start sop of current subRE */ | 341 | 23.1k | sopno es; /* end sop of current subRE */ | 342 | 23.1k | const char *sp; /* start of string matched by it */ | 343 | 23.1k | const char *stp; /* string matched by it cannot pass here */ | 344 | 23.1k | const char *rest; /* start of rest of string */ | 345 | 23.1k | const char *tail; /* string unmatched by rest of RE */ | 346 | 23.1k | sopno ssub; /* start sop of subsubRE */ | 347 | 23.1k | sopno esub; /* end sop of subsubRE */ | 348 | 23.1k | const char *ssp; /* start of string matched by subsubRE */ | 349 | 23.1k | const char *sep; /* end of string matched by subsubRE */ | 350 | 23.1k | const char *oldssp; /* previous ssp */ | 351 | | | 352 | 23.1k | AT("diss", start, stop, startst, stopst); | 353 | 23.1k | sp = start; | 354 | 90.3k | for (ss = startst; ss < stopst; ss = es) { | 355 | | /* identify end of subRE */ | 356 | 67.1k | es = ss; | 357 | 67.1k | switch (OP(m->g->strip[es])) { | 358 | 10.0k | case OPLUS_: | 359 | 20.0k | case OQUEST_: | 360 | 20.0k | es += OPND(m->g->strip[es]); | 361 | 20.0k | break; | 362 | 323 | case OCH_: | 363 | 969 | while (OP(m->g->strip[es]) != O_CH) | 364 | 646 | es += OPND(m->g->strip[es]); | 365 | 323 | break; | 366 | 67.1k | } | 367 | 67.1k | es++; | 368 | | | 369 | | /* figure out what it matched */ | 370 | 67.1k | switch (OP(m->g->strip[ss])) { | 371 | 0 | case OEND: | 372 | 0 | assert(nope); | 373 | 0 | break; | 374 | 10.0k | case OCHAR: | 375 | 10.0k | sp++; | 376 | 10.0k | break; | 377 | 2.45k | case OBOL: | 378 | 5.21k | case OEOL: | 379 | 5.21k | case OBOW: | 380 | 5.21k | case OEOW: | 381 | 5.21k | break; | 382 | 323 | case OANY: | 383 | 10.6k | case OANYOF: | 384 | 10.6k | sp++; | 385 | 10.6k | break; | 386 | 0 | case OBACK_: | 387 | 0 | case O_BACK: | 388 | 0 | assert(nope); | 389 | 0 | break; | 390 | | /* cases where length of match is hard to find */ | 391 | 10.0k 
| case OQUEST_: | 392 | 10.0k | stp = stop; | 393 | 10.0k | for (;;) { | 394 | | /* how long could this one be? */ | 395 | 10.0k | rest = slow(m, sp, stp, ss, es); | 396 | 10.0k | assert(rest != NULL); /* it did match */ | 397 | | /* could the rest match the rest? */ | 398 | 10.0k | tail = slow(m, rest, stop, es, stopst); | 399 | 10.0k | if (tail == stop) | 400 | 10.0k | break; /* yes! */ | 401 | | /* no -- try a shorter match for this one */ | 402 | 0 | stp = step_back(m->g, sp, rest, es, stopst); | 403 | 0 | assert(stp >= sp); /* it did work */ | 404 | 0 | } | 405 | 10.0k | ssub = ss + 1; | 406 | 10.0k | esub = es - 1; | 407 | | /* did innards match? */ | 408 | 10.0k | if (slow(m, sp, rest, ssub, esub) != NULL) { | 409 | 10.0k | const char *dp = dissect(m, sp, rest, ssub, esub); | 410 | 10.0k | (void)dp; /* avoid warning if assertions off */ | 411 | 10.0k | assert(dp == rest); | 412 | 10.0k | } else /* no */ | 413 | 40 | assert(sp == rest); | 414 | 10.0k | sp = rest; | 415 | 10.0k | break; | 416 | 10.0k | case OPLUS_: | 417 | 10.0k | stp = stop; | 418 | 10.0k | for (;;) { | 419 | | /* how long could this one be? */ | 420 | 10.0k | rest = slow(m, sp, stp, ss, es); | 421 | 10.0k | assert(rest != NULL); /* it did match */ | 422 | | /* could the rest match the rest? */ | 423 | 10.0k | tail = slow(m, rest, stop, es, stopst); | 424 | 10.0k | if (tail == stop) | 425 | 10.0k | break; /* yes! 
*/ | 426 | | /* no -- try a shorter match for this one */ | 427 | 0 | stp = step_back(m->g, sp, rest, es, stopst); | 428 | 0 | assert(stp >= sp); /* it did work */ | 429 | 0 | } | 430 | 10.0k | ssub = ss + 1; | 431 | 10.0k | esub = es - 1; | 432 | 10.0k | ssp = sp; | 433 | 10.0k | oldssp = ssp; | 434 | 69.4k | for (;;) { /* find last match of innards */ | 435 | 69.4k | sep = slow(m, ssp, rest, ssub, esub); | 436 | 69.4k | if (sep == NULL || sep == ssp) | 437 | 10.0k | break; /* failed or matched null */ | 438 | 59.3k | oldssp = ssp; /* on to next try */ | 439 | 59.3k | ssp = sep; | 440 | 59.3k | } | 441 | 10.0k | if (sep == NULL) { | 442 | | /* last successful match */ | 443 | 10.0k | sep = ssp; | 444 | 10.0k | ssp = oldssp; | 445 | 10.0k | } | 446 | 10.0k | assert(sep == rest); /* must exhaust substring */ | 447 | 10.0k | assert(slow(m, ssp, sep, ssub, esub) == rest); | 448 | 10.0k | { | 449 | 10.0k | const char *dp = dissect(m, ssp, sep, ssub, esub); | 450 | 10.0k | (void)dp; /* avoid warning if assertions off */ | 451 | 10.0k | assert(dp == sep); | 452 | 10.0k | } | 453 | 10.0k | sp = rest; | 454 | 10.0k | break; | 455 | 323 | case OCH_: | 456 | 323 | stp = stop; | 457 | 323 | for (;;) { | 458 | | /* how long could this one be? */ | 459 | 323 | rest = slow(m, sp, stp, ss, es); | 460 | 323 | assert(rest != NULL); /* it did match */ | 461 | | /* could the rest match the rest? */ | 462 | 323 | tail = slow(m, rest, stop, es, stopst); | 463 | 323 | if (tail == stop) | 464 | 323 | break; /* yes! 
*/ | 465 | | /* no -- try a shorter match for this one */ | 466 | 0 | stp = rest - 1; | 467 | 0 | assert(stp >= sp); /* it did work */ | 468 | 0 | } | 469 | 323 | ssub = ss + 1; | 470 | 323 | esub = ss + OPND(m->g->strip[ss]) - 1; | 471 | 323 | assert(OP(m->g->strip[esub]) == OOR1); | 472 | 620 | for (;;) { /* find first matching branch */ | 473 | 620 | if (slow(m, sp, rest, ssub, esub) == rest) | 474 | 323 | break; /* it matched all of it */ | 475 | | /* that one missed, try next one */ | 476 | 297 | assert(OP(m->g->strip[esub]) == OOR1); | 477 | 297 | esub++; | 478 | 297 | assert(OP(m->g->strip[esub]) == OOR2); | 479 | 297 | ssub = esub + 1; | 480 | 297 | esub += OPND(m->g->strip[esub]); | 481 | 297 | if (OP(m->g->strip[esub]) == OOR2) | 482 | 0 | esub--; | 483 | 297 | else | 484 | 297 | assert(OP(m->g->strip[esub]) == O_CH); | 485 | 297 | } | 486 | 323 | { | 487 | 323 | const char *dp = dissect(m, sp, rest, ssub, esub); | 488 | 323 | (void)dp; /* avoid warning if assertions off */ | 489 | 323 | assert(dp == rest); | 490 | 323 | } | 491 | 323 | sp = rest; | 492 | 323 | break; | 493 | 0 | case O_PLUS: | 494 | 0 | case O_QUEST: | 495 | 0 | case OOR1: | 496 | 0 | case OOR2: | 497 | 0 | case O_CH: | 498 | 0 | assert(nope); | 499 | 0 | break; | 500 | 10.4k | case OLPAREN: | 501 | 10.4k | i = OPND(m->g->strip[ss]); | 502 | 10.4k | assert(0 < i && i <= m->g->nsub); | 503 | 10.4k | m->pmatch[i].rm_so = sp - m->offp; | 504 | 10.4k | break; | 505 | 10.4k | case ORPAREN: | 506 | 10.4k | i = OPND(m->g->strip[ss]); | 507 | 10.4k | assert(0 < i && i <= m->g->nsub); | 508 | 10.4k | m->pmatch[i].rm_eo = sp - m->offp; | 509 | 10.4k | break; | 510 | 0 | default: /* uh oh */ | 511 | 0 | assert(nope); | 512 | 0 | break; | 513 | 67.1k | } | 514 | 67.1k | } | 515 | | | 516 | 23.1k | assert(sp == stop); | 517 | 23.1k | return(sp); | 518 | 23.1k | } |
Unexecuted instantiation: regexec.c:ldissect |
519 | | |
520 | | /* |
521 | | - backref - figure out what matched what, figuring in back references |
522 | | */ |
523 | | static const char * /* == stop (success) or NULL (failure) */ |
524 | | backref(struct match *m, const char *start, const char *stop, sopno startst, |
525 | | sopno stopst, sopno lev, int rec) /* PLUS nesting level */ |
526 | 0 | { |
527 | 0 | int i; |
528 | 0 | sopno ss; /* start sop of current subRE */ |
529 | 0 | const char *sp; /* start of string matched by it */ |
530 | 0 | sopno ssub; /* start sop of subsubRE */ |
531 | 0 | sopno esub; /* end sop of subsubRE */ |
532 | 0 | const char *ssp; /* start of string matched by subsubRE */ |
533 | 0 | const char *dp; |
534 | 0 | size_t len; |
535 | 0 | int hard; |
536 | 0 | sop s; |
537 | 0 | llvm_regoff_t offsave; |
538 | 0 | cset *cs; |
539 | |
|
540 | 0 | AT("back", start, stop, startst, stopst); |
541 | 0 | sp = start; |
542 | | |
543 | | /* get as far as we can with easy stuff */ |
544 | 0 | hard = 0; |
545 | 0 | for (ss = startst; !hard && ss < stopst; ss++) |
546 | 0 | switch (OP(s = m->g->strip[ss])) { |
547 | 0 | case OCHAR: |
548 | 0 | if (sp == stop || *sp++ != (char)OPND(s)) |
549 | 0 | return(NULL); |
550 | 0 | break; |
551 | 0 | case OANY: |
552 | 0 | if (sp == stop) |
553 | 0 | return(NULL); |
554 | 0 | sp++; |
555 | 0 | break; |
556 | 0 | case OANYOF: |
557 | 0 | cs = &m->g->sets[OPND(s)]; |
558 | 0 | if (sp == stop || !CHIN(cs, *sp++)) |
559 | 0 | return(NULL); |
560 | 0 | break; |
561 | 0 | case OBOL: |
562 | 0 | if ( (sp == m->beginp && !(m->eflags®_NOTBOL)) || |
563 | 0 | (sp < m->endp && *(sp-1) == '\n' && |
564 | 0 | (m->g->cflags®_NEWLINE)) ) |
565 | 0 | { /* yes */ } |
566 | 0 | else |
567 | 0 | return(NULL); |
568 | 0 | break; |
569 | 0 | case OEOL: |
570 | 0 | if ( (sp == m->endp && !(m->eflags®_NOTEOL)) || |
571 | 0 | (sp < m->endp && *sp == '\n' && |
572 | 0 | (m->g->cflags®_NEWLINE)) ) |
573 | 0 | { /* yes */ } |
574 | 0 | else |
575 | 0 | return(NULL); |
576 | 0 | break; |
577 | 0 | case OBOW: |
578 | 0 | if (( (sp == m->beginp && !(m->eflags®_NOTBOL)) || |
579 | 0 | (sp < m->endp && *(sp-1) == '\n' && |
580 | 0 | (m->g->cflags®_NEWLINE)) || |
581 | 0 | (sp > m->beginp && |
582 | 0 | !ISWORD(*(sp-1))) ) && |
583 | 0 | (sp < m->endp && ISWORD(*sp)) ) |
584 | 0 | { /* yes */ } |
585 | 0 | else |
586 | 0 | return(NULL); |
587 | 0 | break; |
588 | 0 | case OEOW: |
589 | 0 | if (( (sp == m->endp && !(m->eflags®_NOTEOL)) || |
590 | 0 | (sp < m->endp && *sp == '\n' && |
591 | 0 | (m->g->cflags®_NEWLINE)) || |
592 | 0 | (sp < m->endp && !ISWORD(*sp)) ) && |
593 | 0 | (sp > m->beginp && ISWORD(*(sp-1))) ) |
594 | 0 | { /* yes */ } |
595 | 0 | else |
596 | 0 | return(NULL); |
597 | 0 | break; |
598 | 0 | case O_QUEST: |
599 | 0 | case O_CH: |
600 | 0 | break; |
601 | 0 | case OOR1: /* matches null but needs to skip */ |
602 | 0 | ss++; |
603 | 0 | s = m->g->strip[ss]; |
604 | 0 | do { |
605 | 0 | assert(OP(s) == OOR2); |
606 | 0 | ss += OPND(s); |
607 | 0 | } while (OP(s = m->g->strip[ss]) != O_CH); |
608 | | /* note that the ss++ gets us past the O_CH */ |
609 | 0 | break; |
610 | 0 | default: /* have to make a choice */ |
611 | 0 | hard = 1; |
612 | 0 | break; |
613 | 0 | } |
614 | 0 | if (!hard) { /* that was it! */ |
615 | 0 | if (sp != stop) |
616 | 0 | return(NULL); |
617 | 0 | return(sp); |
618 | 0 | } |
619 | 0 | ss--; /* adjust for the for's final increment */ |
620 | | |
621 | | /* the hard stuff */ |
622 | 0 | AT("hard", sp, stop, ss, stopst); |
623 | 0 | s = m->g->strip[ss]; |
624 | 0 | switch (OP(s)) { |
625 | 0 | case OBACK_: /* the vilest depths */ |
626 | 0 | i = OPND(s); |
627 | 0 | assert(0 < i && i <= m->g->nsub); |
628 | 0 | if (m->pmatch[i].rm_eo == -1) |
629 | 0 | return(NULL); |
630 | 0 | assert(m->pmatch[i].rm_so != -1); |
631 | 0 | len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; |
632 | 0 | if (len == 0 && rec++ > MAX_RECURSION) |
633 | 0 | return(NULL); |
634 | 0 | assert(stop - m->beginp >= len); |
635 | 0 | if (sp > stop - len) |
636 | 0 | return(NULL); /* not enough left to match */ |
637 | 0 | ssp = m->offp + m->pmatch[i].rm_so; |
638 | 0 | if (memcmp(sp, ssp, len) != 0) |
639 | 0 | return(NULL); |
640 | 0 | while (m->g->strip[ss] != SOP(O_BACK, i)) |
641 | 0 | ss++; |
642 | 0 | return(backref(m, sp+len, stop, ss+1, stopst, lev, rec)); |
643 | 0 | break; |
644 | 0 | case OQUEST_: /* to null or not */ |
645 | 0 | dp = backref(m, sp, stop, ss+1, stopst, lev, rec); |
646 | 0 | if (dp != NULL) |
647 | 0 | return(dp); /* not */ |
648 | 0 | return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec)); |
649 | 0 | break; |
650 | 0 | case OPLUS_: |
651 | 0 | assert(m->lastpos != NULL); |
652 | 0 | assert(lev+1 <= m->g->nplus); |
653 | 0 | m->lastpos[lev+1] = sp; |
654 | 0 | return(backref(m, sp, stop, ss+1, stopst, lev+1, rec)); |
655 | 0 | break; |
656 | 0 | case O_PLUS: |
657 | 0 | if (sp == m->lastpos[lev]) /* last pass matched null */ |
658 | 0 | return(backref(m, sp, stop, ss+1, stopst, lev-1, rec)); |
659 | | /* try another pass */ |
660 | 0 | m->lastpos[lev] = sp; |
661 | 0 | dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec); |
662 | 0 | if (dp == NULL) |
663 | 0 | return(backref(m, sp, stop, ss+1, stopst, lev-1, rec)); |
664 | 0 | else |
665 | 0 | return(dp); |
666 | 0 | break; |
667 | 0 | case OCH_: /* find the right one, if any */ |
668 | 0 | ssub = ss + 1; |
669 | 0 | esub = ss + OPND(s) - 1; |
670 | 0 | assert(OP(m->g->strip[esub]) == OOR1); |
671 | 0 | for (;;) { /* find first matching branch */ |
672 | 0 | dp = backref(m, sp, stop, ssub, stopst, lev, rec); |
673 | 0 | if (dp != NULL) |
674 | 0 | return(dp); |
675 | | /* that one missed, try next one */ |
676 | 0 | if (OP(m->g->strip[esub]) == O_CH) |
677 | 0 | return(NULL); /* there is none */ |
678 | 0 | esub++; |
679 | 0 | assert(OP(m->g->strip[esub]) == OOR2); |
680 | 0 | ssub = esub + 1; |
681 | 0 | esub += OPND(m->g->strip[esub]); |
682 | 0 | if (OP(m->g->strip[esub]) == OOR2) |
683 | 0 | esub--; |
684 | 0 | else |
685 | 0 | assert(OP(m->g->strip[esub]) == O_CH); |
686 | 0 | } |
687 | 0 | break; |
688 | 0 | case OLPAREN: /* must undo assignment if rest fails */ |
689 | 0 | i = OPND(s); |
690 | 0 | assert(0 < i && i <= m->g->nsub); |
691 | 0 | offsave = m->pmatch[i].rm_so; |
692 | 0 | m->pmatch[i].rm_so = sp - m->offp; |
693 | 0 | dp = backref(m, sp, stop, ss+1, stopst, lev, rec); |
694 | 0 | if (dp != NULL) |
695 | 0 | return(dp); |
696 | 0 | m->pmatch[i].rm_so = offsave; |
697 | 0 | return(NULL); |
698 | 0 | break; |
699 | 0 | case ORPAREN: /* must undo assignment if rest fails */ |
700 | 0 | i = OPND(s); |
701 | 0 | assert(0 < i && i <= m->g->nsub); |
702 | 0 | offsave = m->pmatch[i].rm_eo; |
703 | 0 | m->pmatch[i].rm_eo = sp - m->offp; |
704 | 0 | dp = backref(m, sp, stop, ss+1, stopst, lev, rec); |
705 | 0 | if (dp != NULL) |
706 | 0 | return(dp); |
707 | 0 | m->pmatch[i].rm_eo = offsave; |
708 | 0 | return(NULL); |
709 | 0 | break; |
710 | 0 | default: /* uh oh */ |
711 | 0 | assert(nope); |
712 | 0 | break; |
713 | 0 | } |
714 | | |
715 | | /* "can't happen" */ |
716 | 0 | assert(nope); |
717 | | /* NOTREACHED */ |
718 | 0 | return NULL; |
719 | 0 | } Unexecuted instantiation: regexec.c:sbackref Unexecuted instantiation: regexec.c:lbackref |
720 | | |
721 | | /* |
722 | | - fast - step through the string at top speed |
723 | | */ |
724 | | static const char * /* where tentative match ended, or NULL */ |
725 | | fast(struct match *m, const char *start, const char *stop, sopno startst, |
726 | | sopno stopst) |
727 | 17.3k | { |
728 | 17.3k | states st = m->st; |
729 | 17.3k | states fresh = m->fresh; |
730 | 17.3k | states tmp = m->tmp; |
731 | 17.3k | const char *p = start; |
732 | 17.3k | int c = (start == m->beginp) ? OUT : *(start-1); |
733 | 17.3k | int lastc; /* previous c */ |
734 | 17.3k | int flagch; |
735 | 17.3k | int i; |
736 | 17.3k | const char *coldp; /* last p after which no match was underway */ |
737 | | |
738 | 17.3k | CLEAR(st); |
739 | 17.3k | SET1(st, startst); |
740 | 17.3k | st = step(m->g, startst, stopst, st, NOTHING, st); |
741 | 17.3k | ASSIGN(fresh, st); |
742 | 17.3k | SP("start", st, *p); |
743 | 17.3k | coldp = NULL; |
744 | 133M | for (;;) { |
745 | | /* next character */ |
746 | 133M | lastc = c; |
747 | 133M | c = (p == m->endp) ? OUT : *p; |
748 | 133M | if (EQ(st, fresh)) |
749 | 130M | coldp = p; |
750 | | |
751 | | /* is there an EOL and/or BOL between lastc and c? */ |
752 | 133M | flagch = '\0'; |
753 | 133M | i = 0; |
754 | 133M | if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || |
755 | 133M | (lastc == OUT && !(m->eflags®_NOTBOL)) ) { |
756 | 17.3k | flagch = BOL; |
757 | 17.3k | i = m->g->nbol; |
758 | 17.3k | } |
759 | 133M | if ( (c == '\n' && m->g->cflags®_NEWLINE) || |
760 | 133M | (c == OUT && !(m->eflags®_NOTEOL)) ) { |
761 | 17.3k | flagch = (flagch == BOL) ? BOLEOL : EOL; |
762 | 17.3k | i += m->g->neol; |
763 | 17.3k | } |
764 | 133M | if (i != 0) { |
765 | 42.7k | for (; i > 0; i--) |
766 | 21.3k | st = step(m->g, startst, stopst, st, flagch, st); |
767 | 21.3k | SP("boleol", st, c); |
768 | 21.3k | } |
769 | | |
770 | | /* how about a word boundary? */ |
771 | 133M | if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && |
772 | 133M | (c != OUT && ISWORD(c)) ) { |
773 | 2.32M | flagch = BOW; |
774 | 2.32M | } |
775 | 133M | if ( (lastc != OUT && ISWORD(lastc)) && |
776 | 133M | (flagch == EOL || (c != OUT && !ISWORD(c))) ) { |
777 | 2.32M | flagch = EOW; |
778 | 2.32M | } |
779 | 133M | if (flagch == BOW || flagch == EOW) { |
780 | 4.65M | st = step(m->g, startst, stopst, st, flagch, st); |
781 | 4.65M | SP("boweow", st, c); |
782 | 4.65M | } |
783 | | |
784 | | /* are we done? */ |
785 | 133M | if (ISSET(st, stopst) || p == stop) |
786 | 17.3k | break; /* NOTE BREAK OUT */ |
787 | | |
788 | | /* no, we must deal with this character */ |
789 | 133M | ASSIGN(tmp, st); |
790 | 133M | ASSIGN(st, fresh); |
791 | 133M | assert(c != OUT); |
792 | 133M | st = step(m->g, startst, stopst, tmp, c, st); |
793 | 133M | SP("aft", st, c); |
794 | 133M | assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); |
795 | 133M | p++; |
796 | 133M | } |
797 | | |
798 | 17.3k | assert(coldp != NULL); |
799 | 17.3k | m->coldp = coldp; |
800 | 17.3k | if (ISSET(st, stopst)) |
801 | 2.85k | return(p+1); |
802 | 14.5k | else |
803 | 14.5k | return(NULL); |
804 | 17.3k | } Line | Count | Source | 727 | 17.3k | { | 728 | 17.3k | states st = m->st; | 729 | 17.3k | states fresh = m->fresh; | 730 | 17.3k | states tmp = m->tmp; | 731 | 17.3k | const char *p = start; | 732 | 17.3k | int c = (start == m->beginp) ? OUT : *(start-1); | 733 | 17.3k | int lastc; /* previous c */ | 734 | 17.3k | int flagch; | 735 | 17.3k | int i; | 736 | 17.3k | const char *coldp; /* last p after which no match was underway */ | 737 | | | 738 | 17.3k | CLEAR(st); | 739 | 17.3k | SET1(st, startst); | 740 | 17.3k | st = step(m->g, startst, stopst, st, NOTHING, st); | 741 | 17.3k | ASSIGN(fresh, st); | 742 | 17.3k | SP("start", st, *p); | 743 | 17.3k | coldp = NULL; | 744 | 133M | for (;;) { | 745 | | /* next character */ | 746 | 133M | lastc = c; | 747 | 133M | c = (p == m->endp) ? OUT : *p; | 748 | 133M | if (EQ(st, fresh)) | 749 | 130M | coldp = p; | 750 | | | 751 | | /* is there an EOL and/or BOL between lastc and c? */ | 752 | 133M | flagch = '\0'; | 753 | 133M | i = 0; | 754 | 133M | if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || | 755 | 133M | (lastc == OUT && !(m->eflags®_NOTBOL)) ) { | 756 | 17.3k | flagch = BOL; | 757 | 17.3k | i = m->g->nbol; | 758 | 17.3k | } | 759 | 133M | if ( (c == '\n' && m->g->cflags®_NEWLINE) || | 760 | 133M | (c == OUT && !(m->eflags®_NOTEOL)) ) { | 761 | 17.3k | flagch = (flagch == BOL) ? BOLEOL : EOL; | 762 | 17.3k | i += m->g->neol; | 763 | 17.3k | } | 764 | 133M | if (i != 0) { | 765 | 42.7k | for (; i > 0; i--) | 766 | 21.3k | st = step(m->g, startst, stopst, st, flagch, st); | 767 | 21.3k | SP("boleol", st, c); | 768 | 21.3k | } | 769 | | | 770 | | /* how about a word boundary? 
*/ | 771 | 133M | if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && | 772 | 133M | (c != OUT && ISWORD(c)) ) { | 773 | 2.32M | flagch = BOW; | 774 | 2.32M | } | 775 | 133M | if ( (lastc != OUT && ISWORD(lastc)) && | 776 | 133M | (flagch == EOL || (c != OUT && !ISWORD(c))) ) { | 777 | 2.32M | flagch = EOW; | 778 | 2.32M | } | 779 | 133M | if (flagch == BOW || flagch == EOW) { | 780 | 4.65M | st = step(m->g, startst, stopst, st, flagch, st); | 781 | 4.65M | SP("boweow", st, c); | 782 | 4.65M | } | 783 | | | 784 | | /* are we done? */ | 785 | 133M | if (ISSET(st, stopst) || p == stop) | 786 | 17.3k | break; /* NOTE BREAK OUT */ | 787 | | | 788 | | /* no, we must deal with this character */ | 789 | 133M | ASSIGN(tmp, st); | 790 | 133M | ASSIGN(st, fresh); | 791 | 133M | assert(c != OUT); | 792 | 133M | st = step(m->g, startst, stopst, tmp, c, st); | 793 | 133M | SP("aft", st, c); | 794 | 133M | assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); | 795 | 133M | p++; | 796 | 133M | } | 797 | | | 798 | 17.3k | assert(coldp != NULL); | 799 | 17.3k | m->coldp = coldp; | 800 | 17.3k | if (ISSET(st, stopst)) | 801 | 2.85k | return(p+1); | 802 | 14.5k | else | 803 | 14.5k | return(NULL); | 804 | 17.3k | } |
Unexecuted instantiation: regexec.c:lfast |
805 | | |
806 | | /* |
807 | | - slow - step through the string more deliberately |
808 | | */ |
809 | | static const char * /* where it ended */ |
810 | | slow(struct match *m, const char *start, const char *stop, sopno startst, |
811 | | sopno stopst) |
812 | 123k | { |
813 | | /* Quickly skip over fixed character matches at the start. */ |
814 | 123k | const char *p = start; |
815 | 192k | for (; startst < stopst; ++startst) { |
816 | 177k | int hard = 0; |
817 | 177k | sop s = m->g->strip[startst]; |
818 | 177k | switch (OP(s)) { |
819 | 32.4k | case OLPAREN: |
820 | 42.5k | case ORPAREN: |
821 | | /* Not relevant here. */ |
822 | 42.5k | break; |
823 | 31.1k | case OCHAR: |
824 | 31.1k | if (p == stop || *p != (char)OPND(s)) |
825 | 5.13k | return NULL; |
826 | 26.0k | ++p; |
827 | 26.0k | break; |
828 | 103k | default: |
829 | 103k | hard = 1; |
830 | 103k | break; |
831 | 177k | } |
832 | 171k | if (hard) |
833 | 103k | break; |
834 | 171k | } |
835 | | |
836 | 118k | states st = m->st; |
837 | 118k | states empty = m->empty; |
838 | 118k | states tmp = m->tmp; |
839 | 118k | int c = (p == m->beginp) ? OUT : *(p-1); |
840 | 118k | int lastc; /* previous c */ |
841 | 118k | int flagch; |
842 | 118k | int i; |
843 | 118k | const char *matchp; /* last p at which a match ended */ |
844 | | |
845 | 118k | AT("slow", start, stop, startst, stopst); |
846 | 118k | CLEAR(st); |
847 | 118k | SET1(st, startst); |
848 | 118k | SP("sstart", st, *p); |
849 | 118k | st = step(m->g, startst, stopst, st, NOTHING, st); |
850 | 118k | matchp = NULL; |
851 | 1.52M | for (;;) { |
852 | | /* next character */ |
853 | 1.52M | lastc = c; |
854 | 1.52M | c = (p == m->endp) ? OUT : *p; |
855 | | |
856 | | /* is there an EOL and/or BOL between lastc and c? */ |
857 | 1.52M | flagch = '\0'; |
858 | 1.52M | i = 0; |
859 | 1.52M | if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || |
860 | 1.52M | (lastc == OUT && !(m->eflags®_NOTBOL)) ) { |
861 | 2.47k | flagch = BOL; |
862 | 2.47k | i = m->g->nbol; |
863 | 2.47k | } |
864 | 1.52M | if ( (c == '\n' && m->g->cflags®_NEWLINE) || |
865 | 1.52M | (c == OUT && !(m->eflags®_NOTEOL)) ) { |
866 | 42.3k | flagch = (flagch == BOL) ? BOLEOL : EOL; |
867 | 42.3k | i += m->g->neol; |
868 | 42.3k | } |
869 | 1.52M | if (i != 0) { |
870 | 89.6k | for (; i > 0; i--) |
871 | 44.8k | st = step(m->g, startst, stopst, st, flagch, st); |
872 | 44.8k | SP("sboleol", st, c); |
873 | 44.8k | } |
874 | | |
875 | | /* how about a word boundary? */ |
876 | 1.52M | if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && |
877 | 1.52M | (c != OUT && ISWORD(c)) ) { |
878 | 441k | flagch = BOW; |
879 | 441k | } |
880 | 1.52M | if ( (lastc != OUT && ISWORD(lastc)) && |
881 | 1.52M | (flagch == EOL || (c != OUT && !ISWORD(c))) ) { |
882 | 484k | flagch = EOW; |
883 | 484k | } |
884 | 1.52M | if (flagch == BOW || flagch == EOW) { |
885 | 925k | st = step(m->g, startst, stopst, st, flagch, st); |
886 | 925k | SP("sboweow", st, c); |
887 | 925k | } |
888 | | |
889 | | /* are we done? */ |
890 | 1.52M | if (ISSET(st, stopst)) |
891 | 964k | matchp = p; |
892 | 1.52M | if (EQ(st, empty) || p == stop) |
893 | 118k | break; /* NOTE BREAK OUT */ |
894 | | |
895 | | /* no, we must deal with this character */ |
896 | 1.40M | ASSIGN(tmp, st); |
897 | 1.40M | ASSIGN(st, empty); |
898 | 1.40M | assert(c != OUT); |
899 | 1.40M | st = step(m->g, startst, stopst, tmp, c, st); |
900 | 1.40M | SP("saft", st, c); |
901 | 1.40M | assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); |
902 | 1.40M | p++; |
903 | 1.40M | } |
904 | | |
905 | 118k | return(matchp); |
906 | 123k | } Line | Count | Source | 812 | 123k | { | 813 | | /* Quickly skip over fixed character matches at the start. */ | 814 | 123k | const char *p = start; | 815 | 192k | for (; startst < stopst; ++startst) { | 816 | 177k | int hard = 0; | 817 | 177k | sop s = m->g->strip[startst]; | 818 | 177k | switch (OP(s)) { | 819 | 32.4k | case OLPAREN: | 820 | 42.5k | case ORPAREN: | 821 | | /* Not relevant here. */ | 822 | 42.5k | break; | 823 | 31.1k | case OCHAR: | 824 | 31.1k | if (p == stop || *p != (char)OPND(s)) | 825 | 5.13k | return NULL; | 826 | 26.0k | ++p; | 827 | 26.0k | break; | 828 | 103k | default: | 829 | 103k | hard = 1; | 830 | 103k | break; | 831 | 177k | } | 832 | 171k | if (hard) | 833 | 103k | break; | 834 | 171k | } | 835 | | | 836 | 118k | states st = m->st; | 837 | 118k | states empty = m->empty; | 838 | 118k | states tmp = m->tmp; | 839 | 118k | int c = (p == m->beginp) ? OUT : *(p-1); | 840 | 118k | int lastc; /* previous c */ | 841 | 118k | int flagch; | 842 | 118k | int i; | 843 | 118k | const char *matchp; /* last p at which a match ended */ | 844 | | | 845 | 118k | AT("slow", start, stop, startst, stopst); | 846 | 118k | CLEAR(st); | 847 | 118k | SET1(st, startst); | 848 | 118k | SP("sstart", st, *p); | 849 | 118k | st = step(m->g, startst, stopst, st, NOTHING, st); | 850 | 118k | matchp = NULL; | 851 | 1.52M | for (;;) { | 852 | | /* next character */ | 853 | 1.52M | lastc = c; | 854 | 1.52M | c = (p == m->endp) ? OUT : *p; | 855 | | | 856 | | /* is there an EOL and/or BOL between lastc and c? */ | 857 | 1.52M | flagch = '\0'; | 858 | 1.52M | i = 0; | 859 | 1.52M | if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || | 860 | 1.52M | (lastc == OUT && !(m->eflags®_NOTBOL)) ) { | 861 | 2.47k | flagch = BOL; | 862 | 2.47k | i = m->g->nbol; | 863 | 2.47k | } | 864 | 1.52M | if ( (c == '\n' && m->g->cflags®_NEWLINE) || | 865 | 1.52M | (c == OUT && !(m->eflags®_NOTEOL)) ) { | 866 | 42.3k | flagch = (flagch == BOL) ? 
BOLEOL : EOL; | 867 | 42.3k | i += m->g->neol; | 868 | 42.3k | } | 869 | 1.52M | if (i != 0) { | 870 | 89.6k | for (; i > 0; i--) | 871 | 44.8k | st = step(m->g, startst, stopst, st, flagch, st); | 872 | 44.8k | SP("sboleol", st, c); | 873 | 44.8k | } | 874 | | | 875 | | /* how about a word boundary? */ | 876 | 1.52M | if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && | 877 | 1.52M | (c != OUT && ISWORD(c)) ) { | 878 | 441k | flagch = BOW; | 879 | 441k | } | 880 | 1.52M | if ( (lastc != OUT && ISWORD(lastc)) && | 881 | 1.52M | (flagch == EOL || (c != OUT && !ISWORD(c))) ) { | 882 | 484k | flagch = EOW; | 883 | 484k | } | 884 | 1.52M | if (flagch == BOW || flagch == EOW) { | 885 | 925k | st = step(m->g, startst, stopst, st, flagch, st); | 886 | 925k | SP("sboweow", st, c); | 887 | 925k | } | 888 | | | 889 | | /* are we done? */ | 890 | 1.52M | if (ISSET(st, stopst)) | 891 | 964k | matchp = p; | 892 | 1.52M | if (EQ(st, empty) || p == stop) | 893 | 118k | break; /* NOTE BREAK OUT */ | 894 | | | 895 | | /* no, we must deal with this character */ | 896 | 1.40M | ASSIGN(tmp, st); | 897 | 1.40M | ASSIGN(st, empty); | 898 | 1.40M | assert(c != OUT); | 899 | 1.40M | st = step(m->g, startst, stopst, tmp, c, st); | 900 | 1.40M | SP("saft", st, c); | 901 | 1.40M | assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); | 902 | 1.40M | p++; | 903 | 1.40M | } | 904 | | | 905 | 118k | return(matchp); | 906 | 123k | } |
Unexecuted instantiation: regexec.c:lslow |
907 | | |
908 | | |
909 | | /* |
910 | | - step - map set of states reachable before char to set reachable after |
911 | | */ |
912 | | static states |
913 | | step(struct re_guts *g, |
914 | | sopno start, /* start state within strip */ |
915 | | sopno stop, /* state after stop state within strip */ |
916 | | states bef, /* states reachable before */ |
917 | | int ch, /* character or NONCHAR code */ |
918 | | states aft) /* states already known reachable after */ |
919 | 140M | { |
920 | 140M | cset *cs; |
921 | 140M | sop s; |
922 | 140M | sopno pc; |
923 | 140M | onestate here; /* note, macros know this name */ |
924 | 140M | sopno look; |
925 | 140M | int i; |
926 | | |
927 | 3.23G | for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { |
928 | 3.09G | s = g->strip[pc]; |
929 | 3.09G | switch (OP(s)) { |
930 | 0 | case OEND: |
931 | 0 | assert(pc == stop-1); |
932 | 0 | break; |
933 | 567M | case OCHAR: |
934 | | /* only characters can match */ |
935 | 567M | assert(!NONCHAR(ch) || ch != (char)OPND(s)); |
936 | 567M | if (ch == (char)OPND(s)) |
937 | 10.0M | FWD(aft, bef, 1); |
938 | 567M | break; |
939 | 138M | case OBOL: |
940 | 138M | if (ch == BOL || ch == BOLEOL) |
941 | 19.4k | FWD(aft, bef, 1); |
942 | 138M | break; |
943 | 89.9M | case OEOL: |
944 | 89.9M | if (ch == EOL || ch == BOLEOL) |
945 | 12.6k | FWD(aft, bef, 1); |
946 | 89.9M | break; |
947 | 0 | case OBOW: |
948 | 0 | if (ch == BOW) |
949 | 0 | FWD(aft, bef, 1); |
950 | 0 | break; |
951 | 0 | case OEOW: |
952 | 0 | if (ch == EOW) |
953 | 0 | FWD(aft, bef, 1); |
954 | 0 | break; |
955 | 199k | case OANY: |
956 | 199k | if (!NONCHAR(ch)) |
957 | 132k | FWD(aft, bef, 1); |
958 | 199k | break; |
959 | 400M | case OANYOF: |
960 | 400M | cs = &g->sets[OPND(s)]; |
961 | 400M | if (!NONCHAR(ch) && CHIN(cs, ch)) |
962 | 92.7M | FWD(aft, bef, 1); |
963 | 400M | break; |
964 | 0 | case OBACK_: /* ignored here */ |
965 | 0 | case O_BACK: |
966 | 0 | FWD(aft, aft, 1); |
967 | 0 | break; |
968 | 168M | case OPLUS_: /* forward, this is just an empty */ |
969 | 168M | FWD(aft, aft, 1); |
970 | 168M | break; |
971 | 168M | case O_PLUS: /* both forward and back */ |
972 | 168M | FWD(aft, aft, 1); |
973 | 168M | i = ISSETBACK(aft, OPND(s)); |
974 | 168M | BACK(aft, aft, OPND(s)); |
975 | 168M | if (!i && ISSETBACK(aft, OPND(s))) { |
976 | | /* oho, must reconsider loop body */ |
977 | 5.07M | pc -= OPND(s) + 1; |
978 | 5.07M | INIT(here, pc); |
979 | 5.07M | } |
980 | 168M | break; |
981 | 40.0M | case OQUEST_: /* two branches, both forward */ |
982 | 40.0M | FWD(aft, aft, 1); |
983 | 40.0M | FWD(aft, aft, OPND(s)); |
984 | 40.0M | break; |
985 | 40.0M | case O_QUEST: /* just an empty */ |
986 | 40.0M | FWD(aft, aft, 1); |
987 | 40.0M | break; |
988 | 139M | case OLPAREN: /* not significant here */ |
989 | 280M | case ORPAREN: |
990 | 280M | FWD(aft, aft, 1); |
991 | 280M | break; |
992 | 283M | case OCH_: /* mark the first two branches */ |
993 | 283M | FWD(aft, aft, 1); |
994 | 283M | assert(OP(g->strip[pc+OPND(s)]) == OOR2); |
995 | 283M | FWD(aft, aft, OPND(s)); |
996 | 283M | break; |
997 | 317M | case OOR1: /* done a branch, find the O_CH */ |
998 | 317M | if (ISSTATEIN(aft, here)) { |
999 | 145k | for (look = 1; |
1000 | 291k | OP(s = g->strip[pc+look]) != O_CH; |
1001 | 145k | look += OPND(s)) |
1002 | 145k | assert(OP(s) == OOR2); |
1003 | 145k | FWD(aft, aft, look); |
1004 | 145k | } |
1005 | 317M | break; |
1006 | 317M | case OOR2: /* propagate OCH_'s marking */ |
1007 | 317M | FWD(aft, aft, 1); |
1008 | 317M | if (OP(g->strip[pc+OPND(s)]) != O_CH) { |
1009 | 33.5M | assert(OP(g->strip[pc+OPND(s)]) == OOR2); |
1010 | 33.5M | FWD(aft, aft, OPND(s)); |
1011 | 33.5M | } |
1012 | 317M | break; |
1013 | 283M | case O_CH: /* just empty */ |
1014 | 283M | FWD(aft, aft, 1); |
1015 | 283M | break; |
1016 | 0 | default: /* ooooops... */ |
1017 | 0 | assert(nope); |
1018 | 0 | break; |
1019 | 3.09G | } |
1020 | 3.09G | } |
1021 | | |
1022 | 140M | return(aft); |
1023 | 140M | } Line | Count | Source | 919 | 140M | { | 920 | 140M | cset *cs; | 921 | 140M | sop s; | 922 | 140M | sopno pc; | 923 | 140M | onestate here; /* note, macros know this name */ | 924 | 140M | sopno look; | 925 | 140M | int i; | 926 | | | 927 | 3.23G | for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { | 928 | 3.09G | s = g->strip[pc]; | 929 | 3.09G | switch (OP(s)) { | 930 | 0 | case OEND: | 931 | 0 | assert(pc == stop-1); | 932 | 0 | break; | 933 | 567M | case OCHAR: | 934 | | /* only characters can match */ | 935 | 567M | assert(!NONCHAR(ch) || ch != (char)OPND(s)); | 936 | 567M | if (ch == (char)OPND(s)) | 937 | 10.0M | FWD(aft, bef, 1); | 938 | 567M | break; | 939 | 138M | case OBOL: | 940 | 138M | if (ch == BOL || ch == BOLEOL) | 941 | 19.4k | FWD(aft, bef, 1); | 942 | 138M | break; | 943 | 89.9M | case OEOL: | 944 | 89.9M | if (ch == EOL || ch == BOLEOL) | 945 | 12.6k | FWD(aft, bef, 1); | 946 | 89.9M | break; | 947 | 0 | case OBOW: | 948 | 0 | if (ch == BOW) | 949 | 0 | FWD(aft, bef, 1); | 950 | 0 | break; | 951 | 0 | case OEOW: | 952 | 0 | if (ch == EOW) | 953 | 0 | FWD(aft, bef, 1); | 954 | 0 | break; | 955 | 199k | case OANY: | 956 | 199k | if (!NONCHAR(ch)) | 957 | 132k | FWD(aft, bef, 1); | 958 | 199k | break; | 959 | 400M | case OANYOF: | 960 | 400M | cs = &g->sets[OPND(s)]; | 961 | 400M | if (!NONCHAR(ch) && CHIN(cs, ch)) | 962 | 92.7M | FWD(aft, bef, 1); | 963 | 400M | break; | 964 | 0 | case OBACK_: /* ignored here */ | 965 | 0 | case O_BACK: | 966 | 0 | FWD(aft, aft, 1); | 967 | 0 | break; | 968 | 168M | case OPLUS_: /* forward, this is just an empty */ | 969 | 168M | FWD(aft, aft, 1); | 970 | 168M | break; | 971 | 168M | case O_PLUS: /* both forward and back */ | 972 | 168M | FWD(aft, aft, 1); | 973 | 168M | i = ISSETBACK(aft, OPND(s)); | 974 | 168M | BACK(aft, aft, OPND(s)); | 975 | 168M | if (!i && ISSETBACK(aft, OPND(s))) { | 976 | | /* oho, must reconsider loop body */ | 977 | 5.07M | pc -= OPND(s) + 1; | 978 | 5.07M 
| INIT(here, pc); | 979 | 5.07M | } | 980 | 168M | break; | 981 | 40.0M | case OQUEST_: /* two branches, both forward */ | 982 | 40.0M | FWD(aft, aft, 1); | 983 | 40.0M | FWD(aft, aft, OPND(s)); | 984 | 40.0M | break; | 985 | 40.0M | case O_QUEST: /* just an empty */ | 986 | 40.0M | FWD(aft, aft, 1); | 987 | 40.0M | break; | 988 | 139M | case OLPAREN: /* not significant here */ | 989 | 280M | case ORPAREN: | 990 | 280M | FWD(aft, aft, 1); | 991 | 280M | break; | 992 | 283M | case OCH_: /* mark the first two branches */ | 993 | 283M | FWD(aft, aft, 1); | 994 | 283M | assert(OP(g->strip[pc+OPND(s)]) == OOR2); | 995 | 283M | FWD(aft, aft, OPND(s)); | 996 | 283M | break; | 997 | 317M | case OOR1: /* done a branch, find the O_CH */ | 998 | 317M | if (ISSTATEIN(aft, here)) { | 999 | 145k | for (look = 1; | 1000 | 291k | OP(s = g->strip[pc+look]) != O_CH; | 1001 | 145k | look += OPND(s)) | 1002 | 145k | assert(OP(s) == OOR2); | 1003 | 145k | FWD(aft, aft, look); | 1004 | 145k | } | 1005 | 317M | break; | 1006 | 317M | case OOR2: /* propagate OCH_'s marking */ | 1007 | 317M | FWD(aft, aft, 1); | 1008 | 317M | if (OP(g->strip[pc+OPND(s)]) != O_CH) { | 1009 | 33.5M | assert(OP(g->strip[pc+OPND(s)]) == OOR2); | 1010 | 33.5M | FWD(aft, aft, OPND(s)); | 1011 | 33.5M | } | 1012 | 317M | break; | 1013 | 283M | case O_CH: /* just empty */ | 1014 | 283M | FWD(aft, aft, 1); | 1015 | 283M | break; | 1016 | 0 | default: /* ooooops... */ | 1017 | 0 | assert(nope); | 1018 | 0 | break; | 1019 | 3.09G | } | 1020 | 3.09G | } | 1021 | | | 1022 | 140M | return(aft); | 1023 | 140M | } |
Unexecuted instantiation: regexec.c:lstep |
1024 | | |
1025 | | #ifdef REDEBUG |
1026 | | /* |
1027 | | - print - print a set of states |
1028 | | */ |
1029 | | static void |
1030 | | print(struct match *m, const char *caption, states st, int ch, FILE *d) |
1031 | | { |
1032 | | struct re_guts *g = m->g; |
1033 | | int i; |
1034 | | int first = 1; |
1035 | | |
1036 | | if (!(m->eflags®_TRACE)) |
1037 | | return; |
1038 | | |
1039 | | (void)fprintf(d, "%s", caption); |
1040 | | if (ch != '\0') |
1041 | | (void)fprintf(d, " %s", pchar(ch)); |
1042 | | for (i = 0; i < g->nstates; i++) |
1043 | | if (ISSET(st, i)) { |
1044 | | (void)fprintf(d, "%s%d", (first) ? "\t" : ", ", i); |
1045 | | first = 0; |
1046 | | } |
1047 | | (void)fprintf(d, "\n"); |
1048 | | } |
1049 | | |
1050 | | /* |
1051 | | - at - print current situation |
1052 | | */ |
1053 | | static void |
1054 | | at(struct match *m, const char *title, const char *start, const char *stop, |
1055 | | sopno startst, sopno stopst) |
1056 | | { |
1057 | | if (!(m->eflags®_TRACE)) |
1058 | | return; |
1059 | | |
1060 | | (void)printf("%s %s-", title, pchar(*start)); |
1061 | | (void)printf("%s ", pchar(*stop)); |
1062 | | (void)printf("%ld-%ld\n", (long)startst, (long)stopst); |
1063 | | } |
1064 | | |
1065 | | #ifndef PCHARDONE |
1066 | | #define PCHARDONE /* never again */ |
/*
 - pchar - make a character printable
 *
 * Is this identical to regchar() over in debug.c?  Well, yes.  But a
 * duplicate here avoids having a debugging-capable regexec.o tied to
 * a matching debug.o, and this is convenient.  It all disappears in
 * the non-debug compilation anyway, so it doesn't matter much.
 *
 * Printable characters and the space are rendered as themselves; every
 * other value is rendered as a backslash followed by its octal value.
 * The result lives in a static buffer that the next call overwrites.
 */
static char *			/* -> representation */
pchar(int ch)
{
	static char pbuf[10];

	/*
	 * isprint() is only defined for EOF and values representable as
	 * unsigned char; ch may be a negative plain char, so convert first.
	 */
	if (isprint((unsigned char)ch) || ch == ' ')
		(void)snprintf(pbuf, sizeof pbuf, "%c", ch);
	else
		(void)snprintf(pbuf, sizeof pbuf, "\\%o", (unsigned int)ch);
	return(pbuf);
}
1086 | | #endif |
1087 | | #endif |
1088 | | |
/*
 * Drop the engine's name macros at the end of the file.  Presumably this
 * lets the includer define them afresh and include this file again (the
 * listing above shows separate s- and l-prefixed instantiations such as
 * sbackref/lbackref) -- TODO confirm against the including file.
 */
1089 | | #undef matcher |
1090 | | #undef fast |
1091 | | #undef slow |
1092 | | #undef dissect |
1093 | | #undef backref |
1094 | | #undef step |
1095 | | #undef print |
1096 | | #undef at |
1097 | | #undef match |
1098 | | #undef nope |
1099 | | #undef step_back |