Coverage Report

Created: 2025-07-11 06:59

/src/Python-3.8.3/Objects/stringlib/fastsearch.h
Line
Count
Source (jump to first uncovered line)
1
/* stringlib: fastsearch implementation */
2
3
#define STRINGLIB_FASTSEARCH_H
4
5
/* fast search/count implementation, based on a mix between boyer-
6
   moore and horspool, with a few more bells and whistles on the top.
7
   for some more background, see: http://effbot.org/zone/stringlib.htm */
8
9
/* note: fastsearch may access s[n], which isn't a problem when using
10
   Python's ordinary string types, but may cause problems if you're
11
   using this code in other contexts.  also, the count mode returns -1
12
   if there cannot possible be a match in the target string, and 0 if
13
   it has actually checked for matches, but didn't find any.  callers
14
   beware! */
15
16
4.55k
#define FAST_COUNT 0
17
2.27k
#define FAST_SEARCH 1
18
4.44k
#define FAST_RSEARCH 2
19
20
#if LONG_BIT >= 128
21
#define STRINGLIB_BLOOM_WIDTH 128
22
#elif LONG_BIT >= 64
23
273
#define STRINGLIB_BLOOM_WIDTH 64
24
#elif LONG_BIT >= 32
25
#define STRINGLIB_BLOOM_WIDTH 32
26
#else
27
#error "LONG_BIT is smaller than 32"
28
#endif
29
30
#define STRINGLIB_BLOOM_ADD(mask, ch) \
31
182
    ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
32
#define STRINGLIB_BLOOM(mask, ch)     \
33
91
    ((mask &  (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
34
35
#if STRINGLIB_SIZEOF_CHAR == 1
36
4.97k
#  define MEMCHR_CUT_OFF 15
37
#else
38
0
#  define MEMCHR_CUT_OFF 40
39
#endif
40
41
Py_LOCAL_INLINE(Py_ssize_t)
42
STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
43
2.68k
{
44
2.68k
    const STRINGLIB_CHAR *p, *e;
45
46
2.68k
    p = s;
47
2.68k
    e = s + n;
48
2.68k
    if (n > MEMCHR_CUT_OFF) {
49
#if STRINGLIB_SIZEOF_CHAR == 1
50
        p = memchr(s, ch, n);
51
174
        if (p != NULL)
52
112
            return (p - s);
53
62
        return -1;
54
#else
55
        /* use memchr if we can choose a needle without too many likely
56
           false positives */
57
        const STRINGLIB_CHAR *s1, *e1;
58
        unsigned char needle = ch & 0xff;
59
        /* If looking for a multiple of 256, we'd have too
60
           many false positives looking for the '\0' byte in UCS2
61
           and UCS4 representations. */
62
0
        if (needle != 0) {
63
0
            do {
64
0
                void *candidate = memchr(p, needle,
65
0
                                         (e - p) * sizeof(STRINGLIB_CHAR));
66
0
                if (candidate == NULL)
67
0
                    return -1;
68
0
                s1 = p;
69
0
                p = (const STRINGLIB_CHAR *)
70
0
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
71
0
                if (*p == ch)
72
0
                    return (p - s);
73
                /* False positive */
74
0
                p++;
75
0
                if (p - s1 > MEMCHR_CUT_OFF)
76
0
                    continue;
77
0
                if (e - p <= MEMCHR_CUT_OFF)
78
0
                    break;
79
0
                e1 = p + MEMCHR_CUT_OFF;
80
0
                while (p != e1) {
81
0
                    if (*p == ch)
82
0
                        return (p - s);
83
0
                    p++;
84
0
                }
85
0
            }
86
0
            while (e - p > MEMCHR_CUT_OFF);
87
0
        }
88
#endif
89
174
    }
90
11.2k
    while (p < e) {
91
8.93k
        if (*p == ch)
92
204
            return (p - s);
93
8.73k
        p++;
94
8.73k
    }
95
2.30k
    return -1;
96
2.51k
}
Unexecuted instantiation: bytearrayobject.c:stringlib_find_char
Unexecuted instantiation: bytesobject.c:stringlib_find_char
unicodeobject.c:ucs1lib_find_char
Line
Count
Source
43
2.57k
{
44
2.57k
    const STRINGLIB_CHAR *p, *e;
45
46
2.57k
    p = s;
47
2.57k
    e = s + n;
48
2.57k
    if (n > MEMCHR_CUT_OFF) {
49
73
#if STRINGLIB_SIZEOF_CHAR == 1
50
73
        p = memchr(s, ch, n);
51
73
        if (p != NULL)
52
28
            return (p - s);
53
45
        return -1;
54
#else
55
        /* use memchr if we can choose a needle without too many likely
56
           false positives */
57
        const STRINGLIB_CHAR *s1, *e1;
58
        unsigned char needle = ch & 0xff;
59
        /* If looking for a multiple of 256, we'd have too
60
           many false positives looking for the '\0' byte in UCS2
61
           and UCS4 representations. */
62
        if (needle != 0) {
63
            do {
64
                void *candidate = memchr(p, needle,
65
                                         (e - p) * sizeof(STRINGLIB_CHAR));
66
                if (candidate == NULL)
67
                    return -1;
68
                s1 = p;
69
                p = (const STRINGLIB_CHAR *)
70
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
71
                if (*p == ch)
72
                    return (p - s);
73
                /* False positive */
74
                p++;
75
                if (p - s1 > MEMCHR_CUT_OFF)
76
                    continue;
77
                if (e - p <= MEMCHR_CUT_OFF)
78
                    break;
79
                e1 = p + MEMCHR_CUT_OFF;
80
                while (p != e1) {
81
                    if (*p == ch)
82
                        return (p - s);
83
                    p++;
84
                }
85
            }
86
            while (e - p > MEMCHR_CUT_OFF);
87
        }
88
#endif
89
73
    }
90
11.2k
    while (p < e) {
91
8.91k
        if (*p == ch)
92
199
            return (p - s);
93
8.71k
        p++;
94
8.71k
    }
95
2.30k
    return -1;
96
2.50k
}
Unexecuted instantiation: unicodeobject.c:ucs2lib_find_char
Unexecuted instantiation: unicodeobject.c:ucs4lib_find_char
unicodeobject.c:asciilib_find_char
Line
Count
Source
43
18
{
44
18
    const STRINGLIB_CHAR *p, *e;
45
46
18
    p = s;
47
18
    e = s + n;
48
18
    if (n > MEMCHR_CUT_OFF) {
49
13
#if STRINGLIB_SIZEOF_CHAR == 1
50
13
        p = memchr(s, ch, n);
51
13
        if (p != NULL)
52
13
            return (p - s);
53
0
        return -1;
54
#else
55
        /* use memchr if we can choose a needle without too many likely
56
           false positives */
57
        const STRINGLIB_CHAR *s1, *e1;
58
        unsigned char needle = ch & 0xff;
59
        /* If looking for a multiple of 256, we'd have too
60
           many false positives looking for the '\0' byte in UCS2
61
           and UCS4 representations. */
62
        if (needle != 0) {
63
            do {
64
                void *candidate = memchr(p, needle,
65
                                         (e - p) * sizeof(STRINGLIB_CHAR));
66
                if (candidate == NULL)
67
                    return -1;
68
                s1 = p;
69
                p = (const STRINGLIB_CHAR *)
70
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
71
                if (*p == ch)
72
                    return (p - s);
73
                /* False positive */
74
                p++;
75
                if (p - s1 > MEMCHR_CUT_OFF)
76
                    continue;
77
                if (e - p <= MEMCHR_CUT_OFF)
78
                    break;
79
                e1 = p + MEMCHR_CUT_OFF;
80
                while (p != e1) {
81
                    if (*p == ch)
82
                        return (p - s);
83
                    p++;
84
                }
85
            }
86
            while (e - p > MEMCHR_CUT_OFF);
87
        }
88
#endif
89
13
    }
90
24
    while (p < e) {
91
24
        if (*p == ch)
92
5
            return (p - s);
93
19
        p++;
94
19
    }
95
0
    return -1;
96
5
}
Unexecuted instantiation: unicodeobject.c:stringlib_find_char
bytes_methods.c:stringlib_find_char
Line
Count
Source
43
88
{
44
88
    const STRINGLIB_CHAR *p, *e;
45
46
88
    p = s;
47
88
    e = s + n;
48
88
    if (n > MEMCHR_CUT_OFF) {
49
88
#if STRINGLIB_SIZEOF_CHAR == 1
50
88
        p = memchr(s, ch, n);
51
88
        if (p != NULL)
52
71
            return (p - s);
53
17
        return -1;
54
#else
55
        /* use memchr if we can choose a needle without too many likely
56
           false positives */
57
        const STRINGLIB_CHAR *s1, *e1;
58
        unsigned char needle = ch & 0xff;
59
        /* If looking for a multiple of 256, we'd have too
60
           many false positives looking for the '\0' byte in UCS2
61
           and UCS4 representations. */
62
        if (needle != 0) {
63
            do {
64
                void *candidate = memchr(p, needle,
65
                                         (e - p) * sizeof(STRINGLIB_CHAR));
66
                if (candidate == NULL)
67
                    return -1;
68
                s1 = p;
69
                p = (const STRINGLIB_CHAR *)
70
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
71
                if (*p == ch)
72
                    return (p - s);
73
                /* False positive */
74
                p++;
75
                if (p - s1 > MEMCHR_CUT_OFF)
76
                    continue;
77
                if (e - p <= MEMCHR_CUT_OFF)
78
                    break;
79
                e1 = p + MEMCHR_CUT_OFF;
80
                while (p != e1) {
81
                    if (*p == ch)
82
                        return (p - s);
83
                    p++;
84
                }
85
            }
86
            while (e - p > MEMCHR_CUT_OFF);
87
        }
88
#endif
89
88
    }
90
0
    while (p < e) {
91
0
        if (*p == ch)
92
0
            return (p - s);
93
0
        p++;
94
0
    }
95
0
    return -1;
96
0
}
97
98
Py_LOCAL_INLINE(Py_ssize_t)
99
STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
100
2.29k
{
101
2.29k
    const STRINGLIB_CHAR *p;
102
2.29k
#ifdef HAVE_MEMRCHR
103
    /* memrchr() is a GNU extension, available since glibc 2.1.91.
104
       it doesn't seem as optimized as memchr(), but is still quite
105
       faster than our hand-written loop below */
106
107
2.29k
    if (n > MEMCHR_CUT_OFF) {
108
#if STRINGLIB_SIZEOF_CHAR == 1
109
        p = memrchr(s, ch, n);
110
864
        if (p != NULL)
111
780
            return (p - s);
112
84
        return -1;
113
#else
114
        /* use memrchr if we can choose a needle without too many likely
115
           false positives */
116
        const STRINGLIB_CHAR *s1;
117
        Py_ssize_t n1;
118
        unsigned char needle = ch & 0xff;
119
        /* If looking for a multiple of 256, we'd have too
120
           many false positives looking for the '\0' byte in UCS2
121
           and UCS4 representations. */
122
0
        if (needle != 0) {
123
0
            do {
124
0
                void *candidate = memrchr(s, needle,
125
0
                                          n * sizeof(STRINGLIB_CHAR));
126
0
                if (candidate == NULL)
127
0
                    return -1;
128
0
                n1 = n;
129
0
                p = (const STRINGLIB_CHAR *)
130
0
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
131
0
                n = p - s;
132
0
                if (*p == ch)
133
0
                    return n;
134
                /* False positive */
135
0
                if (n1 - n > MEMCHR_CUT_OFF)
136
0
                    continue;
137
0
                if (n <= MEMCHR_CUT_OFF)
138
0
                    break;
139
0
                s1 = p - MEMCHR_CUT_OFF;
140
0
                while (p > s1) {
141
0
                    p--;
142
0
                    if (*p == ch)
143
0
                        return (p - s);
144
0
                }
145
0
                n = p - s;
146
0
            }
147
0
            while (n > MEMCHR_CUT_OFF);
148
0
        }
149
#endif
150
864
    }
151
1.42k
#endif  /* HAVE_MEMRCHR */
152
1.42k
    p = s + n;
153
8.61k
    while (p > s) {
154
7.74k
        p--;
155
7.74k
        if (*p == ch)
156
556
            return (p - s);
157
7.74k
    }
158
870
    return -1;
159
1.42k
}
Unexecuted instantiation: bytearrayobject.c:stringlib_rfind_char
Unexecuted instantiation: bytesobject.c:stringlib_rfind_char
unicodeobject.c:ucs1lib_rfind_char
Line
Count
Source
100
84
{
101
84
    const STRINGLIB_CHAR *p;
102
84
#ifdef HAVE_MEMRCHR
103
    /* memrchr() is a GNU extension, available since glibc 2.1.91.
104
       it doesn't seem as optimized as memchr(), but is still quite
105
       faster than our hand-written loop below */
106
107
84
    if (n > MEMCHR_CUT_OFF) {
108
42
#if STRINGLIB_SIZEOF_CHAR == 1
109
42
        p = memrchr(s, ch, n);
110
42
        if (p != NULL)
111
42
            return (p - s);
112
0
        return -1;
113
#else
114
        /* use memrchr if we can choose a needle without too many likely
115
           false positives */
116
        const STRINGLIB_CHAR *s1;
117
        Py_ssize_t n1;
118
        unsigned char needle = ch & 0xff;
119
        /* If looking for a multiple of 256, we'd have too
120
           many false positives looking for the '\0' byte in UCS2
121
           and UCS4 representations. */
122
        if (needle != 0) {
123
            do {
124
                void *candidate = memrchr(s, needle,
125
                                          n * sizeof(STRINGLIB_CHAR));
126
                if (candidate == NULL)
127
                    return -1;
128
                n1 = n;
129
                p = (const STRINGLIB_CHAR *)
130
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
131
                n = p - s;
132
                if (*p == ch)
133
                    return n;
134
                /* False positive */
135
                if (n1 - n > MEMCHR_CUT_OFF)
136
                    continue;
137
                if (n <= MEMCHR_CUT_OFF)
138
                    break;
139
                s1 = p - MEMCHR_CUT_OFF;
140
                while (p > s1) {
141
                    p--;
142
                    if (*p == ch)
143
                        return (p - s);
144
                }
145
                n = p - s;
146
            }
147
            while (n > MEMCHR_CUT_OFF);
148
        }
149
#endif
150
42
    }
151
42
#endif  /* HAVE_MEMRCHR */
152
42
    p = s + n;
153
252
    while (p > s) {
154
238
        p--;
155
238
        if (*p == ch)
156
28
            return (p - s);
157
238
    }
158
14
    return -1;
159
42
}
Unexecuted instantiation: unicodeobject.c:ucs2lib_rfind_char
Unexecuted instantiation: unicodeobject.c:ucs4lib_rfind_char
unicodeobject.c:asciilib_rfind_char
Line
Count
Source
100
2.20k
{
101
2.20k
    const STRINGLIB_CHAR *p;
102
2.20k
#ifdef HAVE_MEMRCHR
103
    /* memrchr() is a GNU extension, available since glibc 2.1.91.
104
       it doesn't seem as optimized as memchr(), but is still quite
105
       faster than our hand-written loop below */
106
107
2.20k
    if (n > MEMCHR_CUT_OFF) {
108
822
#if STRINGLIB_SIZEOF_CHAR == 1
109
822
        p = memrchr(s, ch, n);
110
822
        if (p != NULL)
111
738
            return (p - s);
112
84
        return -1;
113
#else
114
        /* use memrchr if we can choose a needle without too many likely
115
           false positives */
116
        const STRINGLIB_CHAR *s1;
117
        Py_ssize_t n1;
118
        unsigned char needle = ch & 0xff;
119
        /* If looking for a multiple of 256, we'd have too
120
           many false positives looking for the '\0' byte in UCS2
121
           and UCS4 representations. */
122
        if (needle != 0) {
123
            do {
124
                void *candidate = memrchr(s, needle,
125
                                          n * sizeof(STRINGLIB_CHAR));
126
                if (candidate == NULL)
127
                    return -1;
128
                n1 = n;
129
                p = (const STRINGLIB_CHAR *)
130
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
131
                n = p - s;
132
                if (*p == ch)
133
                    return n;
134
                /* False positive */
135
                if (n1 - n > MEMCHR_CUT_OFF)
136
                    continue;
137
                if (n <= MEMCHR_CUT_OFF)
138
                    break;
139
                s1 = p - MEMCHR_CUT_OFF;
140
                while (p > s1) {
141
                    p--;
142
                    if (*p == ch)
143
                        return (p - s);
144
                }
145
                n = p - s;
146
            }
147
            while (n > MEMCHR_CUT_OFF);
148
        }
149
#endif
150
822
    }
151
1.38k
#endif  /* HAVE_MEMRCHR */
152
1.38k
    p = s + n;
153
8.36k
    while (p > s) {
154
7.50k
        p--;
155
7.50k
        if (*p == ch)
156
528
            return (p - s);
157
7.50k
    }
158
856
    return -1;
159
1.38k
}
Unexecuted instantiation: unicodeobject.c:stringlib_rfind_char
Unexecuted instantiation: bytes_methods.c:stringlib_rfind_char
160
161
#undef MEMCHR_CUT_OFF
162
163
Py_LOCAL_INLINE(Py_ssize_t)
164
FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
165
           const STRINGLIB_CHAR* p, Py_ssize_t m,
166
           Py_ssize_t maxcount, int mode)
167
2.26k
{
168
2.26k
    unsigned long mask;
169
2.26k
    Py_ssize_t skip, count = 0;
170
2.26k
    Py_ssize_t i, j, mlast, w;
171
172
2.26k
    w = n - m;
173
174
2.26k
    if (w < 0 || (mode == FAST_COUNT && maxcount == 0))
175
0
        return -1;
176
177
    /* look for special cases */
178
2.26k
    if (m <= 1) {
179
2.22k
        if (m <= 0)
180
0
            return -1;
181
        /* use special case for 1-character strings */
182
2.22k
        if (mode == FAST_SEARCH)
183
18
            return STRINGLIB(find_char)(s, n, p[0]);
184
2.20k
        else if (mode == FAST_RSEARCH)
185
2.20k
            return STRINGLIB(rfind_char)(s, n, p[0]);
186
2
        else {  /* FAST_COUNT */
187
85
            for (i = 0; i < n; i++)
188
83
                if (s[i] == p[0]) {
189
18
                    count++;
190
18
                    if (count == maxcount)
191
0
                        return maxcount;
192
18
                }
193
2
            return count;
194
2
        }
195
2.22k
    }
196
197
35
    mlast = m - 1;
198
35
    skip = mlast - 1;
199
35
    mask = 0;
200
201
35
    if (mode != FAST_RSEARCH) {
202
35
        const STRINGLIB_CHAR *ss = s + m - 1;
203
35
        const STRINGLIB_CHAR *pp = p + m - 1;
204
205
        /* create compressed boyer-moore delta 1 table */
206
207
        /* process pattern[:-1] */
208
182
        for (i = 0; i < mlast; i++) {
209
147
            STRINGLIB_BLOOM_ADD(mask, p[i]);
210
147
            if (p[i] == p[mlast])
211
0
                skip = mlast - i - 1;
212
147
        }
213
        /* process pattern[-1] outside the loop */
214
35
        STRINGLIB_BLOOM_ADD(mask, p[mlast]);
215
216
126
        for (i = 0; i <= w; i++) {
217
            /* note: using mlast in the skip path slows things down on x86 */
218
91
            if (ss[i] == pp[0]) {
219
                /* candidate match */
220
29
                for (j = 0; j < mlast; j++)
221
29
                    if (s[i+j] != p[j])
222
29
                        break;
223
29
                if (j == mlast) {
224
                    /* got a match! */
225
0
                    if (mode != FAST_COUNT)
226
0
                        return i;
227
0
                    count++;
228
0
                    if (count == maxcount)
229
0
                        return maxcount;
230
0
                    i = i + mlast;
231
0
                    continue;
232
0
                }
233
                /* miss: check if next character is part of pattern */
234
29
                if (!STRINGLIB_BLOOM(mask, ss[i+1]))
235
1
                    i = i + m;
236
28
                else
237
28
                    i = i + skip;
238
62
            } else {
239
                /* skip: check if next character is part of pattern */
240
62
                if (!STRINGLIB_BLOOM(mask, ss[i+1]))
241
34
                    i = i + m;
242
62
            }
243
91
        }
244
35
    } else {    /* FAST_RSEARCH */
245
246
        /* create compressed boyer-moore delta 1 table */
247
248
        /* process pattern[0] outside the loop */
249
0
        STRINGLIB_BLOOM_ADD(mask, p[0]);
250
        /* process pattern[:0:-1] */
251
0
        for (i = mlast; i > 0; i--) {
252
0
            STRINGLIB_BLOOM_ADD(mask, p[i]);
253
0
            if (p[i] == p[0])
254
0
                skip = i - 1;
255
0
        }
256
257
0
        for (i = w; i >= 0; i--) {
258
0
            if (s[i] == p[0]) {
259
                /* candidate match */
260
0
                for (j = mlast; j > 0; j--)
261
0
                    if (s[i+j] != p[j])
262
0
                        break;
263
0
                if (j == 0)
264
                    /* got a match! */
265
0
                    return i;
266
                /* miss: check if previous character is part of pattern */
267
0
                if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1]))
268
0
                    i = i - m;
269
0
                else
270
0
                    i = i - skip;
271
0
            } else {
272
                /* skip: check if previous character is part of pattern */
273
0
                if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1]))
274
0
                    i = i - m;
275
0
            }
276
0
        }
277
0
    }
278
279
35
    if (mode != FAST_COUNT)
280
35
        return -1;
281
0
    return count;
282
35
}
Unexecuted instantiation: bytearrayobject.c:fastsearch
Unexecuted instantiation: bytesobject.c:fastsearch
unicodeobject.c:asciilib_fastsearch
Line
Count
Source
167
2.24k
{
168
2.24k
    unsigned long mask;
169
2.24k
    Py_ssize_t skip, count = 0;
170
2.24k
    Py_ssize_t i, j, mlast, w;
171
172
2.24k
    w = n - m;
173
174
2.24k
    if (w < 0 || (mode == FAST_COUNT && maxcount == 0))
175
0
        return -1;
176
177
    /* look for special cases */
178
2.24k
    if (m <= 1) {
179
2.22k
        if (m <= 0)
180
0
            return -1;
181
        /* use special case for 1-character strings */
182
2.22k
        if (mode == FAST_SEARCH)
183
18
            return STRINGLIB(find_char)(s, n, p[0]);
184
2.20k
        else if (mode == FAST_RSEARCH)
185
2.20k
            return STRINGLIB(rfind_char)(s, n, p[0]);
186
2
        else {  /* FAST_COUNT */
187
85
            for (i = 0; i < n; i++)
188
83
                if (s[i] == p[0]) {
189
18
                    count++;
190
18
                    if (count == maxcount)
191
0
                        return maxcount;
192
18
                }
193
2
            return count;
194
2
        }
195
2.22k
    }
196
197
14
    mlast = m - 1;
198
14
    skip = mlast - 1;
199
14
    mask = 0;
200
201
14
    if (mode != FAST_RSEARCH) {
202
14
        const STRINGLIB_CHAR *ss = s + m - 1;
203
14
        const STRINGLIB_CHAR *pp = p + m - 1;
204
205
        /* create compressed boyer-moore delta 1 table */
206
207
        /* process pattern[:-1] */
208
140
        for (i = 0; i < mlast; i++) {
209
126
            STRINGLIB_BLOOM_ADD(mask, p[i]);
210
126
            if (p[i] == p[mlast])
211
0
                skip = mlast - i - 1;
212
126
        }
213
        /* process pattern[-1] outside the loop */
214
14
        STRINGLIB_BLOOM_ADD(mask, p[mlast]);
215
216
84
        for (i = 0; i <= w; i++) {
217
            /* note: using mlast in the skip path slows things down on x86 */
218
70
            if (ss[i] == pp[0]) {
219
                /* candidate match */
220
28
                for (j = 0; j < mlast; j++)
221
28
                    if (s[i+j] != p[j])
222
28
                        break;
223
28
                if (j == mlast) {
224
                    /* got a match! */
225
0
                    if (mode != FAST_COUNT)
226
0
                        return i;
227
0
                    count++;
228
0
                    if (count == maxcount)
229
0
                        return maxcount;
230
0
                    i = i + mlast;
231
0
                    continue;
232
0
                }
233
                /* miss: check if next character is part of pattern */
234
28
                if (!STRINGLIB_BLOOM(mask, ss[i+1]))
235
0
                    i = i + m;
236
28
                else
237
28
                    i = i + skip;
238
42
            } else {
239
                /* skip: check if next character is part of pattern */
240
42
                if (!STRINGLIB_BLOOM(mask, ss[i+1]))
241
14
                    i = i + m;
242
42
            }
243
70
        }
244
14
    } else {    /* FAST_RSEARCH */
245
246
        /* create compressed boyer-moore delta 1 table */
247
248
        /* process pattern[0] outside the loop */
249
0
        STRINGLIB_BLOOM_ADD(mask, p[0]);
250
        /* process pattern[:0:-1] */
251
0
        for (i = mlast; i > 0; i--) {
252
0
            STRINGLIB_BLOOM_ADD(mask, p[i]);
253
0
            if (p[i] == p[0])
254
0
                skip = i - 1;
255
0
        }
256
257
0
        for (i = w; i >= 0; i--) {
258
0
            if (s[i] == p[0]) {
259
                /* candidate match */
260
0
                for (j = mlast; j > 0; j--)
261
0
                    if (s[i+j] != p[j])
262
0
                        break;
263
0
                if (j == 0)
264
                    /* got a match! */
265
0
                    return i;
266
                /* miss: check if previous character is part of pattern */
267
0
                if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1]))
268
0
                    i = i - m;
269
0
                else
270
0
                    i = i - skip;
271
0
            } else {
272
                /* skip: check if previous character is part of pattern */
273
0
                if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1]))
274
0
                    i = i - m;
275
0
            }
276
0
        }
277
0
    }
278
279
14
    if (mode != FAST_COUNT)
280
14
        return -1;
281
0
    return count;
282
14
}
unicodeobject.c:ucs1lib_fastsearch
Line
Count
Source
167
21
{
168
21
    unsigned long mask;
169
21
    Py_ssize_t skip, count = 0;
170
21
    Py_ssize_t i, j, mlast, w;
171
172
21
    w = n - m;
173
174
21
    if (w < 0 || (mode == FAST_COUNT && maxcount == 0))
175
0
        return -1;
176
177
    /* look for special cases */
178
21
    if (m <= 1) {
179
0
        if (m <= 0)
180
0
            return -1;
181
        /* use special case for 1-character strings */
182
0
        if (mode == FAST_SEARCH)
183
0
            return STRINGLIB(find_char)(s, n, p[0]);
184
0
        else if (mode == FAST_RSEARCH)
185
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
186
0
        else {  /* FAST_COUNT */
187
0
            for (i = 0; i < n; i++)
188
0
                if (s[i] == p[0]) {
189
0
                    count++;
190
0
                    if (count == maxcount)
191
0
                        return maxcount;
192
0
                }
193
0
            return count;
194
0
        }
195
0
    }
196
197
21
    mlast = m - 1;
198
21
    skip = mlast - 1;
199
21
    mask = 0;
200
201
21
    if (mode != FAST_RSEARCH) {
202
21
        const STRINGLIB_CHAR *ss = s + m - 1;
203
21
        const STRINGLIB_CHAR *pp = p + m - 1;
204
205
        /* create compressed boyer-moore delta 1 table */
206
207
        /* process pattern[:-1] */
208
42
        for (i = 0; i < mlast; i++) {
209
21
            STRINGLIB_BLOOM_ADD(mask, p[i]);
210
21
            if (p[i] == p[mlast])
211
0
                skip = mlast - i - 1;
212
21
        }
213
        /* process pattern[-1] outside the loop */
214
21
        STRINGLIB_BLOOM_ADD(mask, p[mlast]);
215
216
42
        for (i = 0; i <= w; i++) {
217
            /* note: using mlast in the skip path slows things down on x86 */
218
21
            if (ss[i] == pp[0]) {
219
                /* candidate match */
220
1
                for (j = 0; j < mlast; j++)
221
1
                    if (s[i+j] != p[j])
222
1
                        break;
223
1
                if (j == mlast) {
224
                    /* got a match! */
225
0
                    if (mode != FAST_COUNT)
226
0
                        return i;
227
0
                    count++;
228
0
                    if (count == maxcount)
229
0
                        return maxcount;
230
0
                    i = i + mlast;
231
0
                    continue;
232
0
                }
233
                /* miss: check if next character is part of pattern */
234
1
                if (!STRINGLIB_BLOOM(mask, ss[i+1]))
235
1
                    i = i + m;
236
0
                else
237
0
                    i = i + skip;
238
20
            } else {
239
                /* skip: check if next character is part of pattern */
240
20
                if (!STRINGLIB_BLOOM(mask, ss[i+1]))
241
20
                    i = i + m;
242
20
            }
243
21
        }
244
21
    } else {    /* FAST_RSEARCH */
245
246
        /* create compressed boyer-moore delta 1 table */
247
248
        /* process pattern[0] outside the loop */
249
0
        STRINGLIB_BLOOM_ADD(mask, p[0]);
250
        /* process pattern[:0:-1] */
251
0
        for (i = mlast; i > 0; i--) {
252
0
            STRINGLIB_BLOOM_ADD(mask, p[i]);
253
0
            if (p[i] == p[0])
254
0
                skip = i - 1;
255
0
        }
256
257
0
        for (i = w; i >= 0; i--) {
258
0
            if (s[i] == p[0]) {
259
                /* candidate match */
260
0
                for (j = mlast; j > 0; j--)
261
0
                    if (s[i+j] != p[j])
262
0
                        break;
263
0
                if (j == 0)
264
                    /* got a match! */
265
0
                    return i;
266
                /* miss: check if previous character is part of pattern */
267
0
                if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1]))
268
0
                    i = i - m;
269
0
                else
270
0
                    i = i - skip;
271
0
            } else {
272
                /* skip: check if previous character is part of pattern */
273
0
                if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1]))
274
0
                    i = i - m;
275
0
            }
276
0
        }
277
0
    }
278
279
21
    if (mode != FAST_COUNT)
280
21
        return -1;
281
0
    return count;
282
21
}
Unexecuted instantiation: unicodeobject.c:ucs2lib_fastsearch
Unexecuted instantiation: unicodeobject.c:ucs4lib_fastsearch
Unexecuted instantiation: unicodeobject.c:fastsearch
Unexecuted instantiation: bytes_methods.c:fastsearch
283