Coverage Report

Created: 2026-06-21 06:15

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Objects/stringlib/fastsearch.h
Line
Count
Source
1
/* stringlib: fastsearch implementation */
2
3
#define STRINGLIB_FASTSEARCH_H
4
5
/* fast search/count implementation, based on a mix between boyer-
6
   moore and horspool, with a few more bells and whistles on the top.
7
   for some more background, see:
8
   https://web.archive.org/web/20201107074620/http://effbot.org/zone/stringlib.htm */
9
10
/* note: fastsearch may access s[n], which isn't a problem when using
11
   Python's ordinary string types, but may cause problems if you're
12
   using this code in other contexts.  also, the count mode returns -1
13
   if there cannot possibly be a match in the target string, and 0 if
14
   it has actually checked for matches, but didn't find any.  callers
15
   beware! */
16
17
/* If the strings are long enough, use Crochemore and Perrin's Two-Way
18
   algorithm, which has worst-case O(n) runtime and best-case O(n/k).
19
   Also compute a table of shifts to achieve O(n/k) in more cases,
20
   and often (data dependent) deduce larger shifts than pure C&P can
21
   deduce. See stringlib_find_two_way_notes.txt in this folder for a
22
   detailed explanation. */
23
24
282M
#define FAST_COUNT 0
25
113M
#define FAST_SEARCH 1
26
75.1M
#define FAST_RSEARCH 2
27
28
#if LONG_BIT >= 128
29
#define STRINGLIB_BLOOM_WIDTH 128
30
#elif LONG_BIT >= 64
31
1.16G
#define STRINGLIB_BLOOM_WIDTH 64
32
#elif LONG_BIT >= 32
33
#define STRINGLIB_BLOOM_WIDTH 32
34
#else
35
#error "LONG_BIT is smaller than 32"
36
#endif
37
38
#define STRINGLIB_BLOOM_ADD(mask, ch) \
39
215M
    ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
40
#define STRINGLIB_BLOOM(mask, ch)     \
41
950M
    ((mask &  (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
42
43
#ifdef STRINGLIB_FAST_MEMCHR
44
245M
#  define MEMCHR_CUT_OFF 15
45
#else
46
8.93M
#  define MEMCHR_CUT_OFF 40
47
#endif
48
49
Py_LOCAL_INLINE(Py_ssize_t)
50
STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
51
254M
{
52
254M
    const STRINGLIB_CHAR *p, *e;
53
54
254M
    p = s;
55
254M
    e = s + n;
56
254M
    if (n > MEMCHR_CUT_OFF) {
57
#ifdef STRINGLIB_FAST_MEMCHR
58
35.7M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
35.7M
        if (p != NULL)
60
34.3M
            return (p - s);
61
1.41M
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
8.40M
        if (needle != 0) {
71
8.41M
            do {
72
8.41M
                const void *candidate = memchr(p, needle,
73
8.41M
                                               (e - p) * sizeof(STRINGLIB_CHAR));
74
8.41M
                if (candidate == NULL)
75
71.3k
                    return -1;
76
8.34M
                s1 = p;
77
8.34M
                p = (const STRINGLIB_CHAR *)
78
8.34M
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
8.34M
                if (*p == ch)
80
8.30M
                    return (p - s);
81
                /* False positive */
82
36.5k
                p++;
83
36.5k
                if (p - s1 > MEMCHR_CUT_OFF)
84
13.5k
                    continue;
85
23.0k
                if (e - p <= MEMCHR_CUT_OFF)
86
2.91k
                    break;
87
20.1k
                e1 = p + MEMCHR_CUT_OFF;
88
694k
                while (p != e1) {
89
678k
                    if (*p == ch)
90
4.02k
                        return (p - s);
91
674k
                    p++;
92
674k
                }
93
20.1k
            }
94
8.39M
            while (e - p > MEMCHR_CUT_OFF);
95
8.39M
        }
96
#endif
97
44.1M
    }
98
1.08G
    while (p < e) {
99
898M
        if (*p == ch)
100
21.1M
            return (p - s);
101
877M
        p++;
102
877M
    }
103
188M
    return -1;
104
209M
}
bytesobject.c:stringlib_find_char
Line
Count
Source
51
337k
{
52
337k
    const STRINGLIB_CHAR *p, *e;
53
54
337k
    p = s;
55
337k
    e = s + n;
56
337k
    if (n > MEMCHR_CUT_OFF) {
57
337k
#ifdef STRINGLIB_FAST_MEMCHR
58
337k
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
337k
        if (p != NULL)
60
336k
            return (p - s);
61
1.49k
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                const void *candidate = memchr(p, needle,
73
                                               (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
337k
    }
98
84
    while (p < e) {
99
84
        if (*p == ch)
100
14
            return (p - s);
101
70
        p++;
102
70
    }
103
0
    return -1;
104
14
}
unicodeobject.c:ucs1lib_find_char
Line
Count
Source
51
217M
{
52
217M
    const STRINGLIB_CHAR *p, *e;
53
54
217M
    p = s;
55
217M
    e = s + n;
56
217M
    if (n > MEMCHR_CUT_OFF) {
57
20.4M
#ifdef STRINGLIB_FAST_MEMCHR
58
20.4M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
20.4M
        if (p != NULL)
60
19.5M
            return (p - s);
61
874k
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                const void *candidate = memchr(p, needle,
73
                                               (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
20.4M
    }
98
988M
    while (p < e) {
99
805M
        if (*p == ch)
100
13.7M
            return (p - s);
101
791M
        p++;
102
791M
    }
103
183M
    return -1;
104
197M
}
unicodeobject.c:ucs2lib_find_char
Line
Count
Source
51
8.82M
{
52
8.82M
    const STRINGLIB_CHAR *p, *e;
53
54
8.82M
    p = s;
55
8.82M
    e = s + n;
56
8.82M
    if (n > MEMCHR_CUT_OFF) {
57
#ifdef STRINGLIB_FAST_MEMCHR
58
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
        if (p != NULL)
60
            return (p - s);
61
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
8.40M
        const STRINGLIB_CHAR *s1, *e1;
66
8.40M
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
8.40M
        if (needle != 0) {
71
8.41M
            do {
72
8.41M
                const void *candidate = memchr(p, needle,
73
8.41M
                                               (e - p) * sizeof(STRINGLIB_CHAR));
74
8.41M
                if (candidate == NULL)
75
71.3k
                    return -1;
76
8.34M
                s1 = p;
77
8.34M
                p = (const STRINGLIB_CHAR *)
78
8.34M
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
8.34M
                if (*p == ch)
80
8.30M
                    return (p - s);
81
                /* False positive */
82
36.5k
                p++;
83
36.5k
                if (p - s1 > MEMCHR_CUT_OFF)
84
13.5k
                    continue;
85
23.0k
                if (e - p <= MEMCHR_CUT_OFF)
86
2.91k
                    break;
87
20.1k
                e1 = p + MEMCHR_CUT_OFF;
88
694k
                while (p != e1) {
89
678k
                    if (*p == ch)
90
4.02k
                        return (p - s);
91
674k
                    p++;
92
674k
                }
93
20.1k
            }
94
8.39M
            while (e - p > MEMCHR_CUT_OFF);
95
8.39M
        }
96
8.40M
#endif
97
8.40M
    }
98
6.27M
    while (p < e) {
99
6.02M
        if (*p == ch)
100
183k
            return (p - s);
101
5.83M
        p++;
102
5.83M
    }
103
257k
    return -1;
104
441k
}
unicodeobject.c:ucs4lib_find_char
Line
Count
Source
51
6.24M
{
52
6.24M
    const STRINGLIB_CHAR *p, *e;
53
54
6.24M
    p = s;
55
6.24M
    e = s + n;
56
6.24M
    if (n > MEMCHR_CUT_OFF) {
57
6.19M
#ifdef STRINGLIB_FAST_MEMCHR
58
6.19M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
6.19M
        if (p != NULL)
60
6.18M
            return (p - s);
61
14.4k
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                const void *candidate = memchr(p, needle,
73
                                               (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
6.19M
    }
98
185k
    while (p < e) {
99
155k
        if (*p == ch)
100
19.1k
            return (p - s);
101
136k
        p++;
102
136k
    }
103
29.7k
    return -1;
104
48.9k
}
unicodeobject.c:asciilib_find_char
Line
Count
Source
51
5.49M
{
52
5.49M
    const STRINGLIB_CHAR *p, *e;
53
54
5.49M
    p = s;
55
5.49M
    e = s + n;
56
5.49M
    if (n > MEMCHR_CUT_OFF) {
57
4.45M
#ifdef STRINGLIB_FAST_MEMCHR
58
4.45M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
4.45M
        if (p != NULL)
60
4.00M
            return (p - s);
61
444k
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                const void *candidate = memchr(p, needle,
73
                                               (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
4.45M
    }
98
3.34M
    while (p < e) {
99
3.27M
        if (*p == ch)
100
981k
            return (p - s);
101
2.29M
        p++;
102
2.29M
    }
103
64.2k
    return -1;
104
1.04M
}
bytes_methods.c:stringlib_find_char
Line
Count
Source
51
15.3M
{
52
15.3M
    const STRINGLIB_CHAR *p, *e;
53
54
15.3M
    p = s;
55
15.3M
    e = s + n;
56
15.3M
    if (n > MEMCHR_CUT_OFF) {
57
4.34M
#ifdef STRINGLIB_FAST_MEMCHR
58
4.34M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
4.34M
        if (p != NULL)
60
4.26M
            return (p - s);
61
78.6k
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                const void *candidate = memchr(p, needle,
73
                                               (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
4.34M
    }
98
89.1M
    while (p < e) {
99
84.3M
        if (*p == ch)
100
6.21M
            return (p - s);
101
78.1M
        p++;
102
78.1M
    }
103
4.82M
    return -1;
104
11.0M
}
Unexecuted instantiation: bytearrayobject.c:stringlib_find_char
105
106
#undef MEMCHR_CUT_OFF
107
108
#if STRINGLIB_SIZEOF_CHAR == 1
109
400k
#  define MEMRCHR_CUT_OFF 15
110
#else
111
390k
#  define MEMRCHR_CUT_OFF 40
112
#endif
113
114
115
Py_LOCAL_INLINE(Py_ssize_t)
116
STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
117
744k
{
118
744k
    const STRINGLIB_CHAR *p;
119
744k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
744k
    if (n > MEMRCHR_CUT_OFF) {
126
#if STRINGLIB_SIZEOF_CHAR == 1
127
        p = memrchr(s, ch, n);
128
106k
        if (p != NULL)
129
100k
            return (p - s);
130
6.09k
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
266k
        if (needle != 0) {
141
279k
            do {
142
279k
                void *candidate = memrchr(s, needle,
143
279k
                                          n * sizeof(STRINGLIB_CHAR));
144
279k
                if (candidate == NULL)
145
1.40k
                    return -1;
146
277k
                n1 = n;
147
277k
                p = (const STRINGLIB_CHAR *)
148
277k
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
277k
                n = p - s;
150
277k
                if (*p == ch)
151
262k
                    return n;
152
                /* False positive */
153
15.4k
                if (n1 - n > MEMRCHR_CUT_OFF)
154
6.62k
                    continue;
155
8.86k
                if (n <= MEMRCHR_CUT_OFF)
156
947
                    break;
157
7.91k
                s1 = p - MEMRCHR_CUT_OFF;
158
308k
                while (p > s1) {
159
301k
                    p--;
160
301k
                    if (*p == ch)
161
633
                        return (p - s);
162
301k
                }
163
7.28k
                n = p - s;
164
7.28k
            }
165
266k
            while (n > MEMRCHR_CUT_OFF);
166
266k
        }
167
#endif
168
372k
    }
169
374k
#endif  /* HAVE_MEMRCHR */
170
374k
    p = s + n;
171
2.43M
    while (p > s) {
172
2.23M
        p--;
173
2.23M
        if (*p == ch)
174
179k
            return (p - s);
175
2.23M
    }
176
194k
    return -1;
177
374k
}
Unexecuted instantiation: bytesobject.c:stringlib_rfind_char
unicodeobject.c:ucs1lib_rfind_char
Line
Count
Source
117
69.2k
{
118
69.2k
    const STRINGLIB_CHAR *p;
119
69.2k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
69.2k
    if (n > MEMRCHR_CUT_OFF) {
126
64.3k
#if STRINGLIB_SIZEOF_CHAR == 1
127
64.3k
        p = memrchr(s, ch, n);
128
64.3k
        if (p != NULL)
129
62.3k
            return (p - s);
130
1.96k
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
        if (needle != 0) {
141
            do {
142
                void *candidate = memrchr(s, needle,
143
                                          n * sizeof(STRINGLIB_CHAR));
144
                if (candidate == NULL)
145
                    return -1;
146
                n1 = n;
147
                p = (const STRINGLIB_CHAR *)
148
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
                n = p - s;
150
                if (*p == ch)
151
                    return n;
152
                /* False positive */
153
                if (n1 - n > MEMRCHR_CUT_OFF)
154
                    continue;
155
                if (n <= MEMRCHR_CUT_OFF)
156
                    break;
157
                s1 = p - MEMRCHR_CUT_OFF;
158
                while (p > s1) {
159
                    p--;
160
                    if (*p == ch)
161
                        return (p - s);
162
                }
163
                n = p - s;
164
            }
165
            while (n > MEMRCHR_CUT_OFF);
166
        }
167
#endif
168
64.3k
    }
169
4.93k
#endif  /* HAVE_MEMRCHR */
170
4.93k
    p = s + n;
171
20.7k
    while (p > s) {
172
18.8k
        p--;
173
18.8k
        if (*p == ch)
174
3.08k
            return (p - s);
175
18.8k
    }
176
1.85k
    return -1;
177
4.93k
}
unicodeobject.c:ucs2lib_rfind_char
Line
Count
Source
117
214k
{
118
214k
    const STRINGLIB_CHAR *p;
119
214k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
214k
    if (n > MEMRCHR_CUT_OFF) {
126
#if STRINGLIB_SIZEOF_CHAR == 1
127
        p = memrchr(s, ch, n);
128
        if (p != NULL)
129
            return (p - s);
130
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
181k
        const STRINGLIB_CHAR *s1;
135
181k
        Py_ssize_t n1;
136
181k
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
181k
        if (needle != 0) {
141
184k
            do {
142
184k
                void *candidate = memrchr(s, needle,
143
184k
                                          n * sizeof(STRINGLIB_CHAR));
144
184k
                if (candidate == NULL)
145
824
                    return -1;
146
183k
                n1 = n;
147
183k
                p = (const STRINGLIB_CHAR *)
148
183k
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
183k
                n = p - s;
150
183k
                if (*p == ch)
151
179k
                    return n;
152
                /* False positive */
153
4.31k
                if (n1 - n > MEMRCHR_CUT_OFF)
154
2.04k
                    continue;
155
2.26k
                if (n <= MEMRCHR_CUT_OFF)
156
572
                    break;
157
1.69k
                s1 = p - MEMRCHR_CUT_OFF;
158
63.1k
                while (p > s1) {
159
61.6k
                    p--;
160
61.6k
                    if (*p == ch)
161
254
                        return (p - s);
162
61.6k
                }
163
1.44k
                n = p - s;
164
1.44k
            }
165
181k
            while (n > MEMRCHR_CUT_OFF);
166
181k
        }
167
181k
#endif
168
181k
    }
169
34.3k
#endif  /* HAVE_MEMRCHR */
170
34.3k
    p = s + n;
171
127k
    while (p > s) {
172
125k
        p--;
173
125k
        if (*p == ch)
174
32.0k
            return (p - s);
175
125k
    }
176
2.39k
    return -1;
177
34.3k
}
unicodeobject.c:ucs4lib_rfind_char
Line
Count
Source
117
129k
{
118
129k
    const STRINGLIB_CHAR *p;
119
129k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
129k
    if (n > MEMRCHR_CUT_OFF) {
126
#if STRINGLIB_SIZEOF_CHAR == 1
127
        p = memrchr(s, ch, n);
128
        if (p != NULL)
129
            return (p - s);
130
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
84.7k
        const STRINGLIB_CHAR *s1;
135
84.7k
        Py_ssize_t n1;
136
84.7k
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
84.7k
        if (needle != 0) {
141
94.8k
            do {
142
94.8k
                void *candidate = memrchr(s, needle,
143
94.8k
                                          n * sizeof(STRINGLIB_CHAR));
144
94.8k
                if (candidate == NULL)
145
583
                    return -1;
146
94.2k
                n1 = n;
147
94.2k
                p = (const STRINGLIB_CHAR *)
148
94.2k
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
94.2k
                n = p - s;
150
94.2k
                if (*p == ch)
151
83.1k
                    return n;
152
                /* False positive */
153
11.1k
                if (n1 - n > MEMRCHR_CUT_OFF)
154
4.57k
                    continue;
155
6.59k
                if (n <= MEMRCHR_CUT_OFF)
156
375
                    break;
157
6.21k
                s1 = p - MEMRCHR_CUT_OFF;
158
245k
                while (p > s1) {
159
239k
                    p--;
160
239k
                    if (*p == ch)
161
379
                        return (p - s);
162
239k
                }
163
5.84k
                n = p - s;
164
5.84k
            }
165
84.7k
            while (n > MEMRCHR_CUT_OFF);
166
84.7k
        }
167
84.7k
#endif
168
84.7k
    }
169
45.1k
#endif  /* HAVE_MEMRCHR */
170
45.1k
    p = s + n;
171
286k
    while (p > s) {
172
284k
        p--;
173
284k
        if (*p == ch)
174
43.3k
            return (p - s);
175
284k
    }
176
1.86k
    return -1;
177
45.1k
}
unicodeobject.c:asciilib_rfind_char
Line
Count
Source
117
60.4k
{
118
60.4k
    const STRINGLIB_CHAR *p;
119
60.4k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
60.4k
    if (n > MEMRCHR_CUT_OFF) {
126
19.6k
#if STRINGLIB_SIZEOF_CHAR == 1
127
19.6k
        p = memrchr(s, ch, n);
128
19.6k
        if (p != NULL)
129
19.4k
            return (p - s);
130
286
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
        if (needle != 0) {
141
            do {
142
                void *candidate = memrchr(s, needle,
143
                                          n * sizeof(STRINGLIB_CHAR));
144
                if (candidate == NULL)
145
                    return -1;
146
                n1 = n;
147
                p = (const STRINGLIB_CHAR *)
148
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
                n = p - s;
150
                if (*p == ch)
151
                    return n;
152
                /* False positive */
153
                if (n1 - n > MEMRCHR_CUT_OFF)
154
                    continue;
155
                if (n <= MEMRCHR_CUT_OFF)
156
                    break;
157
                s1 = p - MEMRCHR_CUT_OFF;
158
                while (p > s1) {
159
                    p--;
160
                    if (*p == ch)
161
                        return (p - s);
162
                }
163
                n = p - s;
164
            }
165
            while (n > MEMRCHR_CUT_OFF);
166
        }
167
#endif
168
19.6k
    }
169
40.8k
#endif  /* HAVE_MEMRCHR */
170
40.8k
    p = s + n;
171
233k
    while (p > s) {
172
203k
        p--;
173
203k
        if (*p == ch)
174
10.6k
            return (p - s);
175
203k
    }
176
30.1k
    return -1;
177
40.8k
}
bytes_methods.c:stringlib_rfind_char
Line
Count
Source
117
270k
{
118
270k
    const STRINGLIB_CHAR *p;
119
270k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
270k
    if (n > MEMRCHR_CUT_OFF) {
126
22.0k
#if STRINGLIB_SIZEOF_CHAR == 1
127
22.0k
        p = memrchr(s, ch, n);
128
22.0k
        if (p != NULL)
129
18.2k
            return (p - s);
130
3.84k
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
        if (needle != 0) {
141
            do {
142
                void *candidate = memrchr(s, needle,
143
                                          n * sizeof(STRINGLIB_CHAR));
144
                if (candidate == NULL)
145
                    return -1;
146
                n1 = n;
147
                p = (const STRINGLIB_CHAR *)
148
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
                n = p - s;
150
                if (*p == ch)
151
                    return n;
152
                /* False positive */
153
                if (n1 - n > MEMRCHR_CUT_OFF)
154
                    continue;
155
                if (n <= MEMRCHR_CUT_OFF)
156
                    break;
157
                s1 = p - MEMRCHR_CUT_OFF;
158
                while (p > s1) {
159
                    p--;
160
                    if (*p == ch)
161
                        return (p - s);
162
                }
163
                n = p - s;
164
            }
165
            while (n > MEMRCHR_CUT_OFF);
166
        }
167
#endif
168
22.0k
    }
169
248k
#endif  /* HAVE_MEMRCHR */
170
248k
    p = s + n;
171
1.76M
    while (p > s) {
172
1.60M
        p--;
173
1.60M
        if (*p == ch)
174
90.4k
            return (p - s);
175
1.60M
    }
176
158k
    return -1;
177
248k
}
Unexecuted instantiation: bytearrayobject.c:stringlib_rfind_char
178
179
#undef MEMRCHR_CUT_OFF
180
181
/* Change to a 1 to see logging comments walk through the algorithm. */
182
#if 0 && STRINGLIB_SIZEOF_CHAR == 1
183
# define LOG(...) printf(__VA_ARGS__)
184
# define LOG_STRING(s, n) printf("\"%.*s\"", (int)(n), s)
185
# define LOG_LINEUP() do {                                         \
186
    LOG("> "); LOG_STRING(haystack, len_haystack); LOG("\n> ");    \
187
    LOG("%*s",(int)(window_last - haystack + 1 - len_needle), ""); \
188
    LOG_STRING(needle, len_needle); LOG("\n");                     \
189
} while(0)
190
#else
191
# define LOG(...)
192
# define LOG_STRING(s, n)
193
# define LOG_LINEUP()
194
#endif
195
196
Py_LOCAL_INLINE(Py_ssize_t)
197
STRINGLIB(_lex_search)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
198
                       Py_ssize_t *return_period, int invert_alphabet)
199
44
{
200
    /* Do a lexicographic search. Essentially this:
201
           >>> max(needle[i:] for i in range(len(needle)+1))
202
       Also find the period of the right half.   */
203
44
    Py_ssize_t max_suffix = 0;
204
44
    Py_ssize_t candidate = 1;
205
44
    Py_ssize_t k = 0;
206
    // The period of the right half.
207
44
    Py_ssize_t period = 1;
208
209
440
    while (candidate + k < len_needle) {
210
        // each loop increases candidate + k + max_suffix
211
396
        STRINGLIB_CHAR a = needle[candidate + k];
212
396
        STRINGLIB_CHAR b = needle[max_suffix + k];
213
        // check if the suffix at candidate is better than max_suffix
214
396
        if (invert_alphabet ? (b < a) : (a < b)) {
215
            // Fell short of max_suffix.
216
            // The next k + 1 characters are non-increasing
217
            // from candidate, so they won't start a maximal suffix.
218
286
            candidate += k + 1;
219
286
            k = 0;
220
            // We've ruled out any period smaller than what's
221
            // been scanned since max_suffix.
222
286
            period = candidate - max_suffix;
223
286
        }
224
110
        else if (a == b) {
225
22
            if (k + 1 != period) {
226
                // Keep scanning the equal strings
227
0
                k++;
228
0
            }
229
22
            else {
230
                // Matched a whole period.
231
                // Start matching the next period.
232
22
                candidate += period;
233
22
                k = 0;
234
22
            }
235
22
        }
236
88
        else {
237
            // Did better than max_suffix, so replace it.
238
88
            max_suffix = candidate;
239
88
            candidate++;
240
88
            k = 0;
241
88
            period = 1;
242
88
        }
243
396
    }
244
44
    *return_period = period;
245
44
    return max_suffix;
246
44
}
Unexecuted instantiation: bytesobject.c:stringlib__lex_search
Unexecuted instantiation: unicodeobject.c:asciilib__lex_search
unicodeobject.c:ucs1lib__lex_search
Line
Count
Source
199
44
{
200
    /* Do a lexicographic search. Essentially this:
201
           >>> max(needle[i:] for i in range(len(needle)+1))
202
       Also find the period of the right half.   */
203
44
    Py_ssize_t max_suffix = 0;
204
44
    Py_ssize_t candidate = 1;
205
44
    Py_ssize_t k = 0;
206
    // The period of the right half.
207
44
    Py_ssize_t period = 1;
208
209
440
    while (candidate + k < len_needle) {
210
        // each loop increases candidate + k + max_suffix
211
396
        STRINGLIB_CHAR a = needle[candidate + k];
212
396
        STRINGLIB_CHAR b = needle[max_suffix + k];
213
        // check if the suffix at candidate is better than max_suffix
214
396
        if (invert_alphabet ? (b < a) : (a < b)) {
215
            // Fell short of max_suffix.
216
            // The next k + 1 characters are non-increasing
217
            // from candidate, so they won't start a maximal suffix.
218
286
            candidate += k + 1;
219
286
            k = 0;
220
            // We've ruled out any period smaller than what's
221
            // been scanned since max_suffix.
222
286
            period = candidate - max_suffix;
223
286
        }
224
110
        else if (a == b) {
225
22
            if (k + 1 != period) {
226
                // Keep scanning the equal strings
227
0
                k++;
228
0
            }
229
22
            else {
230
                // Matched a whole period.
231
                // Start matching the next period.
232
22
                candidate += period;
233
22
                k = 0;
234
22
            }
235
22
        }
236
88
        else {
237
            // Did better than max_suffix, so replace it.
238
88
            max_suffix = candidate;
239
88
            candidate++;
240
88
            k = 0;
241
88
            period = 1;
242
88
        }
243
396
    }
244
44
    *return_period = period;
245
44
    return max_suffix;
246
44
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__lex_search
Unexecuted instantiation: unicodeobject.c:ucs4lib__lex_search
Unexecuted instantiation: bytes_methods.c:stringlib__lex_search
Unexecuted instantiation: bytearrayobject.c:stringlib__lex_search
247
248
Py_LOCAL_INLINE(Py_ssize_t)
249
STRINGLIB(_factorize)(const STRINGLIB_CHAR *needle,
250
                      Py_ssize_t len_needle,
251
                      Py_ssize_t *return_period)
252
22
{
253
    /* Do a "critical factorization", making it so that:
254
       >>> needle = (left := needle[:cut]) + (right := needle[cut:])
255
       where the "local period" of the cut is maximal.
256
257
       The local period of the cut is the minimal length of a string w
258
       such that (left endswith w or w endswith left)
259
       and (right startswith w or w startswith right).
260
261
       The Critical Factorization Theorem says that this maximal local
262
       period is the global period of the string.
263
264
       Crochemore and Perrin (1991) show that this cut can be computed
265
       as the later of two cuts: one that gives a lexicographically
266
       maximal right half, and one that gives the same with the
267
       with respect to a reversed alphabet-ordering.
268
269
       This is what we want to happen:
270
           >>> x = "GCAGAGAG"
271
           >>> cut, period = factorize(x)
272
           >>> x[:cut], (right := x[cut:])
273
           ('GC', 'AGAGAG')
274
           >>> period  # right half period
275
           2
276
           >>> right[period:] == right[:-period]
277
           True
278
279
       This is how the local period lines up in the above example:
280
                GC | AGAGAG
281
           AGAGAGC = AGAGAGC
282
       The length of this minimal repetition is 7, which is indeed the
283
       period of the original string. */
284
285
22
    Py_ssize_t cut1, period1, cut2, period2, cut, period;
286
22
    cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0);
287
22
    cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1);
288
289
    // Take the later cut.
290
22
    if (cut1 > cut2) {
291
22
        period = period1;
292
22
        cut = cut1;
293
22
    }
294
0
    else {
295
0
        period = period2;
296
0
        cut = cut2;
297
0
    }
298
299
22
    LOG("split: "); LOG_STRING(needle, cut);
300
22
    LOG(" + "); LOG_STRING(needle + cut, len_needle - cut);
301
22
    LOG("\n");
302
303
22
    *return_period = period;
304
22
    return cut;
305
22
}
Unexecuted instantiation: bytesobject.c:stringlib__factorize
Unexecuted instantiation: unicodeobject.c:asciilib__factorize
unicodeobject.c:ucs1lib__factorize
Line
Count
Source
252
22
{
253
    /* Do a "critical factorization", making it so that:
254
       >>> needle = (left := needle[:cut]) + (right := needle[cut:])
255
       where the "local period" of the cut is maximal.
256
257
       The local period of the cut is the minimal length of a string w
258
       such that (left endswith w or w endswith left)
259
       and (right startswith w or w startswith right).
260
261
       The Critical Factorization Theorem says that this maximal local
262
       period is the global period of the string.
263
264
       Crochemore and Perrin (1991) show that this cut can be computed
265
       as the later of two cuts: one that gives a lexicographically
266
       maximal right half, and one that gives the same with the
267
       with respect to a reversed alphabet-ordering.
268
269
       This is what we want to happen:
270
           >>> x = "GCAGAGAG"
271
           >>> cut, period = factorize(x)
272
           >>> x[:cut], (right := x[cut:])
273
           ('GC', 'AGAGAG')
274
           >>> period  # right half period
275
           2
276
           >>> right[period:] == right[:-period]
277
           True
278
279
       This is how the local period lines up in the above example:
280
                GC | AGAGAG
281
           AGAGAGC = AGAGAGC
282
       The length of this minimal repetition is 7, which is indeed the
283
       period of the original string. */
284
285
22
    Py_ssize_t cut1, period1, cut2, period2, cut, period;
286
22
    cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0);
287
22
    cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1);
288
289
    // Take the later cut.
290
22
    if (cut1 > cut2) {
291
22
        period = period1;
292
22
        cut = cut1;
293
22
    }
294
0
    else {
295
0
        period = period2;
296
0
        cut = cut2;
297
0
    }
298
299
22
    LOG("split: "); LOG_STRING(needle, cut);
300
22
    LOG(" + "); LOG_STRING(needle + cut, len_needle - cut);
301
22
    LOG("\n");
302
303
22
    *return_period = period;
304
22
    return cut;
305
22
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__factorize
Unexecuted instantiation: unicodeobject.c:ucs4lib__factorize
Unexecuted instantiation: bytes_methods.c:stringlib__factorize
Unexecuted instantiation: bytearrayobject.c:stringlib__factorize
306
307
308
242
#define SHIFT_TYPE uint8_t
309
#define MAX_SHIFT UINT8_MAX
310
311
81.4k
#define TABLE_SIZE_BITS 6u
312
81.4k
#define TABLE_SIZE (1U << TABLE_SIZE_BITS)
313
79.9k
#define TABLE_MASK (TABLE_SIZE - 1U)
314
315
typedef struct STRINGLIB(_pre) {
316
    const STRINGLIB_CHAR *needle;
317
    Py_ssize_t len_needle;
318
    Py_ssize_t cut;
319
    Py_ssize_t period;
320
    Py_ssize_t gap;
321
    int is_periodic;
322
    SHIFT_TYPE table[TABLE_SIZE];
323
} STRINGLIB(prework);
324
325
326
static void
327
STRINGLIB(_preprocess)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
328
                       STRINGLIB(prework) *p)
329
22
{
330
22
    p->needle = needle;
331
22
    p->len_needle = len_needle;
332
22
    p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period));
333
22
    assert(p->period + p->cut <= len_needle);
334
22
    p->is_periodic = (0 == memcmp(needle,
335
22
                                  needle + p->period,
336
22
                                  p->cut * STRINGLIB_SIZEOF_CHAR));
337
22
    if (p->is_periodic) {
338
0
        assert(p->cut <= len_needle/2);
339
0
        assert(p->cut < p->period);
340
0
    }
341
22
    else {
342
        // A lower bound on the period
343
22
        p->period = Py_MAX(p->cut, len_needle - p->cut) + 1;
344
22
    }
345
    // The gap between the last character and the previous
346
    // occurrence of an equivalent character (modulo TABLE_SIZE)
347
22
    p->gap = len_needle;
348
22
    STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK;
349
154
    for (Py_ssize_t i = len_needle - 2; i >= 0; i--) {
350
154
        STRINGLIB_CHAR x = needle[i] & TABLE_MASK;
351
154
        if (x == last) {
352
22
            p->gap = len_needle - 1 - i;
353
22
            break;
354
22
        }
355
154
    }
356
    // Fill up a compressed Boyer-Moore "Bad Character" table
357
22
    Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT);
358
1.43k
    for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; i++) {
359
1.40k
        p->table[i] = Py_SAFE_DOWNCAST(not_found_shift,
360
1.40k
                                       Py_ssize_t, SHIFT_TYPE);
361
1.40k
    }
362
242
    for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; i++) {
363
220
        SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i,
364
220
                                            Py_ssize_t, SHIFT_TYPE);
365
220
        p->table[needle[i] & TABLE_MASK] = shift;
366
220
    }
367
22
}
Unexecuted instantiation: bytesobject.c:stringlib__preprocess
Unexecuted instantiation: unicodeobject.c:asciilib__preprocess
unicodeobject.c:ucs1lib__preprocess
Line
Count
Source
329
22
{
330
22
    p->needle = needle;
331
22
    p->len_needle = len_needle;
332
22
    p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period));
333
22
    assert(p->period + p->cut <= len_needle);
334
22
    p->is_periodic = (0 == memcmp(needle,
335
22
                                  needle + p->period,
336
22
                                  p->cut * STRINGLIB_SIZEOF_CHAR));
337
22
    if (p->is_periodic) {
338
0
        assert(p->cut <= len_needle/2);
339
0
        assert(p->cut < p->period);
340
0
    }
341
22
    else {
342
        // A lower bound on the period
343
22
        p->period = Py_MAX(p->cut, len_needle - p->cut) + 1;
344
22
    }
345
    // The gap between the last character and the previous
346
    // occurrence of an equivalent character (modulo TABLE_SIZE)
347
22
    p->gap = len_needle;
348
22
    STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK;
349
154
    for (Py_ssize_t i = len_needle - 2; i >= 0; i--) {
350
154
        STRINGLIB_CHAR x = needle[i] & TABLE_MASK;
351
154
        if (x == last) {
352
22
            p->gap = len_needle - 1 - i;
353
22
            break;
354
22
        }
355
154
    }
356
    // Fill up a compressed Boyer-Moore "Bad Character" table
357
22
    Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT);
358
1.43k
    for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; i++) {
359
1.40k
        p->table[i] = Py_SAFE_DOWNCAST(not_found_shift,
360
1.40k
                                       Py_ssize_t, SHIFT_TYPE);
361
1.40k
    }
362
242
    for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; i++) {
363
220
        SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i,
364
220
                                            Py_ssize_t, SHIFT_TYPE);
365
220
        p->table[needle[i] & TABLE_MASK] = shift;
366
220
    }
367
22
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__preprocess
Unexecuted instantiation: unicodeobject.c:ucs4lib__preprocess
Unexecuted instantiation: bytes_methods.c:stringlib__preprocess
Unexecuted instantiation: bytearrayobject.c:stringlib__preprocess
368
369
static Py_ssize_t
370
STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
371
                    STRINGLIB(prework) *p)
372
22
{
373
    // Crochemore and Perrin's (1991) Two-Way algorithm.
374
    // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
375
22
    const Py_ssize_t len_needle = p->len_needle;
376
22
    const Py_ssize_t cut = p->cut;
377
22
    Py_ssize_t period = p->period;
378
22
    const STRINGLIB_CHAR *const needle = p->needle;
379
22
    const STRINGLIB_CHAR *window_last = haystack + len_needle - 1;
380
22
    const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack;
381
22
    SHIFT_TYPE *table = p->table;
382
22
    const STRINGLIB_CHAR *window;
383
22
    LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack);
384
385
22
    Py_ssize_t gap = p->gap;
386
22
    Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap);
387
22
    if (p->is_periodic) {
388
0
        LOG("Needle is periodic.\n");
389
0
        Py_ssize_t memory = 0;
390
0
      periodicwindowloop:
391
0
        while (window_last < haystack_end) {
392
0
            assert(memory == 0);
393
0
            for (;;) {
394
0
                LOG_LINEUP();
395
0
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
396
0
                window_last += shift;
397
0
                if (shift == 0) {
398
0
                    break;
399
0
                }
400
0
                if (window_last >= haystack_end) {
401
0
                    return -1;
402
0
                }
403
0
                LOG("Horspool skip\n");
404
0
            }
405
0
          no_shift:
406
0
            window = window_last - len_needle + 1;
407
0
            assert((window[len_needle - 1] & TABLE_MASK) ==
408
0
                   (needle[len_needle - 1] & TABLE_MASK));
409
0
            Py_ssize_t i = Py_MAX(cut, memory);
410
0
            for (; i < len_needle; i++) {
411
0
                if (needle[i] != window[i]) {
412
0
                    if (i < gap_jump_end) {
413
0
                        LOG("Early right half mismatch: jump by gap.\n");
414
0
                        assert(gap >= i - cut + 1);
415
0
                        window_last += gap;
416
0
                    }
417
0
                    else {
418
0
                        LOG("Late right half mismatch: jump by n (>gap)\n");
419
0
                        assert(i - cut + 1 > gap);
420
0
                        window_last += i - cut + 1;
421
0
                    }
422
0
                    memory = 0;
423
0
                    goto periodicwindowloop;
424
0
                }
425
0
            }
426
0
            for (i = memory; i < cut; i++) {
427
0
                if (needle[i] != window[i]) {
428
0
                    LOG("Left half does not match.\n");
429
0
                    window_last += period;
430
0
                    memory = len_needle - period;
431
0
                    if (window_last >= haystack_end) {
432
0
                        return -1;
433
0
                    }
434
0
                    Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
435
0
                    if (shift) {
436
                        // A mismatch has been identified to the right
437
                        // of where i will next start, so we can jump
438
                        // at least as far as if the mismatch occurred
439
                        // on the first comparison.
440
0
                        Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1;
441
0
                        LOG("Skip with Memory.\n");
442
0
                        memory = 0;
443
0
                        window_last += Py_MAX(shift, mem_jump);
444
0
                        goto periodicwindowloop;
445
0
                    }
446
0
                    goto no_shift;
447
0
                }
448
0
            }
449
0
            LOG("Found a match!\n");
450
0
            return window - haystack;
451
0
        }
452
0
    }
453
22
    else {
454
22
        period = Py_MAX(gap, period);
455
22
        LOG("Needle is not periodic.\n");
456
15.1k
      windowloop:
457
15.1k
        while (window_last < haystack_end) {
458
79.5k
            for (;;) {
459
79.5k
                LOG_LINEUP();
460
79.5k
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
461
79.5k
                window_last += shift;
462
79.5k
                if (shift == 0) {
463
15.1k
                    break;
464
15.1k
                }
465
64.4k
                if (window_last >= haystack_end) {
466
17
                    return -1;
467
17
                }
468
64.4k
                LOG("Horspool skip\n");
469
64.4k
            }
470
15.1k
            window = window_last - len_needle + 1;
471
15.1k
            assert((window[len_needle - 1] & TABLE_MASK) ==
472
15.1k
                   (needle[len_needle - 1] & TABLE_MASK));
473
15.1k
            Py_ssize_t i = cut;
474
15.3k
            for (; i < len_needle; i++) {
475
15.2k
                if (needle[i] != window[i]) {
476
15.0k
                    if (i < gap_jump_end) {
477
15.0k
                        LOG("Early right half mismatch: jump by gap.\n");
478
15.0k
                        assert(gap >= i - cut + 1);
479
15.0k
                        window_last += gap;
480
15.0k
                    }
481
0
                    else {
482
0
                        LOG("Late right half mismatch: jump by n (>gap)\n");
483
0
                        assert(i - cut + 1 > gap);
484
0
                        window_last += i - cut + 1;
485
0
                    }
486
15.0k
                    goto windowloop;
487
15.0k
                }
488
15.2k
            }
489
199
            for (Py_ssize_t i = 0; i < cut; i++) {
490
196
                if (needle[i] != window[i]) {
491
92
                    LOG("Left half does not match.\n");
492
92
                    window_last += period;
493
92
                    goto windowloop;
494
92
                }
495
196
            }
496
3
            LOG("Found a match!\n");
497
3
            return window - haystack;
498
95
        }
499
15.1k
    }
500
2
    LOG("Not found. Returning -1.\n");
501
2
    return -1;
502
22
}
Unexecuted instantiation: bytesobject.c:stringlib__two_way
Unexecuted instantiation: unicodeobject.c:asciilib__two_way
unicodeobject.c:ucs1lib__two_way
Line
Count
Source
372
22
{
373
    // Crochemore and Perrin's (1991) Two-Way algorithm.
374
    // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
375
22
    const Py_ssize_t len_needle = p->len_needle;
376
22
    const Py_ssize_t cut = p->cut;
377
22
    Py_ssize_t period = p->period;
378
22
    const STRINGLIB_CHAR *const needle = p->needle;
379
22
    const STRINGLIB_CHAR *window_last = haystack + len_needle - 1;
380
22
    const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack;
381
22
    SHIFT_TYPE *table = p->table;
382
22
    const STRINGLIB_CHAR *window;
383
22
    LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack);
384
385
22
    Py_ssize_t gap = p->gap;
386
22
    Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap);
387
22
    if (p->is_periodic) {
388
0
        LOG("Needle is periodic.\n");
389
0
        Py_ssize_t memory = 0;
390
0
      periodicwindowloop:
391
0
        while (window_last < haystack_end) {
392
0
            assert(memory == 0);
393
0
            for (;;) {
394
0
                LOG_LINEUP();
395
0
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
396
0
                window_last += shift;
397
0
                if (shift == 0) {
398
0
                    break;
399
0
                }
400
0
                if (window_last >= haystack_end) {
401
0
                    return -1;
402
0
                }
403
0
                LOG("Horspool skip\n");
404
0
            }
405
0
          no_shift:
406
0
            window = window_last - len_needle + 1;
407
0
            assert((window[len_needle - 1] & TABLE_MASK) ==
408
0
                   (needle[len_needle - 1] & TABLE_MASK));
409
0
            Py_ssize_t i = Py_MAX(cut, memory);
410
0
            for (; i < len_needle; i++) {
411
0
                if (needle[i] != window[i]) {
412
0
                    if (i < gap_jump_end) {
413
0
                        LOG("Early right half mismatch: jump by gap.\n");
414
0
                        assert(gap >= i - cut + 1);
415
0
                        window_last += gap;
416
0
                    }
417
0
                    else {
418
0
                        LOG("Late right half mismatch: jump by n (>gap)\n");
419
0
                        assert(i - cut + 1 > gap);
420
0
                        window_last += i - cut + 1;
421
0
                    }
422
0
                    memory = 0;
423
0
                    goto periodicwindowloop;
424
0
                }
425
0
            }
426
0
            for (i = memory; i < cut; i++) {
427
0
                if (needle[i] != window[i]) {
428
0
                    LOG("Left half does not match.\n");
429
0
                    window_last += period;
430
0
                    memory = len_needle - period;
431
0
                    if (window_last >= haystack_end) {
432
0
                        return -1;
433
0
                    }
434
0
                    Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
435
0
                    if (shift) {
436
                        // A mismatch has been identified to the right
437
                        // of where i will next start, so we can jump
438
                        // at least as far as if the mismatch occurred
439
                        // on the first comparison.
440
0
                        Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1;
441
0
                        LOG("Skip with Memory.\n");
442
0
                        memory = 0;
443
0
                        window_last += Py_MAX(shift, mem_jump);
444
0
                        goto periodicwindowloop;
445
0
                    }
446
0
                    goto no_shift;
447
0
                }
448
0
            }
449
0
            LOG("Found a match!\n");
450
0
            return window - haystack;
451
0
        }
452
0
    }
453
22
    else {
454
22
        period = Py_MAX(gap, period);
455
22
        LOG("Needle is not periodic.\n");
456
15.1k
      windowloop:
457
15.1k
        while (window_last < haystack_end) {
458
79.5k
            for (;;) {
459
79.5k
                LOG_LINEUP();
460
79.5k
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
461
79.5k
                window_last += shift;
462
79.5k
                if (shift == 0) {
463
15.1k
                    break;
464
15.1k
                }
465
64.4k
                if (window_last >= haystack_end) {
466
17
                    return -1;
467
17
                }
468
64.4k
                LOG("Horspool skip\n");
469
64.4k
            }
470
15.1k
            window = window_last - len_needle + 1;
471
15.1k
            assert((window[len_needle - 1] & TABLE_MASK) ==
472
15.1k
                   (needle[len_needle - 1] & TABLE_MASK));
473
15.1k
            Py_ssize_t i = cut;
474
15.3k
            for (; i < len_needle; i++) {
475
15.2k
                if (needle[i] != window[i]) {
476
15.0k
                    if (i < gap_jump_end) {
477
15.0k
                        LOG("Early right half mismatch: jump by gap.\n");
478
15.0k
                        assert(gap >= i - cut + 1);
479
15.0k
                        window_last += gap;
480
15.0k
                    }
481
0
                    else {
482
0
                        LOG("Late right half mismatch: jump by n (>gap)\n");
483
0
                        assert(i - cut + 1 > gap);
484
0
                        window_last += i - cut + 1;
485
0
                    }
486
15.0k
                    goto windowloop;
487
15.0k
                }
488
15.2k
            }
489
199
            for (Py_ssize_t i = 0; i < cut; i++) {
490
196
                if (needle[i] != window[i]) {
491
92
                    LOG("Left half does not match.\n");
492
92
                    window_last += period;
493
92
                    goto windowloop;
494
92
                }
495
196
            }
496
3
            LOG("Found a match!\n");
497
3
            return window - haystack;
498
95
        }
499
15.1k
    }
500
2
    LOG("Not found. Returning -1.\n");
501
2
    return -1;
502
22
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__two_way
Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way
Unexecuted instantiation: bytes_methods.c:stringlib__two_way
Unexecuted instantiation: bytearrayobject.c:stringlib__two_way
503
504
505
static Py_ssize_t
506
STRINGLIB(_two_way_find)(const STRINGLIB_CHAR *haystack,
507
                         Py_ssize_t len_haystack,
508
                         const STRINGLIB_CHAR *needle,
509
                         Py_ssize_t len_needle)
510
22
{
511
22
    LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack);
512
22
    STRINGLIB(prework) p;
513
22
    STRINGLIB(_preprocess)(needle, len_needle, &p);
514
22
    return STRINGLIB(_two_way)(haystack, len_haystack, &p);
515
22
}
Unexecuted instantiation: bytesobject.c:stringlib__two_way_find
Unexecuted instantiation: unicodeobject.c:asciilib__two_way_find
unicodeobject.c:ucs1lib__two_way_find
Line
Count
Source
510
22
{
511
22
    LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack);
512
22
    STRINGLIB(prework) p;
513
22
    STRINGLIB(_preprocess)(needle, len_needle, &p);
514
22
    return STRINGLIB(_two_way)(haystack, len_haystack, &p);
515
22
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__two_way_find
Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way_find
Unexecuted instantiation: bytes_methods.c:stringlib__two_way_find
Unexecuted instantiation: bytearrayobject.c:stringlib__two_way_find
516
517
518
static Py_ssize_t
519
STRINGLIB(_two_way_count)(const STRINGLIB_CHAR *haystack,
520
                          Py_ssize_t len_haystack,
521
                          const STRINGLIB_CHAR *needle,
522
                          Py_ssize_t len_needle,
523
                          Py_ssize_t maxcount)
524
0
{
525
0
    LOG("###### Counting \"%s\" in \"%s\".\n", needle, haystack);
526
0
    STRINGLIB(prework) p;
527
0
    STRINGLIB(_preprocess)(needle, len_needle, &p);
528
0
    Py_ssize_t index = 0, count = 0;
529
0
    while (1) {
530
0
        Py_ssize_t result;
531
0
        result = STRINGLIB(_two_way)(haystack + index,
532
0
                                     len_haystack - index, &p);
533
0
        if (result == -1) {
534
0
            return count;
535
0
        }
536
0
        count++;
537
0
        if (count == maxcount) {
538
0
            return maxcount;
539
0
        }
540
0
        index += result + len_needle;
541
0
    }
542
0
    return count;
543
0
}
Unexecuted instantiation: bytesobject.c:stringlib__two_way_count
Unexecuted instantiation: unicodeobject.c:asciilib__two_way_count
Unexecuted instantiation: unicodeobject.c:ucs1lib__two_way_count
Unexecuted instantiation: unicodeobject.c:ucs2lib__two_way_count
Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way_count
Unexecuted instantiation: bytes_methods.c:stringlib__two_way_count
Unexecuted instantiation: bytearrayobject.c:stringlib__two_way_count
544
545
#undef SHIFT_TYPE
546
#undef NOT_FOUND
547
#undef SHIFT_OVERFLOW
548
#undef TABLE_SIZE_BITS
549
#undef TABLE_SIZE
550
#undef TABLE_MASK
551
552
#undef LOG
553
#undef LOG_STRING
554
#undef LOG_LINEUP
555
556
static inline Py_ssize_t
557
STRINGLIB(default_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
558
                        const STRINGLIB_CHAR* p, Py_ssize_t m,
559
                        Py_ssize_t maxcount, int mode)
560
37.9M
{
561
37.9M
    const Py_ssize_t w = n - m;
562
37.9M
    Py_ssize_t mlast = m - 1, count = 0;
563
37.9M
    Py_ssize_t gap = mlast;
564
37.9M
    const STRINGLIB_CHAR last = p[mlast];
565
37.9M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
37.9M
    unsigned long mask = 0;
568
215M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
177M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
177M
        if (p[i] == last) {
571
1.24M
            gap = mlast - i - 1;
572
1.24M
        }
573
177M
    }
574
37.9M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
1.00G
    for (Py_ssize_t i = 0; i <= w; i++) {
577
974M
        if (ss[i] == last) {
578
            /* candidate match */
579
43.7M
            Py_ssize_t j;
580
67.9M
            for (j = 0; j < mlast; j++) {
581
45.0M
                if (s[i+j] != p[j]) {
582
20.8M
                    break;
583
20.8M
                }
584
45.0M
            }
585
43.7M
            if (j == mlast) {
586
                /* got a match! */
587
22.8M
                if (mode != FAST_COUNT) {
588
11.9M
                    return i;
589
11.9M
                }
590
10.9M
                count++;
591
10.9M
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
10.9M
                i = i + mlast;
595
10.9M
                continue;
596
10.9M
            }
597
            /* miss: check if next character is part of pattern */
598
20.8M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
4.87M
                i = i + m;
600
4.87M
            }
601
16.0M
            else {
602
16.0M
                i = i + gap;
603
16.0M
            }
604
20.8M
        }
605
930M
        else {
606
            /* skip: check if next character is part of pattern */
607
930M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
804M
                i = i + m;
609
804M
            }
610
930M
        }
611
974M
    }
612
26.0M
    return mode == FAST_COUNT ? count : -1;
613
37.9M
}
Unexecuted instantiation: bytesobject.c:stringlib_default_find
unicodeobject.c:asciilib_default_find
Line
Count
Source
560
2.30M
{
561
2.30M
    const Py_ssize_t w = n - m;
562
2.30M
    Py_ssize_t mlast = m - 1, count = 0;
563
2.30M
    Py_ssize_t gap = mlast;
564
2.30M
    const STRINGLIB_CHAR last = p[mlast];
565
2.30M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
2.30M
    unsigned long mask = 0;
568
5.03M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
2.72M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
2.72M
        if (p[i] == last) {
571
35.8k
            gap = mlast - i - 1;
572
35.8k
        }
573
2.72M
    }
574
2.30M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
42.7M
    for (Py_ssize_t i = 0; i <= w; i++) {
577
42.6M
        if (ss[i] == last) {
578
            /* candidate match */
579
4.08M
            Py_ssize_t j;
580
6.73M
            for (j = 0; j < mlast; j++) {
581
4.47M
                if (s[i+j] != p[j]) {
582
1.82M
                    break;
583
1.82M
                }
584
4.47M
            }
585
4.08M
            if (j == mlast) {
586
                /* got a match! */
587
2.25M
                if (mode != FAST_COUNT) {
588
2.25M
                    return i;
589
2.25M
                }
590
0
                count++;
591
0
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
0
                i = i + mlast;
595
0
                continue;
596
0
            }
597
            /* miss: check if next character is part of pattern */
598
1.82M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
112k
                i = i + m;
600
112k
            }
601
1.71M
            else {
602
1.71M
                i = i + gap;
603
1.71M
            }
604
1.82M
        }
605
38.6M
        else {
606
            /* skip: check if next character is part of pattern */
607
38.6M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
35.1M
                i = i + m;
609
35.1M
            }
610
38.6M
        }
611
42.6M
    }
612
47.2k
    return mode == FAST_COUNT ? count : -1;
613
2.30M
}
unicodeobject.c:ucs1lib_default_find
Line
Count
Source
560
27.9M
{
561
27.9M
    const Py_ssize_t w = n - m;
562
27.9M
    Py_ssize_t mlast = m - 1, count = 0;
563
27.9M
    Py_ssize_t gap = mlast;
564
27.9M
    const STRINGLIB_CHAR last = p[mlast];
565
27.9M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
27.9M
    unsigned long mask = 0;
568
194M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
166M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
166M
        if (p[i] == last) {
571
1.14M
            gap = mlast - i - 1;
572
1.14M
        }
573
166M
    }
574
27.9M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
439M
    for (Py_ssize_t i = 0; i <= w; i++) {
577
413M
        if (ss[i] == last) {
578
            /* candidate match */
579
14.6M
            Py_ssize_t j;
580
20.8M
            for (j = 0; j < mlast; j++) {
581
15.2M
                if (s[i+j] != p[j]) {
582
8.93M
                    break;
583
8.93M
                }
584
15.2M
            }
585
14.6M
            if (j == mlast) {
586
                /* got a match! */
587
5.68M
                if (mode != FAST_COUNT) {
588
2.07M
                    return i;
589
2.07M
                }
590
3.60M
                count++;
591
3.60M
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
3.60M
                i = i + mlast;
595
3.60M
                continue;
596
3.60M
            }
597
            /* miss: check if next character is part of pattern */
598
8.93M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
1.85M
                i = i + m;
600
1.85M
            }
601
7.07M
            else {
602
7.07M
                i = i + gap;
603
7.07M
            }
604
8.93M
        }
605
398M
        else {
606
            /* skip: check if next character is part of pattern */
607
398M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
290M
                i = i + m;
609
290M
            }
610
398M
        }
611
413M
    }
612
25.8M
    return mode == FAST_COUNT ? count : -1;
613
27.9M
}
unicodeobject.c:ucs2lib_default_find
Line
Count
Source
560
3.28M
{
561
3.28M
    const Py_ssize_t w = n - m;
562
3.28M
    Py_ssize_t mlast = m - 1, count = 0;
563
3.28M
    Py_ssize_t gap = mlast;
564
3.28M
    const STRINGLIB_CHAR last = p[mlast];
565
3.28M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
3.28M
    unsigned long mask = 0;
568
6.65M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
3.37M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
3.37M
        if (p[i] == last) {
571
39.4k
            gap = mlast - i - 1;
572
39.4k
        }
573
3.37M
    }
574
3.28M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
308M
    for (Py_ssize_t i = 0; i <= w; i++) {
577
308M
        if (ss[i] == last) {
578
            /* candidate match */
579
11.1M
            Py_ssize_t j;
580
17.4M
            for (j = 0; j < mlast; j++) {
581
11.1M
                if (s[i+j] != p[j]) {
582
4.88M
                    break;
583
4.88M
                }
584
11.1M
            }
585
11.1M
            if (j == mlast) {
586
                /* got a match! */
587
6.21M
                if (mode != FAST_COUNT) {
588
3.18M
                    return i;
589
3.18M
                }
590
3.03M
                count++;
591
3.03M
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
3.03M
                i = i + mlast;
595
3.03M
                continue;
596
3.03M
            }
597
            /* miss: check if next character is part of pattern */
598
4.88M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
1.17M
                i = i + m;
600
1.17M
            }
601
3.70M
            else {
602
3.70M
                i = i + gap;
603
3.70M
            }
604
4.88M
        }
605
296M
        else {
606
            /* skip: check if next character is part of pattern */
607
296M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
288M
                i = i + m;
609
288M
            }
610
296M
        }
611
308M
    }
612
99.5k
    return mode == FAST_COUNT ? count : -1;
613
3.28M
}
unicodeobject.c:ucs4lib_default_find
Line
Count
Source
560
4.46M
{
561
4.46M
    const Py_ssize_t w = n - m;
562
4.46M
    Py_ssize_t mlast = m - 1, count = 0;
563
4.46M
    Py_ssize_t gap = mlast;
564
4.46M
    const STRINGLIB_CHAR last = p[mlast];
565
4.46M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
4.46M
    unsigned long mask = 0;
568
9.05M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
4.59M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
4.59M
        if (p[i] == last) {
571
18.6k
            gap = mlast - i - 1;
572
18.6k
        }
573
4.59M
    }
574
4.46M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
208M
    for (Py_ssize_t i = 0; i <= w; i++) {
577
208M
        if (ss[i] == last) {
578
            /* candidate match */
579
13.9M
            Py_ssize_t j;
580
22.8M
            for (j = 0; j < mlast; j++) {
581
14.0M
                if (s[i+j] != p[j]) {
582
5.20M
                    break;
583
5.20M
                }
584
14.0M
            }
585
13.9M
            if (j == mlast) {
586
                /* got a match! */
587
8.73M
                if (mode != FAST_COUNT) {
588
4.43M
                    return i;
589
4.43M
                }
590
4.29M
                count++;
591
4.29M
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
4.29M
                i = i + mlast;
595
4.29M
                continue;
596
4.29M
            }
597
            /* miss: check if next character is part of pattern */
598
5.20M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
1.70M
                i = i + m;
600
1.70M
            }
601
3.50M
            else {
602
3.50M
                i = i + gap;
603
3.50M
            }
604
5.20M
        }
605
194M
        else {
606
            /* skip: check if next character is part of pattern */
607
194M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
188M
                i = i + m;
609
188M
            }
610
194M
        }
611
208M
    }
612
29.8k
    return mode == FAST_COUNT ? count : -1;
613
4.46M
}
bytes_methods.c:stringlib_default_find
Line
Count
Source
560
2.93k
{
561
2.93k
    const Py_ssize_t w = n - m;
562
2.93k
    Py_ssize_t mlast = m - 1, count = 0;
563
2.93k
    Py_ssize_t gap = mlast;
564
2.93k
    const STRINGLIB_CHAR last = p[mlast];
565
2.93k
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
2.93k
    unsigned long mask = 0;
568
11.7k
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
8.79k
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
8.79k
        if (p[i] == last) {
571
2.93k
            gap = mlast - i - 1;
572
2.93k
        }
573
8.79k
    }
574
2.93k
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
1.48M
    for (Py_ssize_t i = 0; i <= w; i++) {
577
1.48M
        if (ss[i] == last) {
578
            /* candidate match */
579
35.7k
            Py_ssize_t j;
580
44.9k
            for (j = 0; j < mlast; j++) {
581
42.1k
                if (s[i+j] != p[j]) {
582
33.0k
                    break;
583
33.0k
                }
584
42.1k
            }
585
35.7k
            if (j == mlast) {
586
                /* got a match! */
587
2.71k
                if (mode != FAST_COUNT) {
588
2.71k
                    return i;
589
2.71k
                }
590
0
                count++;
591
0
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
0
                i = i + mlast;
595
0
                continue;
596
0
            }
597
            /* miss: check if next character is part of pattern */
598
33.0k
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
27.2k
                i = i + m;
600
27.2k
            }
601
5.76k
            else {
602
5.76k
                i = i + gap;
603
5.76k
            }
604
33.0k
        }
605
1.44M
        else {
606
            /* skip: check if next character is part of pattern */
607
1.44M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
1.11M
                i = i + m;
609
1.11M
            }
610
1.44M
        }
611
1.48M
    }
612
223
    return mode == FAST_COUNT ? count : -1;
613
2.93k
}
Unexecuted instantiation: bytearrayobject.c:stringlib_default_find
614
615
616
static Py_ssize_t
617
STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
618
                         const STRINGLIB_CHAR* p, Py_ssize_t m,
619
                         Py_ssize_t maxcount, int mode)
620
0
{
621
0
    const Py_ssize_t w = n - m;
622
0
    Py_ssize_t mlast = m - 1, count = 0;
623
0
    Py_ssize_t gap = mlast;
624
0
    Py_ssize_t hits = 0, res;
625
0
    const STRINGLIB_CHAR last = p[mlast];
626
0
    const STRINGLIB_CHAR *const ss = &s[mlast];
627
628
0
    unsigned long mask = 0;
629
0
    for (Py_ssize_t i = 0; i < mlast; i++) {
630
0
        STRINGLIB_BLOOM_ADD(mask, p[i]);
631
0
        if (p[i] == last) {
632
0
            gap = mlast - i - 1;
633
0
        }
634
0
    }
635
0
    STRINGLIB_BLOOM_ADD(mask, last);
636
637
0
    for (Py_ssize_t i = 0; i <= w; i++) {
638
0
        if (ss[i] == last) {
639
            /* candidate match */
640
0
            Py_ssize_t j;
641
0
            for (j = 0; j < mlast; j++) {
642
0
                if (s[i+j] != p[j]) {
643
0
                    break;
644
0
                }
645
0
            }
646
0
            if (j == mlast) {
647
                /* got a match! */
648
0
                if (mode != FAST_COUNT) {
649
0
                    return i;
650
0
                }
651
0
                count++;
652
0
                if (count == maxcount) {
653
0
                    return maxcount;
654
0
                }
655
0
                i = i + mlast;
656
0
                continue;
657
0
            }
658
0
            hits += j + 1;
659
0
            if (hits > m / 4 && w - i > 2000) {
660
0
                if (mode == FAST_SEARCH) {
661
0
                    res = STRINGLIB(_two_way_find)(s + i, n - i, p, m);
662
0
                    return res == -1 ? -1 : res + i;
663
0
                }
664
0
                else {
665
0
                    res = STRINGLIB(_two_way_count)(s + i, n - i, p, m,
666
0
                                                    maxcount - count);
667
0
                    return res + count;
668
0
                }
669
0
            }
670
            /* miss: check if next character is part of pattern */
671
0
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
672
0
                i = i + m;
673
0
            }
674
0
            else {
675
0
                i = i + gap;
676
0
            }
677
0
        }
678
0
        else {
679
            /* skip: check if next character is part of pattern */
680
0
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
681
0
                i = i + m;
682
0
            }
683
0
        }
684
0
    }
685
0
    return mode == FAST_COUNT ? count : -1;
686
0
}
Unexecuted instantiation: bytesobject.c:stringlib_adaptive_find
Unexecuted instantiation: unicodeobject.c:asciilib_adaptive_find
Unexecuted instantiation: unicodeobject.c:ucs1lib_adaptive_find
Unexecuted instantiation: unicodeobject.c:ucs2lib_adaptive_find
Unexecuted instantiation: unicodeobject.c:ucs4lib_adaptive_find
Unexecuted instantiation: bytes_methods.c:stringlib_adaptive_find
Unexecuted instantiation: bytearrayobject.c:stringlib_adaptive_find
687
688
689
static Py_ssize_t
690
STRINGLIB(default_rfind)(const STRINGLIB_CHAR* s, Py_ssize_t n,
691
                         const STRINGLIB_CHAR* p, Py_ssize_t m,
692
                         Py_ssize_t maxcount, int mode)
693
8.18k
{
694
    /* create compressed boyer-moore delta 1 table */
695
8.18k
    unsigned long mask = 0;
696
8.18k
    Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m;
697
698
    /* process pattern[0] outside the loop */
699
8.18k
    STRINGLIB_BLOOM_ADD(mask, p[0]);
700
    /* process pattern[:0:-1] */
701
32.7k
    for (i = mlast; i > 0; i--) {
702
24.5k
        STRINGLIB_BLOOM_ADD(mask, p[i]);
703
24.5k
        if (p[i] == p[0]) {
704
0
            skip = i - 1;
705
0
        }
706
24.5k
    }
707
708
1.77M
    for (i = w; i >= 0; i--) {
709
1.77M
        if (s[i] == p[0]) {
710
            /* candidate match */
711
88.9k
            for (j = mlast; j > 0; j--) {
712
80.8k
                if (s[i+j] != p[j]) {
713
56.0k
                    break;
714
56.0k
                }
715
80.8k
            }
716
64.0k
            if (j == 0) {
717
                /* got a match! */
718
8.08k
                return i;
719
8.08k
            }
720
            /* miss: check if previous character is part of pattern */
721
56.0k
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
722
54.7k
                i = i - m;
723
54.7k
            }
724
1.27k
            else {
725
1.27k
                i = i - skip;
726
1.27k
            }
727
56.0k
        }
728
1.71M
        else {
729
            /* skip: check if previous character is part of pattern */
730
1.71M
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
731
1.37M
                i = i - m;
732
1.37M
            }
733
1.71M
        }
734
1.77M
    }
735
104
    return -1;
736
8.18k
}
Unexecuted instantiation: bytesobject.c:stringlib_default_rfind
Unexecuted instantiation: unicodeobject.c:asciilib_default_rfind
Unexecuted instantiation: unicodeobject.c:ucs1lib_default_rfind
Unexecuted instantiation: unicodeobject.c:ucs2lib_default_rfind
Unexecuted instantiation: unicodeobject.c:ucs4lib_default_rfind
bytes_methods.c:stringlib_default_rfind
Line
Count
Source
693
8.18k
{
694
    /* create compressed boyer-moore delta 1 table */
695
8.18k
    unsigned long mask = 0;
696
8.18k
    Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m;
697
698
    /* process pattern[0] outside the loop */
699
8.18k
    STRINGLIB_BLOOM_ADD(mask, p[0]);
700
    /* process pattern[:0:-1] */
701
32.7k
    for (i = mlast; i > 0; i--) {
702
24.5k
        STRINGLIB_BLOOM_ADD(mask, p[i]);
703
24.5k
        if (p[i] == p[0]) {
704
0
            skip = i - 1;
705
0
        }
706
24.5k
    }
707
708
1.77M
    for (i = w; i >= 0; i--) {
709
1.77M
        if (s[i] == p[0]) {
710
            /* candidate match */
711
88.9k
            for (j = mlast; j > 0; j--) {
712
80.8k
                if (s[i+j] != p[j]) {
713
56.0k
                    break;
714
56.0k
                }
715
80.8k
            }
716
64.0k
            if (j == 0) {
717
                /* got a match! */
718
8.08k
                return i;
719
8.08k
            }
720
            /* miss: check if previous character is part of pattern */
721
56.0k
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
722
54.7k
                i = i - m;
723
54.7k
            }
724
1.27k
            else {
725
1.27k
                i = i - skip;
726
1.27k
            }
727
56.0k
        }
728
1.71M
        else {
729
            /* skip: check if previous character is part of pattern */
730
1.71M
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
731
1.37M
                i = i - m;
732
1.37M
            }
733
1.71M
        }
734
1.77M
    }
735
104
    return -1;
736
8.18k
}
Unexecuted instantiation: bytearrayobject.c:stringlib_default_rfind
737
738
739
static inline Py_ssize_t
740
STRINGLIB(count_char)(const STRINGLIB_CHAR *s, Py_ssize_t n,
741
                      const STRINGLIB_CHAR p0, Py_ssize_t maxcount)
742
0
{
743
0
    Py_ssize_t i, count = 0;
744
0
    for (i = 0; i < n; i++) {
745
0
        if (s[i] == p0) {
746
0
            count++;
747
0
            if (count == maxcount) {
748
0
                return maxcount;
749
0
            }
750
0
        }
751
0
    }
752
0
    return count;
753
0
}
Unexecuted instantiation: bytesobject.c:stringlib_count_char
Unexecuted instantiation: unicodeobject.c:asciilib_count_char
Unexecuted instantiation: unicodeobject.c:ucs1lib_count_char
Unexecuted instantiation: unicodeobject.c:ucs2lib_count_char
Unexecuted instantiation: unicodeobject.c:ucs4lib_count_char
Unexecuted instantiation: bytes_methods.c:stringlib_count_char
Unexecuted instantiation: bytearrayobject.c:stringlib_count_char
754
755
756
static inline Py_ssize_t
757
STRINGLIB(count_char_no_maxcount)(const STRINGLIB_CHAR *s, Py_ssize_t n,
758
                                  const STRINGLIB_CHAR p0)
759
/* A specialized function of count_char that does not cut off at a maximum.
760
   As a result, the compiler is able to vectorize the loop. */
761
36.9M
{
762
36.9M
    Py_ssize_t count = 0;
763
5.24G
    for (Py_ssize_t i = 0; i < n; i++) {
764
5.20G
        if (s[i] == p0) {
765
774M
            count++;
766
774M
        }
767
5.20G
    }
768
36.9M
    return count;
769
36.9M
}
Unexecuted instantiation: bytesobject.c:stringlib_count_char_no_maxcount
Unexecuted instantiation: unicodeobject.c:asciilib_count_char_no_maxcount
unicodeobject.c:ucs1lib_count_char_no_maxcount
Line
Count
Source
761
23.4M
{
762
23.4M
    Py_ssize_t count = 0;
763
643M
    for (Py_ssize_t i = 0; i < n; i++) {
764
619M
        if (s[i] == p0) {
765
37.8M
            count++;
766
37.8M
        }
767
619M
    }
768
23.4M
    return count;
769
23.4M
}
unicodeobject.c:ucs2lib_count_char_no_maxcount
Line
Count
Source
761
4.35M
{
762
4.35M
    Py_ssize_t count = 0;
763
654M
    for (Py_ssize_t i = 0; i < n; i++) {
764
650M
        if (s[i] == p0) {
765
19.7M
            count++;
766
19.7M
        }
767
650M
    }
768
4.35M
    return count;
769
4.35M
}
unicodeobject.c:ucs4lib_count_char_no_maxcount
Line
Count
Source
761
2.42M
{
762
2.42M
    Py_ssize_t count = 0;
763
462M
    for (Py_ssize_t i = 0; i < n; i++) {
764
459M
        if (s[i] == p0) {
765
13.2M
            count++;
766
13.2M
        }
767
459M
    }
768
2.42M
    return count;
769
2.42M
}
bytes_methods.c:stringlib_count_char_no_maxcount
Line
Count
Source
761
6.79M
{
762
6.79M
    Py_ssize_t count = 0;
763
3.48G
    for (Py_ssize_t i = 0; i < n; i++) {
764
3.47G
        if (s[i] == p0) {
765
703M
            count++;
766
703M
        }
767
3.47G
    }
768
6.79M
    return count;
769
6.79M
}
Unexecuted instantiation: bytearrayobject.c:stringlib_count_char_no_maxcount
770
771
772
Py_LOCAL_INLINE(Py_ssize_t)
773
FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
774
           const STRINGLIB_CHAR* p, Py_ssize_t m,
775
           Py_ssize_t maxcount, int mode)
776
96.4M
{
777
96.4M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
789
        return -1;
779
789
    }
780
781
    /* look for special cases */
782
96.4M
    if (m <= 1) {
783
58.5M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
58.5M
        if (mode == FAST_SEARCH)
788
21.4M
            return STRINGLIB(find_char)(s, n, p[0]);
789
37.0M
        else if (mode == FAST_RSEARCH)
790
60.4k
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
36.9M
        else {
792
36.9M
            if (maxcount == PY_SSIZE_T_MAX) {
793
36.9M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
36.9M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
36.9M
        }
797
58.5M
    }
798
799
37.9M
    if (mode != FAST_RSEARCH) {
800
37.9M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
37.9M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
37.9M
        }
803
22
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
22
            if (mode == FAST_SEARCH) {
810
22
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
22
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
22
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
37.9M
    }
825
8.18k
    else {
826
        /* FAST_RSEARCH */
827
8.18k
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
8.18k
    }
829
37.9M
}
bytesobject.c:fastsearch
Line
Count
Source
776
337k
{
777
337k
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
0
        return -1;
779
0
    }
780
781
    /* look for special cases */
782
337k
    if (m <= 1) {
783
337k
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
337k
        if (mode == FAST_SEARCH)
788
337k
            return STRINGLIB(find_char)(s, n, p[0]);
789
0
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
0
        else {
792
0
            if (maxcount == PY_SSIZE_T_MAX) {
793
0
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
0
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
0
        }
797
337k
    }
798
799
0
    if (mode != FAST_RSEARCH) {
800
0
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
0
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
0
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
0
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
0
}
unicodeobject.c:asciilib_fastsearch
Line
Count
Source
776
7.86M
{
777
7.86M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
0
        return -1;
779
0
    }
780
781
    /* look for special cases */
782
7.86M
    if (m <= 1) {
783
5.55M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
5.55M
        if (mode == FAST_SEARCH)
788
5.49M
            return STRINGLIB(find_char)(s, n, p[0]);
789
60.4k
        else if (mode == FAST_RSEARCH)
790
60.4k
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
0
        else {
792
0
            if (maxcount == PY_SSIZE_T_MAX) {
793
0
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
0
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
0
        }
797
5.55M
    }
798
799
2.30M
    if (mode != FAST_RSEARCH) {
800
2.30M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
2.30M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
2.30M
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
2.30M
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
2.30M
}
unicodeobject.c:ucs1lib_fastsearch
Line
Count
Source
776
59.5M
{
777
59.5M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
0
        return -1;
779
0
    }
780
781
    /* look for special cases */
782
59.5M
    if (m <= 1) {
783
31.5M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
31.5M
        if (mode == FAST_SEARCH)
788
8.15M
            return STRINGLIB(find_char)(s, n, p[0]);
789
23.4M
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
23.4M
        else {
792
23.4M
            if (maxcount == PY_SSIZE_T_MAX) {
793
23.4M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
23.4M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
23.4M
        }
797
31.5M
    }
798
799
27.9M
    if (mode != FAST_RSEARCH) {
800
27.9M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
27.9M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
27.9M
        }
803
22
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
22
            if (mode == FAST_SEARCH) {
810
22
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
22
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
22
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
27.9M
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
27.9M
}
unicodeobject.c:ucs2lib_fastsearch
Line
Count
Source
776
11.4M
{
777
11.4M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
775
        return -1;
779
775
    }
780
781
    /* look for special cases */
782
11.4M
    if (m <= 1) {
783
8.17M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
8.17M
        if (mode == FAST_SEARCH)
788
3.82M
            return STRINGLIB(find_char)(s, n, p[0]);
789
4.35M
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
4.35M
        else {
792
4.35M
            if (maxcount == PY_SSIZE_T_MAX) {
793
4.35M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
4.35M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
4.35M
        }
797
8.17M
    }
798
799
3.28M
    if (mode != FAST_RSEARCH) {
800
3.28M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
3.28M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
3.28M
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
3.28M
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
3.28M
}
unicodeobject.c:ucs4lib_fastsearch
Line
Count
Source
776
10.5M
{
777
10.5M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
0
        return -1;
779
0
    }
780
781
    /* look for special cases */
782
10.5M
    if (m <= 1) {
783
6.05M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
6.05M
        if (mode == FAST_SEARCH)
788
3.63M
            return STRINGLIB(find_char)(s, n, p[0]);
789
2.42M
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
2.42M
        else {
792
2.42M
            if (maxcount == PY_SSIZE_T_MAX) {
793
2.42M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
2.42M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
2.42M
        }
797
6.05M
    }
798
799
4.46M
    if (mode != FAST_RSEARCH) {
800
4.46M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
4.46M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
4.46M
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
4.46M
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
4.46M
}
bytes_methods.c:fastsearch
Line
Count
Source
776
6.80M
{
777
6.80M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
14
        return -1;
779
14
    }
780
781
    /* look for special cases */
782
6.80M
    if (m <= 1) {
783
6.79M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
6.79M
        if (mode == FAST_SEARCH)
788
0
            return STRINGLIB(find_char)(s, n, p[0]);
789
6.79M
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
6.79M
        else {
792
6.79M
            if (maxcount == PY_SSIZE_T_MAX) {
793
6.79M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
6.79M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
6.79M
        }
797
6.79M
    }
798
799
11.1k
    if (mode != FAST_RSEARCH) {
800
2.93k
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
2.93k
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
2.93k
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
2.93k
    }
825
8.18k
    else {
826
        /* FAST_RSEARCH */
827
8.18k
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
8.18k
    }
829
11.1k
}
Unexecuted instantiation: bytearrayobject.c:fastsearch
830