Coverage Report

Created: 2025-11-09 06:26

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Objects/stringlib/fastsearch.h
Line
Count
Source
1
/* stringlib: fastsearch implementation */
2
3
#define STRINGLIB_FASTSEARCH_H
4
5
/* fast search/count implementation, based on a mix between boyer-
6
   moore and horspool, with a few more bells and whistles on the top.
7
   for some more background, see:
8
   https://web.archive.org/web/20201107074620/http://effbot.org/zone/stringlib.htm */
9
10
/* note: fastsearch may access s[n], which isn't a problem when using
11
   Python's ordinary string types, but may cause problems if you're
12
   using this code in other contexts.  also, the count mode returns -1
13
   if there cannot possibly be a match in the target string, and 0 if
14
   it has actually checked for matches, but didn't find any.  callers
15
   beware! */
16
17
/* If the strings are long enough, use Crochemore and Perrin's Two-Way
18
   algorithm, which has worst-case O(n) runtime and best-case O(n/k).
19
   Also compute a table of shifts to achieve O(n/k) in more cases,
20
   and often (data dependent) deduce larger shifts than pure C&P can
21
   deduce. See stringlib_find_two_way_notes.txt in this folder for a
22
   detailed explanation. */
23
24
613M
#define FAST_COUNT 0
25
423M
#define FAST_SEARCH 1
26
84.7M
#define FAST_RSEARCH 2
27
28
#if LONG_BIT >= 128
29
#define STRINGLIB_BLOOM_WIDTH 128
30
#elif LONG_BIT >= 64
31
3.63G
#define STRINGLIB_BLOOM_WIDTH 64
32
#elif LONG_BIT >= 32
33
#define STRINGLIB_BLOOM_WIDTH 32
34
#else
35
#error "LONG_BIT is smaller than 32"
36
#endif
37
38
#define STRINGLIB_BLOOM_ADD(mask, ch) \
39
32.4M
    ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
40
#define STRINGLIB_BLOOM(mask, ch)     \
41
3.60G
    ((mask &  (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
42
43
#ifdef STRINGLIB_FAST_MEMCHR
44
207M
#  define MEMCHR_CUT_OFF 15
45
#else
46
81.7M
#  define MEMCHR_CUT_OFF 40
47
#endif
48
49
Py_LOCAL_INLINE(Py_ssize_t)
50
STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
51
289M
{
52
289M
    const STRINGLIB_CHAR *p, *e;
53
54
289M
    p = s;
55
289M
    e = s + n;
56
289M
    if (n > MEMCHR_CUT_OFF) {
57
#ifdef STRINGLIB_FAST_MEMCHR
58
114M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
114M
        if (p != NULL)
60
111M
            return (p - s);
61
2.41M
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
70.2M
        if (needle != 0) {
71
69.8M
            do {
72
69.8M
                void *candidate = memchr(p, needle,
73
69.8M
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
69.8M
                if (candidate == NULL)
75
498k
                    return -1;
76
69.3M
                s1 = p;
77
69.3M
                p = (const STRINGLIB_CHAR *)
78
69.3M
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
69.3M
                if (*p == ch)
80
69.2M
                    return (p - s);
81
                /* False positive */
82
84.5k
                p++;
83
84.5k
                if (p - s1 > MEMCHR_CUT_OFF)
84
38.2k
                    continue;
85
46.3k
                if (e - p <= MEMCHR_CUT_OFF)
86
4.44k
                    break;
87
41.8k
                e1 = p + MEMCHR_CUT_OFF;
88
1.22M
                while (p != e1) {
89
1.20M
                    if (*p == ch)
90
18.4k
                        return (p - s);
91
1.18M
                    p++;
92
1.18M
                }
93
41.8k
            }
94
69.7M
            while (e - p > MEMCHR_CUT_OFF);
95
69.7M
        }
96
#endif
97
184M
    }
98
424M
    while (p < e) {
99
336M
        if (*p == ch)
100
17.2M
            return (p - s);
101
319M
        p++;
102
319M
    }
103
87.9M
    return -1;
104
105M
}
Unexecuted instantiation: bytesobject.c:stringlib_find_char
unicodeobject.c:ucs1lib_find_char
Line
Count
Source
51
112M
{
52
112M
    const STRINGLIB_CHAR *p, *e;
53
54
112M
    p = s;
55
112M
    e = s + n;
56
112M
    if (n > MEMCHR_CUT_OFF) {
57
24.5M
#ifdef STRINGLIB_FAST_MEMCHR
58
24.5M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
24.5M
        if (p != NULL)
60
23.0M
            return (p - s);
61
1.48M
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                void *candidate = memchr(p, needle,
73
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
24.5M
    }
98
248M
    while (p < e) {
99
164M
        if (*p == ch)
100
4.23M
            return (p - s);
101
160M
        p++;
102
160M
    }
103
84.2M
    return -1;
104
88.4M
}
unicodeobject.c:ucs2lib_find_char
Line
Count
Source
51
81.4M
{
52
81.4M
    const STRINGLIB_CHAR *p, *e;
53
54
81.4M
    p = s;
55
81.4M
    e = s + n;
56
81.4M
    if (n > MEMCHR_CUT_OFF) {
57
#ifdef STRINGLIB_FAST_MEMCHR
58
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
        if (p != NULL)
60
            return (p - s);
61
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
70.2M
        const STRINGLIB_CHAR *s1, *e1;
66
70.2M
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
70.2M
        if (needle != 0) {
71
69.8M
            do {
72
69.8M
                void *candidate = memchr(p, needle,
73
69.8M
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
69.8M
                if (candidate == NULL)
75
498k
                    return -1;
76
69.3M
                s1 = p;
77
69.3M
                p = (const STRINGLIB_CHAR *)
78
69.3M
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
69.3M
                if (*p == ch)
80
69.2M
                    return (p - s);
81
                /* False positive */
82
84.5k
                p++;
83
84.5k
                if (p - s1 > MEMCHR_CUT_OFF)
84
38.2k
                    continue;
85
46.3k
                if (e - p <= MEMCHR_CUT_OFF)
86
4.44k
                    break;
87
41.8k
                e1 = p + MEMCHR_CUT_OFF;
88
1.22M
                while (p != e1) {
89
1.20M
                    if (*p == ch)
90
18.4k
                        return (p - s);
91
1.18M
                    p++;
92
1.18M
                }
93
41.8k
            }
94
69.7M
            while (e - p > MEMCHR_CUT_OFF);
95
69.7M
        }
96
70.2M
#endif
97
70.2M
    }
98
161M
    while (p < e) {
99
157M
        if (*p == ch)
100
8.09M
            return (p - s);
101
149M
        p++;
102
149M
    }
103
3.61M
    return -1;
104
11.7M
}
unicodeobject.c:ucs4lib_find_char
Line
Count
Source
51
64.6M
{
52
64.6M
    const STRINGLIB_CHAR *p, *e;
53
54
64.6M
    p = s;
55
64.6M
    e = s + n;
56
64.6M
    if (n > MEMCHR_CUT_OFF) {
57
64.5M
#ifdef STRINGLIB_FAST_MEMCHR
58
64.5M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
64.5M
        if (p != NULL)
60
64.5M
            return (p - s);
61
36.2k
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                void *candidate = memchr(p, needle,
73
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
64.5M
    }
98
477k
    while (p < e) {
99
388k
        if (*p == ch)
100
43.4k
            return (p - s);
101
345k
        p++;
102
345k
    }
103
89.0k
    return -1;
104
132k
}
unicodeobject.c:asciilib_find_char
Line
Count
Source
51
30.1M
{
52
30.1M
    const STRINGLIB_CHAR *p, *e;
53
54
30.1M
    p = s;
55
30.1M
    e = s + n;
56
30.1M
    if (n > MEMCHR_CUT_OFF) {
57
25.1M
#ifdef STRINGLIB_FAST_MEMCHR
58
25.1M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
25.1M
        if (p != NULL)
60
24.2M
            return (p - s);
61
897k
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                void *candidate = memchr(p, needle,
73
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
25.1M
    }
98
13.5M
    while (p < e) {
99
13.5M
        if (*p == ch)
100
4.87M
            return (p - s);
101
8.64M
        p++;
102
8.64M
    }
103
60.2k
    return -1;
104
4.93M
}
bytes_methods.c:stringlib_find_char
Line
Count
Source
51
1.73k
{
52
1.73k
    const STRINGLIB_CHAR *p, *e;
53
54
1.73k
    p = s;
55
1.73k
    e = s + n;
56
1.73k
    if (n > MEMCHR_CUT_OFF) {
57
1.73k
#ifdef STRINGLIB_FAST_MEMCHR
58
1.73k
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
1.73k
        if (p != NULL)
60
1.47k
            return (p - s);
61
258
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                void *candidate = memchr(p, needle,
73
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
1.73k
    }
98
0
    while (p < e) {
99
0
        if (*p == ch)
100
0
            return (p - s);
101
0
        p++;
102
0
    }
103
0
    return -1;
104
0
}
Unexecuted instantiation: bytearrayobject.c:stringlib_find_char
105
106
#undef MEMCHR_CUT_OFF
107
108
#if STRINGLIB_SIZEOF_CHAR == 1
109
40.0k
#  define MEMRCHR_CUT_OFF 15
110
#else
111
165k
#  define MEMRCHR_CUT_OFF 40
112
#endif
113
114
115
Py_LOCAL_INLINE(Py_ssize_t)
116
STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
117
176k
{
118
176k
    const STRINGLIB_CHAR *p;
119
176k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
176k
    if (n > MEMRCHR_CUT_OFF) {
126
#if STRINGLIB_SIZEOF_CHAR == 1
127
        p = memrchr(s, ch, n);
128
11.3k
        if (p != NULL)
129
7.89k
            return (p - s);
130
3.43k
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
81.0k
        if (needle != 0) {
141
88.2k
            do {
142
88.2k
                void *candidate = memrchr(s, needle,
143
88.2k
                                          n * sizeof(STRINGLIB_CHAR));
144
88.2k
                if (candidate == NULL)
145
721
                    return -1;
146
87.4k
                n1 = n;
147
87.4k
                p = (const STRINGLIB_CHAR *)
148
87.4k
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
87.4k
                n = p - s;
150
87.4k
                if (*p == ch)
151
77.5k
                    return n;
152
                /* False positive */
153
9.91k
                if (n1 - n > MEMRCHR_CUT_OFF)
154
3.65k
                    continue;
155
6.26k
                if (n <= MEMRCHR_CUT_OFF)
156
1.29k
                    break;
157
4.97k
                s1 = p - MEMRCHR_CUT_OFF;
158
188k
                while (p > s1) {
159
183k
                    p--;
160
183k
                    if (*p == ch)
161
595
                        return (p - s);
162
183k
                }
163
4.37k
                n = p - s;
164
4.37k
            }
165
81.0k
            while (n > MEMRCHR_CUT_OFF);
166
81.0k
        }
167
#endif
168
92.3k
    }
169
86.3k
#endif  /* HAVE_MEMRCHR */
170
86.3k
    p = s + n;
171
605k
    while (p > s) {
172
586k
        p--;
173
586k
        if (*p == ch)
174
67.5k
            return (p - s);
175
586k
    }
176
18.8k
    return -1;
177
86.3k
}
Unexecuted instantiation: bytesobject.c:stringlib_rfind_char
unicodeobject.c:ucs1lib_rfind_char
Line
Count
Source
117
7.83k
{
118
7.83k
    const STRINGLIB_CHAR *p;
119
7.83k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
7.83k
    if (n > MEMRCHR_CUT_OFF) {
126
4.65k
#if STRINGLIB_SIZEOF_CHAR == 1
127
4.65k
        p = memrchr(s, ch, n);
128
4.65k
        if (p != NULL)
129
3.62k
            return (p - s);
130
1.02k
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
        if (needle != 0) {
141
            do {
142
                void *candidate = memrchr(s, needle,
143
                                          n * sizeof(STRINGLIB_CHAR));
144
                if (candidate == NULL)
145
                    return -1;
146
                n1 = n;
147
                p = (const STRINGLIB_CHAR *)
148
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
                n = p - s;
150
                if (*p == ch)
151
                    return n;
152
                /* False positive */
153
                if (n1 - n > MEMRCHR_CUT_OFF)
154
                    continue;
155
                if (n <= MEMRCHR_CUT_OFF)
156
                    break;
157
                s1 = p - MEMRCHR_CUT_OFF;
158
                while (p > s1) {
159
                    p--;
160
                    if (*p == ch)
161
                        return (p - s);
162
                }
163
                n = p - s;
164
            }
165
            while (n > MEMRCHR_CUT_OFF);
166
        }
167
#endif
168
4.65k
    }
169
3.18k
#endif  /* HAVE_MEMRCHR */
170
3.18k
    p = s + n;
171
9.67k
    while (p > s) {
172
8.87k
        p--;
173
8.87k
        if (*p == ch)
174
2.38k
            return (p - s);
175
8.87k
    }
176
801
    return -1;
177
3.18k
}
unicodeobject.c:ucs2lib_rfind_char
Line
Count
Source
117
25.5k
{
118
25.5k
    const STRINGLIB_CHAR *p;
119
25.5k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
25.5k
    if (n > MEMRCHR_CUT_OFF) {
126
#if STRINGLIB_SIZEOF_CHAR == 1
127
        p = memrchr(s, ch, n);
128
        if (p != NULL)
129
            return (p - s);
130
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
17.2k
        const STRINGLIB_CHAR *s1;
135
17.2k
        Py_ssize_t n1;
136
17.2k
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
17.2k
        if (needle != 0) {
141
19.3k
            do {
142
19.3k
                void *candidate = memrchr(s, needle,
143
19.3k
                                          n * sizeof(STRINGLIB_CHAR));
144
19.3k
                if (candidate == NULL)
145
423
                    return -1;
146
18.8k
                n1 = n;
147
18.8k
                p = (const STRINGLIB_CHAR *)
148
18.8k
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
18.8k
                n = p - s;
150
18.8k
                if (*p == ch)
151
15.7k
                    return n;
152
                /* False positive */
153
3.16k
                if (n1 - n > MEMRCHR_CUT_OFF)
154
1.23k
                    continue;
155
1.93k
                if (n <= MEMRCHR_CUT_OFF)
156
563
                    break;
157
1.36k
                s1 = p - MEMRCHR_CUT_OFF;
158
49.8k
                while (p > s1) {
159
48.6k
                    p--;
160
48.6k
                    if (*p == ch)
161
241
                        return (p - s);
162
48.6k
                }
163
1.12k
                n = p - s;
164
1.12k
            }
165
17.2k
            while (n > MEMRCHR_CUT_OFF);
166
17.2k
        }
167
17.2k
#endif
168
17.2k
    }
169
9.19k
#endif  /* HAVE_MEMRCHR */
170
9.19k
    p = s + n;
171
72.1k
    while (p > s) {
172
70.4k
        p--;
173
70.4k
        if (*p == ch)
174
7.50k
            return (p - s);
175
70.4k
    }
176
1.68k
    return -1;
177
9.19k
}
unicodeobject.c:ucs4lib_rfind_char
Line
Count
Source
117
110k
{
118
110k
    const STRINGLIB_CHAR *p;
119
110k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
110k
    if (n > MEMRCHR_CUT_OFF) {
126
#if STRINGLIB_SIZEOF_CHAR == 1
127
        p = memrchr(s, ch, n);
128
        if (p != NULL)
129
            return (p - s);
130
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
63.7k
        const STRINGLIB_CHAR *s1;
135
63.7k
        Py_ssize_t n1;
136
63.7k
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
63.7k
        if (needle != 0) {
141
68.9k
            do {
142
68.9k
                void *candidate = memrchr(s, needle,
143
68.9k
                                          n * sizeof(STRINGLIB_CHAR));
144
68.9k
                if (candidate == NULL)
145
298
                    return -1;
146
68.6k
                n1 = n;
147
68.6k
                p = (const STRINGLIB_CHAR *)
148
68.6k
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
68.6k
                n = p - s;
150
68.6k
                if (*p == ch)
151
61.8k
                    return n;
152
                /* False positive */
153
6.75k
                if (n1 - n > MEMRCHR_CUT_OFF)
154
2.42k
                    continue;
155
4.32k
                if (n <= MEMRCHR_CUT_OFF)
156
727
                    break;
157
3.60k
                s1 = p - MEMRCHR_CUT_OFF;
158
138k
                while (p > s1) {
159
135k
                    p--;
160
135k
                    if (*p == ch)
161
354
                        return (p - s);
162
135k
                }
163
3.24k
                n = p - s;
164
3.24k
            }
165
63.7k
            while (n > MEMRCHR_CUT_OFF);
166
63.7k
        }
167
63.7k
#endif
168
63.7k
    }
169
48.3k
#endif  /* HAVE_MEMRCHR */
170
48.3k
    p = s + n;
171
391k
    while (p > s) {
172
390k
        p--;
173
390k
        if (*p == ch)
174
47.0k
            return (p - s);
175
390k
    }
176
1.34k
    return -1;
177
48.3k
}
unicodeobject.c:asciilib_rfind_char
Line
Count
Source
117
11.1k
{
118
11.1k
    const STRINGLIB_CHAR *p;
119
11.1k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
11.1k
    if (n > MEMRCHR_CUT_OFF) {
126
3.51k
#if STRINGLIB_SIZEOF_CHAR == 1
127
3.51k
        p = memrchr(s, ch, n);
128
3.51k
        if (p != NULL)
129
3.43k
            return (p - s);
130
86
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
        if (needle != 0) {
141
            do {
142
                void *candidate = memrchr(s, needle,
143
                                          n * sizeof(STRINGLIB_CHAR));
144
                if (candidate == NULL)
145
                    return -1;
146
                n1 = n;
147
                p = (const STRINGLIB_CHAR *)
148
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
                n = p - s;
150
                if (*p == ch)
151
                    return n;
152
                /* False positive */
153
                if (n1 - n > MEMRCHR_CUT_OFF)
154
                    continue;
155
                if (n <= MEMRCHR_CUT_OFF)
156
                    break;
157
                s1 = p - MEMRCHR_CUT_OFF;
158
                while (p > s1) {
159
                    p--;
160
                    if (*p == ch)
161
                        return (p - s);
162
                }
163
                n = p - s;
164
            }
165
            while (n > MEMRCHR_CUT_OFF);
166
        }
167
#endif
168
3.51k
    }
169
7.61k
#endif  /* HAVE_MEMRCHR */
170
7.61k
    p = s + n;
171
39.8k
    while (p > s) {
172
37.7k
        p--;
173
37.7k
        if (*p == ch)
174
5.49k
            return (p - s);
175
37.7k
    }
176
2.12k
    return -1;
177
7.61k
}
bytes_methods.c:stringlib_rfind_char
Line
Count
Source
117
21.1k
{
118
21.1k
    const STRINGLIB_CHAR *p;
119
21.1k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
21.1k
    if (n > MEMRCHR_CUT_OFF) {
126
3.15k
#if STRINGLIB_SIZEOF_CHAR == 1
127
3.15k
        p = memrchr(s, ch, n);
128
3.15k
        if (p != NULL)
129
839
            return (p - s);
130
2.31k
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
        if (needle != 0) {
141
            do {
142
                void *candidate = memrchr(s, needle,
143
                                          n * sizeof(STRINGLIB_CHAR));
144
                if (candidate == NULL)
145
                    return -1;
146
                n1 = n;
147
                p = (const STRINGLIB_CHAR *)
148
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
                n = p - s;
150
                if (*p == ch)
151
                    return n;
152
                /* False positive */
153
                if (n1 - n > MEMRCHR_CUT_OFF)
154
                    continue;
155
                if (n <= MEMRCHR_CUT_OFF)
156
                    break;
157
                s1 = p - MEMRCHR_CUT_OFF;
158
                while (p > s1) {
159
                    p--;
160
                    if (*p == ch)
161
                        return (p - s);
162
                }
163
                n = p - s;
164
            }
165
            while (n > MEMRCHR_CUT_OFF);
166
        }
167
#endif
168
3.15k
    }
169
17.9k
#endif  /* HAVE_MEMRCHR */
170
17.9k
    p = s + n;
171
92.2k
    while (p > s) {
172
79.4k
        p--;
173
79.4k
        if (*p == ch)
174
5.10k
            return (p - s);
175
79.4k
    }
176
12.8k
    return -1;
177
17.9k
}
Unexecuted instantiation: bytearrayobject.c:stringlib_rfind_char
178
179
#undef MEMRCHR_CUT_OFF
180
181
/* Change to a 1 to see logging comments walk through the algorithm. */
182
#if 0 && STRINGLIB_SIZEOF_CHAR == 1
183
# define LOG(...) printf(__VA_ARGS__)
184
# define LOG_STRING(s, n) printf("\"%.*s\"", (int)(n), s)
185
# define LOG_LINEUP() do {                                         \
186
    LOG("> "); LOG_STRING(haystack, len_haystack); LOG("\n> ");    \
187
    LOG("%*s",(int)(window_last - haystack + 1 - len_needle), ""); \
188
    LOG_STRING(needle, len_needle); LOG("\n");                     \
189
} while(0)
190
#else
191
# define LOG(...)
192
# define LOG_STRING(s, n)
193
# define LOG_LINEUP()
194
#endif
195
196
Py_LOCAL_INLINE(Py_ssize_t)
197
STRINGLIB(_lex_search)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
198
                       Py_ssize_t *return_period, int invert_alphabet)
199
44
{
200
    /* Do a lexicographic search. Essentially this:
201
           >>> max(needle[i:] for i in range(len(needle)+1))
202
       Also find the period of the right half.   */
203
44
    Py_ssize_t max_suffix = 0;
204
44
    Py_ssize_t candidate = 1;
205
44
    Py_ssize_t k = 0;
206
    // The period of the right half.
207
44
    Py_ssize_t period = 1;
208
209
440
    while (candidate + k < len_needle) {
210
        // each loop increases candidate + k + max_suffix
211
396
        STRINGLIB_CHAR a = needle[candidate + k];
212
396
        STRINGLIB_CHAR b = needle[max_suffix + k];
213
        // check if the suffix at candidate is better than max_suffix
214
396
        if (invert_alphabet ? (b < a) : (a < b)) {
215
            // Fell short of max_suffix.
216
            // The next k + 1 characters are non-increasing
217
            // from candidate, so they won't start a maximal suffix.
218
286
            candidate += k + 1;
219
286
            k = 0;
220
            // We've ruled out any period smaller than what's
221
            // been scanned since max_suffix.
222
286
            period = candidate - max_suffix;
223
286
        }
224
110
        else if (a == b) {
225
22
            if (k + 1 != period) {
226
                // Keep scanning the equal strings
227
0
                k++;
228
0
            }
229
22
            else {
230
                // Matched a whole period.
231
                // Start matching the next period.
232
22
                candidate += period;
233
22
                k = 0;
234
22
            }
235
22
        }
236
88
        else {
237
            // Did better than max_suffix, so replace it.
238
88
            max_suffix = candidate;
239
88
            candidate++;
240
88
            k = 0;
241
88
            period = 1;
242
88
        }
243
396
    }
244
44
    *return_period = period;
245
44
    return max_suffix;
246
44
}
Unexecuted instantiation: bytesobject.c:stringlib__lex_search
Unexecuted instantiation: unicodeobject.c:asciilib__lex_search
unicodeobject.c:ucs1lib__lex_search
Line
Count
Source
199
44
{
200
    /* Do a lexicographic search. Essentially this:
201
           >>> max(needle[i:] for i in range(len(needle)+1))
202
       Also find the period of the right half.   */
203
44
    Py_ssize_t max_suffix = 0;
204
44
    Py_ssize_t candidate = 1;
205
44
    Py_ssize_t k = 0;
206
    // The period of the right half.
207
44
    Py_ssize_t period = 1;
208
209
440
    while (candidate + k < len_needle) {
210
        // each loop increases candidate + k + max_suffix
211
396
        STRINGLIB_CHAR a = needle[candidate + k];
212
396
        STRINGLIB_CHAR b = needle[max_suffix + k];
213
        // check if the suffix at candidate is better than max_suffix
214
396
        if (invert_alphabet ? (b < a) : (a < b)) {
215
            // Fell short of max_suffix.
216
            // The next k + 1 characters are non-increasing
217
            // from candidate, so they won't start a maximal suffix.
218
286
            candidate += k + 1;
219
286
            k = 0;
220
            // We've ruled out any period smaller than what's
221
            // been scanned since max_suffix.
222
286
            period = candidate - max_suffix;
223
286
        }
224
110
        else if (a == b) {
225
22
            if (k + 1 != period) {
226
                // Keep scanning the equal strings
227
0
                k++;
228
0
            }
229
22
            else {
230
                // Matched a whole period.
231
                // Start matching the next period.
232
22
                candidate += period;
233
22
                k = 0;
234
22
            }
235
22
        }
236
88
        else {
237
            // Did better than max_suffix, so replace it.
238
88
            max_suffix = candidate;
239
88
            candidate++;
240
88
            k = 0;
241
88
            period = 1;
242
88
        }
243
396
    }
244
44
    *return_period = period;
245
44
    return max_suffix;
246
44
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__lex_search
Unexecuted instantiation: unicodeobject.c:ucs4lib__lex_search
Unexecuted instantiation: bytes_methods.c:stringlib__lex_search
Unexecuted instantiation: bytearrayobject.c:stringlib__lex_search
247
248
Py_LOCAL_INLINE(Py_ssize_t)
249
STRINGLIB(_factorize)(const STRINGLIB_CHAR *needle,
250
                      Py_ssize_t len_needle,
251
                      Py_ssize_t *return_period)
252
22
{
253
    /* Do a "critical factorization", making it so that:
254
       >>> needle = (left := needle[:cut]) + (right := needle[cut:])
255
       where the "local period" of the cut is maximal.
256
257
       The local period of the cut is the minimal length of a string w
258
       such that (left endswith w or w endswith left)
259
       and (right startswith w or w startswith right).
260
261
       The Critical Factorization Theorem says that this maximal local
262
       period is the global period of the string.
263
264
       Crochemore and Perrin (1991) show that this cut can be computed
265
       as the later of two cuts: one that gives a lexicographically
266
       maximal right half, and one that gives the same with the
267
       with respect to a reversed alphabet-ordering.
268
269
       This is what we want to happen:
270
           >>> x = "GCAGAGAG"
271
           >>> cut, period = factorize(x)
272
           >>> x[:cut], (right := x[cut:])
273
           ('GC', 'AGAGAG')
274
           >>> period  # right half period
275
           2
276
           >>> right[period:] == right[:-period]
277
           True
278
279
       This is how the local period lines up in the above example:
280
                GC | AGAGAG
281
           AGAGAGC = AGAGAGC
282
       The length of this minimal repetition is 7, which is indeed the
283
       period of the original string. */
284
285
22
    Py_ssize_t cut1, period1, cut2, period2, cut, period;
286
22
    cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0);
287
22
    cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1);
288
289
    // Take the later cut.
290
22
    if (cut1 > cut2) {
291
22
        period = period1;
292
22
        cut = cut1;
293
22
    }
294
0
    else {
295
0
        period = period2;
296
0
        cut = cut2;
297
0
    }
298
299
22
    LOG("split: "); LOG_STRING(needle, cut);
300
22
    LOG(" + "); LOG_STRING(needle + cut, len_needle - cut);
301
22
    LOG("\n");
302
303
22
    *return_period = period;
304
22
    return cut;
305
22
}
Unexecuted instantiation: bytesobject.c:stringlib__factorize
Unexecuted instantiation: unicodeobject.c:asciilib__factorize
unicodeobject.c:ucs1lib__factorize
Line
Count
Source
252
22
{
253
    /* Do a "critical factorization", making it so that:
254
       >>> needle = (left := needle[:cut]) + (right := needle[cut:])
255
       where the "local period" of the cut is maximal.
256
257
       The local period of the cut is the minimal length of a string w
258
       such that (left endswith w or w endswith left)
259
       and (right startswith w or w startswith right).
260
261
       The Critical Factorization Theorem says that this maximal local
262
       period is the global period of the string.
263
264
       Crochemore and Perrin (1991) show that this cut can be computed
265
       as the later of two cuts: one that gives a lexicographically
266
       maximal right half, and one that gives the same with the
267
       with respect to a reversed alphabet-ordering.
268
269
       This is what we want to happen:
270
           >>> x = "GCAGAGAG"
271
           >>> cut, period = factorize(x)
272
           >>> x[:cut], (right := x[cut:])
273
           ('GC', 'AGAGAG')
274
           >>> period  # right half period
275
           2
276
           >>> right[period:] == right[:-period]
277
           True
278
279
       This is how the local period lines up in the above example:
280
                GC | AGAGAG
281
           AGAGAGC = AGAGAGC
282
       The length of this minimal repetition is 7, which is indeed the
283
       period of the original string. */
284
285
22
    Py_ssize_t cut1, period1, cut2, period2, cut, period;
286
22
    cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0);
287
22
    cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1);
288
289
    // Take the later cut.
290
22
    if (cut1 > cut2) {
291
22
        period = period1;
292
22
        cut = cut1;
293
22
    }
294
0
    else {
295
0
        period = period2;
296
0
        cut = cut2;
297
0
    }
298
299
22
    LOG("split: "); LOG_STRING(needle, cut);
300
22
    LOG(" + "); LOG_STRING(needle + cut, len_needle - cut);
301
22
    LOG("\n");
302
303
22
    *return_period = period;
304
22
    return cut;
305
22
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__factorize
Unexecuted instantiation: unicodeobject.c:ucs4lib__factorize
Unexecuted instantiation: bytes_methods.c:stringlib__factorize
Unexecuted instantiation: bytearrayobject.c:stringlib__factorize
306
307
308
242
#define SHIFT_TYPE uint8_t
309
#define MAX_SHIFT UINT8_MAX
310
311
76.2k
#define TABLE_SIZE_BITS 6u
312
76.2k
#define TABLE_SIZE (1U << TABLE_SIZE_BITS)
313
74.8k
#define TABLE_MASK (TABLE_SIZE - 1U)
314
315
typedef struct STRINGLIB(_pre) {
316
    const STRINGLIB_CHAR *needle;
317
    Py_ssize_t len_needle;
318
    Py_ssize_t cut;
319
    Py_ssize_t period;
320
    Py_ssize_t gap;
321
    int is_periodic;
322
    SHIFT_TYPE table[TABLE_SIZE];
323
} STRINGLIB(prework);
324
325
326
static void
327
STRINGLIB(_preprocess)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
328
                       STRINGLIB(prework) *p)
329
22
{
330
22
    p->needle = needle;
331
22
    p->len_needle = len_needle;
332
22
    p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period));
333
22
    assert(p->period + p->cut <= len_needle);
334
22
    p->is_periodic = (0 == memcmp(needle,
335
22
                                  needle + p->period,
336
22
                                  p->cut * STRINGLIB_SIZEOF_CHAR));
337
22
    if (p->is_periodic) {
338
0
        assert(p->cut <= len_needle/2);
339
0
        assert(p->cut < p->period);
340
0
    }
341
22
    else {
342
        // A lower bound on the period
343
22
        p->period = Py_MAX(p->cut, len_needle - p->cut) + 1;
344
22
    }
345
    // The gap between the last character and the previous
346
    // occurrence of an equivalent character (modulo TABLE_SIZE)
347
22
    p->gap = len_needle;
348
22
    STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK;
349
154
    for (Py_ssize_t i = len_needle - 2; i >= 0; i--) {
350
154
        STRINGLIB_CHAR x = needle[i] & TABLE_MASK;
351
154
        if (x == last) {
352
22
            p->gap = len_needle - 1 - i;
353
22
            break;
354
22
        }
355
154
    }
356
    // Fill up a compressed Boyer-Moore "Bad Character" table
357
22
    Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT);
358
1.43k
    for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; i++) {
359
1.40k
        p->table[i] = Py_SAFE_DOWNCAST(not_found_shift,
360
1.40k
                                       Py_ssize_t, SHIFT_TYPE);
361
1.40k
    }
362
242
    for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; i++) {
363
220
        SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i,
364
220
                                            Py_ssize_t, SHIFT_TYPE);
365
220
        p->table[needle[i] & TABLE_MASK] = shift;
366
220
    }
367
22
}
Unexecuted instantiation: bytesobject.c:stringlib__preprocess
Unexecuted instantiation: unicodeobject.c:asciilib__preprocess
unicodeobject.c:ucs1lib__preprocess
Line
Count
Source
329
22
{
330
22
    p->needle = needle;
331
22
    p->len_needle = len_needle;
332
22
    p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period));
333
22
    assert(p->period + p->cut <= len_needle);
334
22
    p->is_periodic = (0 == memcmp(needle,
335
22
                                  needle + p->period,
336
22
                                  p->cut * STRINGLIB_SIZEOF_CHAR));
337
22
    if (p->is_periodic) {
338
0
        assert(p->cut <= len_needle/2);
339
0
        assert(p->cut < p->period);
340
0
    }
341
22
    else {
342
        // A lower bound on the period
343
22
        p->period = Py_MAX(p->cut, len_needle - p->cut) + 1;
344
22
    }
345
    // The gap between the last character and the previous
346
    // occurrence of an equivalent character (modulo TABLE_SIZE)
347
22
    p->gap = len_needle;
348
22
    STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK;
349
154
    for (Py_ssize_t i = len_needle - 2; i >= 0; i--) {
350
154
        STRINGLIB_CHAR x = needle[i] & TABLE_MASK;
351
154
        if (x == last) {
352
22
            p->gap = len_needle - 1 - i;
353
22
            break;
354
22
        }
355
154
    }
356
    // Fill up a compressed Boyer-Moore "Bad Character" table
357
22
    Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT);
358
1.43k
    for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; i++) {
359
1.40k
        p->table[i] = Py_SAFE_DOWNCAST(not_found_shift,
360
1.40k
                                       Py_ssize_t, SHIFT_TYPE);
361
1.40k
    }
362
242
    for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; i++) {
363
220
        SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i,
364
220
                                            Py_ssize_t, SHIFT_TYPE);
365
220
        p->table[needle[i] & TABLE_MASK] = shift;
366
220
    }
367
22
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__preprocess
Unexecuted instantiation: unicodeobject.c:ucs4lib__preprocess
Unexecuted instantiation: bytes_methods.c:stringlib__preprocess
Unexecuted instantiation: bytearrayobject.c:stringlib__preprocess
368
369
static Py_ssize_t
370
STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
371
                    STRINGLIB(prework) *p)
372
22
{
373
    // Crochemore and Perrin's (1991) Two-Way algorithm.
374
    // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
375
22
    const Py_ssize_t len_needle = p->len_needle;
376
22
    const Py_ssize_t cut = p->cut;
377
22
    Py_ssize_t period = p->period;
378
22
    const STRINGLIB_CHAR *const needle = p->needle;
379
22
    const STRINGLIB_CHAR *window_last = haystack + len_needle - 1;
380
22
    const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack;
381
22
    SHIFT_TYPE *table = p->table;
382
22
    const STRINGLIB_CHAR *window;
383
22
    LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack);
384
385
22
    Py_ssize_t gap = p->gap;
386
22
    Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap);
387
22
    if (p->is_periodic) {
388
0
        LOG("Needle is periodic.\n");
389
0
        Py_ssize_t memory = 0;
390
0
      periodicwindowloop:
391
0
        while (window_last < haystack_end) {
392
0
            assert(memory == 0);
393
0
            for (;;) {
394
0
                LOG_LINEUP();
395
0
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
396
0
                window_last += shift;
397
0
                if (shift == 0) {
398
0
                    break;
399
0
                }
400
0
                if (window_last >= haystack_end) {
401
0
                    return -1;
402
0
                }
403
0
                LOG("Horspool skip\n");
404
0
            }
405
0
          no_shift:
406
0
            window = window_last - len_needle + 1;
407
0
            assert((window[len_needle - 1] & TABLE_MASK) ==
408
0
                   (needle[len_needle - 1] & TABLE_MASK));
409
0
            Py_ssize_t i = Py_MAX(cut, memory);
410
0
            for (; i < len_needle; i++) {
411
0
                if (needle[i] != window[i]) {
412
0
                    if (i < gap_jump_end) {
413
0
                        LOG("Early right half mismatch: jump by gap.\n");
414
0
                        assert(gap >= i - cut + 1);
415
0
                        window_last += gap;
416
0
                    }
417
0
                    else {
418
0
                        LOG("Late right half mismatch: jump by n (>gap)\n");
419
0
                        assert(i - cut + 1 > gap);
420
0
                        window_last += i - cut + 1;
421
0
                    }
422
0
                    memory = 0;
423
0
                    goto periodicwindowloop;
424
0
                }
425
0
            }
426
0
            for (i = memory; i < cut; i++) {
427
0
                if (needle[i] != window[i]) {
428
0
                    LOG("Left half does not match.\n");
429
0
                    window_last += period;
430
0
                    memory = len_needle - period;
431
0
                    if (window_last >= haystack_end) {
432
0
                        return -1;
433
0
                    }
434
0
                    Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
435
0
                    if (shift) {
436
                        // A mismatch has been identified to the right
437
                        // of where i will next start, so we can jump
438
                        // at least as far as if the mismatch occurred
439
                        // on the first comparison.
440
0
                        Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1;
441
0
                        LOG("Skip with Memory.\n");
442
0
                        memory = 0;
443
0
                        window_last += Py_MAX(shift, mem_jump);
444
0
                        goto periodicwindowloop;
445
0
                    }
446
0
                    goto no_shift;
447
0
                }
448
0
            }
449
0
            LOG("Found a match!\n");
450
0
            return window - haystack;
451
0
        }
452
0
    }
453
22
    else {
454
22
        period = Py_MAX(gap, period);
455
22
        LOG("Needle is not periodic.\n");
456
15.2k
      windowloop:
457
15.2k
        while (window_last < haystack_end) {
458
74.4k
            for (;;) {
459
74.4k
                LOG_LINEUP();
460
74.4k
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
461
74.4k
                window_last += shift;
462
74.4k
                if (shift == 0) {
463
15.1k
                    break;
464
15.1k
                }
465
59.2k
                if (window_last >= haystack_end) {
466
18
                    return -1;
467
18
                }
468
59.2k
                LOG("Horspool skip\n");
469
59.2k
            }
470
15.1k
            window = window_last - len_needle + 1;
471
15.1k
            assert((window[len_needle - 1] & TABLE_MASK) ==
472
15.1k
                   (needle[len_needle - 1] & TABLE_MASK));
473
15.1k
            Py_ssize_t i = cut;
474
15.4k
            for (; i < len_needle; i++) {
475
15.3k
                if (needle[i] != window[i]) {
476
15.1k
                    if (i < gap_jump_end) {
477
15.1k
                        LOG("Early right half mismatch: jump by gap.\n");
478
15.1k
                        assert(gap >= i - cut + 1);
479
15.1k
                        window_last += gap;
480
15.1k
                    }
481
0
                    else {
482
0
                        LOG("Late right half mismatch: jump by n (>gap)\n");
483
0
                        assert(i - cut + 1 > gap);
484
0
                        window_last += i - cut + 1;
485
0
                    }
486
15.1k
                    goto windowloop;
487
15.1k
                }
488
15.3k
            }
489
113
            for (Py_ssize_t i = 0; i < cut; i++) {
490
110
                if (needle[i] != window[i]) {
491
86
                    LOG("Left half does not match.\n");
492
86
                    window_last += period;
493
86
                    goto windowloop;
494
86
                }
495
110
            }
496
3
            LOG("Found a match!\n");
497
3
            return window - haystack;
498
89
        }
499
15.2k
    }
500
1
    LOG("Not found. Returning -1.\n");
501
1
    return -1;
502
22
}
Unexecuted instantiation: bytesobject.c:stringlib__two_way
Unexecuted instantiation: unicodeobject.c:asciilib__two_way
unicodeobject.c:ucs1lib__two_way
Line
Count
Source
372
22
{
373
    // Crochemore and Perrin's (1991) Two-Way algorithm.
374
    // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
375
22
    const Py_ssize_t len_needle = p->len_needle;
376
22
    const Py_ssize_t cut = p->cut;
377
22
    Py_ssize_t period = p->period;
378
22
    const STRINGLIB_CHAR *const needle = p->needle;
379
22
    const STRINGLIB_CHAR *window_last = haystack + len_needle - 1;
380
22
    const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack;
381
22
    SHIFT_TYPE *table = p->table;
382
22
    const STRINGLIB_CHAR *window;
383
22
    LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack);
384
385
22
    Py_ssize_t gap = p->gap;
386
22
    Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap);
387
22
    if (p->is_periodic) {
388
0
        LOG("Needle is periodic.\n");
389
0
        Py_ssize_t memory = 0;
390
0
      periodicwindowloop:
391
0
        while (window_last < haystack_end) {
392
0
            assert(memory == 0);
393
0
            for (;;) {
394
0
                LOG_LINEUP();
395
0
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
396
0
                window_last += shift;
397
0
                if (shift == 0) {
398
0
                    break;
399
0
                }
400
0
                if (window_last >= haystack_end) {
401
0
                    return -1;
402
0
                }
403
0
                LOG("Horspool skip\n");
404
0
            }
405
0
          no_shift:
406
0
            window = window_last - len_needle + 1;
407
0
            assert((window[len_needle - 1] & TABLE_MASK) ==
408
0
                   (needle[len_needle - 1] & TABLE_MASK));
409
0
            Py_ssize_t i = Py_MAX(cut, memory);
410
0
            for (; i < len_needle; i++) {
411
0
                if (needle[i] != window[i]) {
412
0
                    if (i < gap_jump_end) {
413
0
                        LOG("Early right half mismatch: jump by gap.\n");
414
0
                        assert(gap >= i - cut + 1);
415
0
                        window_last += gap;
416
0
                    }
417
0
                    else {
418
0
                        LOG("Late right half mismatch: jump by n (>gap)\n");
419
0
                        assert(i - cut + 1 > gap);
420
0
                        window_last += i - cut + 1;
421
0
                    }
422
0
                    memory = 0;
423
0
                    goto periodicwindowloop;
424
0
                }
425
0
            }
426
0
            for (i = memory; i < cut; i++) {
427
0
                if (needle[i] != window[i]) {
428
0
                    LOG("Left half does not match.\n");
429
0
                    window_last += period;
430
0
                    memory = len_needle - period;
431
0
                    if (window_last >= haystack_end) {
432
0
                        return -1;
433
0
                    }
434
0
                    Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
435
0
                    if (shift) {
436
                        // A mismatch has been identified to the right
437
                        // of where i will next start, so we can jump
438
                        // at least as far as if the mismatch occurred
439
                        // on the first comparison.
440
0
                        Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1;
441
0
                        LOG("Skip with Memory.\n");
442
0
                        memory = 0;
443
0
                        window_last += Py_MAX(shift, mem_jump);
444
0
                        goto periodicwindowloop;
445
0
                    }
446
0
                    goto no_shift;
447
0
                }
448
0
            }
449
0
            LOG("Found a match!\n");
450
0
            return window - haystack;
451
0
        }
452
0
    }
453
22
    else {
454
22
        period = Py_MAX(gap, period);
455
22
        LOG("Needle is not periodic.\n");
456
15.2k
      windowloop:
457
15.2k
        while (window_last < haystack_end) {
458
74.4k
            for (;;) {
459
74.4k
                LOG_LINEUP();
460
74.4k
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
461
74.4k
                window_last += shift;
462
74.4k
                if (shift == 0) {
463
15.1k
                    break;
464
15.1k
                }
465
59.2k
                if (window_last >= haystack_end) {
466
18
                    return -1;
467
18
                }
468
59.2k
                LOG("Horspool skip\n");
469
59.2k
            }
470
15.1k
            window = window_last - len_needle + 1;
471
15.1k
            assert((window[len_needle - 1] & TABLE_MASK) ==
472
15.1k
                   (needle[len_needle - 1] & TABLE_MASK));
473
15.1k
            Py_ssize_t i = cut;
474
15.4k
            for (; i < len_needle; i++) {
475
15.3k
                if (needle[i] != window[i]) {
476
15.1k
                    if (i < gap_jump_end) {
477
15.1k
                        LOG("Early right half mismatch: jump by gap.\n");
478
15.1k
                        assert(gap >= i - cut + 1);
479
15.1k
                        window_last += gap;
480
15.1k
                    }
481
0
                    else {
482
0
                        LOG("Late right half mismatch: jump by n (>gap)\n");
483
0
                        assert(i - cut + 1 > gap);
484
0
                        window_last += i - cut + 1;
485
0
                    }
486
15.1k
                    goto windowloop;
487
15.1k
                }
488
15.3k
            }
489
113
            for (Py_ssize_t i = 0; i < cut; i++) {
490
110
                if (needle[i] != window[i]) {
491
86
                    LOG("Left half does not match.\n");
492
86
                    window_last += period;
493
86
                    goto windowloop;
494
86
                }
495
110
            }
496
3
            LOG("Found a match!\n");
497
3
            return window - haystack;
498
89
        }
499
15.2k
    }
500
1
    LOG("Not found. Returning -1.\n");
501
1
    return -1;
502
22
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__two_way
Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way
Unexecuted instantiation: bytes_methods.c:stringlib__two_way
Unexecuted instantiation: bytearrayobject.c:stringlib__two_way
503
504
505
static Py_ssize_t
506
STRINGLIB(_two_way_find)(const STRINGLIB_CHAR *haystack,
507
                         Py_ssize_t len_haystack,
508
                         const STRINGLIB_CHAR *needle,
509
                         Py_ssize_t len_needle)
510
22
{
511
22
    LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack);
512
22
    STRINGLIB(prework) p;
513
22
    STRINGLIB(_preprocess)(needle, len_needle, &p);
514
22
    return STRINGLIB(_two_way)(haystack, len_haystack, &p);
515
22
}
Unexecuted instantiation: bytesobject.c:stringlib__two_way_find
Unexecuted instantiation: unicodeobject.c:asciilib__two_way_find
unicodeobject.c:ucs1lib__two_way_find
Line
Count
Source
510
22
{
511
22
    LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack);
512
22
    STRINGLIB(prework) p;
513
22
    STRINGLIB(_preprocess)(needle, len_needle, &p);
514
22
    return STRINGLIB(_two_way)(haystack, len_haystack, &p);
515
22
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__two_way_find
Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way_find
Unexecuted instantiation: bytes_methods.c:stringlib__two_way_find
Unexecuted instantiation: bytearrayobject.c:stringlib__two_way_find
516
517
518
static Py_ssize_t
519
STRINGLIB(_two_way_count)(const STRINGLIB_CHAR *haystack,
520
                          Py_ssize_t len_haystack,
521
                          const STRINGLIB_CHAR *needle,
522
                          Py_ssize_t len_needle,
523
                          Py_ssize_t maxcount)
524
0
{
525
0
    LOG("###### Counting \"%s\" in \"%s\".\n", needle, haystack);
526
0
    STRINGLIB(prework) p;
527
0
    STRINGLIB(_preprocess)(needle, len_needle, &p);
528
0
    Py_ssize_t index = 0, count = 0;
529
0
    while (1) {
530
0
        Py_ssize_t result;
531
0
        result = STRINGLIB(_two_way)(haystack + index,
532
0
                                     len_haystack - index, &p);
533
0
        if (result == -1) {
534
0
            return count;
535
0
        }
536
0
        count++;
537
0
        if (count == maxcount) {
538
0
            return maxcount;
539
0
        }
540
0
        index += result + len_needle;
541
0
    }
542
0
    return count;
543
0
}
Unexecuted instantiation: bytesobject.c:stringlib__two_way_count
Unexecuted instantiation: unicodeobject.c:asciilib__two_way_count
Unexecuted instantiation: unicodeobject.c:ucs1lib__two_way_count
Unexecuted instantiation: unicodeobject.c:ucs2lib__two_way_count
Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way_count
Unexecuted instantiation: bytes_methods.c:stringlib__two_way_count
Unexecuted instantiation: bytearrayobject.c:stringlib__two_way_count
544
545
#undef SHIFT_TYPE
546
#undef NOT_FOUND
547
#undef SHIFT_OVERFLOW
548
#undef TABLE_SIZE_BITS
549
#undef TABLE_SIZE
550
#undef TABLE_MASK
551
552
#undef LOG
553
#undef LOG_STRING
554
#undef LOG_LINEUP
555
556
static inline Py_ssize_t
557
STRINGLIB(default_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
558
                        const STRINGLIB_CHAR* p, Py_ssize_t m,
559
                        Py_ssize_t maxcount, int mode)
560
16.1M
{
561
16.1M
    const Py_ssize_t w = n - m;
562
16.1M
    Py_ssize_t mlast = m - 1, count = 0;
563
16.1M
    Py_ssize_t gap = mlast;
564
16.1M
    const STRINGLIB_CHAR last = p[mlast];
565
16.1M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
16.1M
    unsigned long mask = 0;
568
32.4M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
16.2M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
16.2M
        if (p[i] == last) {
571
575k
            gap = mlast - i - 1;
572
575k
        }
573
16.2M
    }
574
16.1M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
3.63G
    for (Py_ssize_t i = 0; i <= w; i++) {
577
3.62G
        if (ss[i] == last) {
578
            /* candidate match */
579
34.9M
            Py_ssize_t j;
580
57.2M
            for (j = 0; j < mlast; j++) {
581
35.0M
                if (s[i+j] != p[j]) {
582
12.8M
                    break;
583
12.8M
                }
584
35.0M
            }
585
34.9M
            if (j == mlast) {
586
                /* got a match! */
587
22.1M
                if (mode != FAST_COUNT) {
588
11.1M
                    return i;
589
11.1M
                }
590
10.9M
                count++;
591
10.9M
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
10.9M
                i = i + mlast;
595
10.9M
                continue;
596
10.9M
            }
597
            /* miss: check if next character is part of pattern */
598
12.8M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
4.56M
                i = i + m;
600
4.56M
            }
601
8.30M
            else {
602
8.30M
                i = i + gap;
603
8.30M
            }
604
12.8M
        }
605
3.59G
        else {
606
            /* skip: check if next character is part of pattern */
607
3.59G
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
3.55G
                i = i + m;
609
3.55G
            }
610
3.59G
        }
611
3.62G
    }
612
4.95M
    return mode == FAST_COUNT ? count : -1;
613
16.1M
}
Unexecuted instantiation: bytesobject.c:stringlib_default_find
unicodeobject.c:asciilib_default_find
Line
Count
Source
560
1.63M
{
561
1.63M
    const Py_ssize_t w = n - m;
562
1.63M
    Py_ssize_t mlast = m - 1, count = 0;
563
1.63M
    Py_ssize_t gap = mlast;
564
1.63M
    const STRINGLIB_CHAR last = p[mlast];
565
1.63M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
1.63M
    unsigned long mask = 0;
568
3.27M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
1.64M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
1.64M
        if (p[i] == last) {
571
18.0k
            gap = mlast - i - 1;
572
18.0k
        }
573
1.64M
    }
574
1.63M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
184M
    for (Py_ssize_t i = 0; i <= w; i++) {
577
184M
        if (ss[i] == last) {
578
            /* candidate match */
579
4.12M
            Py_ssize_t j;
580
5.73M
            for (j = 0; j < mlast; j++) {
581
4.13M
                if (s[i+j] != p[j]) {
582
2.52M
                    break;
583
2.52M
                }
584
4.13M
            }
585
4.12M
            if (j == mlast) {
586
                /* got a match! */
587
1.60M
                if (mode != FAST_COUNT) {
588
1.60M
                    return i;
589
1.60M
                }
590
0
                count++;
591
0
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
0
                i = i + mlast;
595
0
                continue;
596
0
            }
597
            /* miss: check if next character is part of pattern */
598
2.52M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
40.8k
                i = i + m;
600
40.8k
            }
601
2.48M
            else {
602
2.48M
                i = i + gap;
603
2.48M
            }
604
2.52M
        }
605
180M
        else {
606
            /* skip: check if next character is part of pattern */
607
180M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
173M
                i = i + m;
609
173M
            }
610
180M
        }
611
184M
    }
612
31.0k
    return mode == FAST_COUNT ? count : -1;
613
1.63M
}
unicodeobject.c:ucs1lib_default_find
Line
Count
Source
560
6.75M
{
561
6.75M
    const Py_ssize_t w = n - m;
562
6.75M
    Py_ssize_t mlast = m - 1, count = 0;
563
6.75M
    Py_ssize_t gap = mlast;
564
6.75M
    const STRINGLIB_CHAR last = p[mlast];
565
6.75M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
6.75M
    unsigned long mask = 0;
568
13.5M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
6.81M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
6.81M
        if (p[i] == last) {
571
495k
            gap = mlast - i - 1;
572
495k
        }
573
6.81M
    }
574
6.75M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
532M
    for (Py_ssize_t i = 0; i <= w; i++) {
577
527M
        if (ss[i] == last) {
578
            /* candidate match */
579
10.6M
            Py_ssize_t j;
580
16.0M
            for (j = 0; j < mlast; j++) {
581
10.6M
                if (s[i+j] != p[j]) {
582
5.18M
                    break;
583
5.18M
                }
584
10.6M
            }
585
10.6M
            if (j == mlast) {
586
                /* got a match! */
587
5.42M
                if (mode != FAST_COUNT) {
588
1.95M
                    return i;
589
1.95M
                }
590
3.47M
                count++;
591
3.47M
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
3.47M
                i = i + mlast;
595
3.47M
                continue;
596
3.47M
            }
597
            /* miss: check if next character is part of pattern */
598
5.18M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
1.83M
                i = i + m;
600
1.83M
            }
601
3.35M
            else {
602
3.35M
                i = i + gap;
603
3.35M
            }
604
5.18M
        }
605
516M
        else {
606
            /* skip: check if next character is part of pattern */
607
516M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
505M
                i = i + m;
609
505M
            }
610
516M
        }
611
527M
    }
612
4.79M
    return mode == FAST_COUNT ? count : -1;
613
6.75M
}
unicodeobject.c:ucs2lib_default_find
Line
Count
Source
560
3.08M
{
561
3.08M
    const Py_ssize_t w = n - m;
562
3.08M
    Py_ssize_t mlast = m - 1, count = 0;
563
3.08M
    Py_ssize_t gap = mlast;
564
3.08M
    const STRINGLIB_CHAR last = p[mlast];
565
3.08M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
3.08M
    unsigned long mask = 0;
568
6.21M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
3.12M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
3.12M
        if (p[i] == last) {
571
33.3k
            gap = mlast - i - 1;
572
33.3k
        }
573
3.12M
    }
574
3.08M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
1.23G
    for (Py_ssize_t i = 0; i <= w; i++) {
577
1.23G
        if (ss[i] == last) {
578
            /* candidate match */
579
8.34M
            Py_ssize_t j;
580
14.2M
            for (j = 0; j < mlast; j++) {
581
8.38M
                if (s[i+j] != p[j]) {
582
2.49M
                    break;
583
2.49M
                }
584
8.38M
            }
585
8.34M
            if (j == mlast) {
586
                /* got a match! */
587
5.85M
                if (mode != FAST_COUNT) {
588
2.98M
                    return i;
589
2.98M
                }
590
2.87M
                count++;
591
2.87M
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
2.87M
                i = i + mlast;
595
2.87M
                continue;
596
2.87M
            }
597
            /* miss: check if next character is part of pattern */
598
2.49M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
995k
                i = i + m;
600
995k
            }
601
1.49M
            else {
602
1.49M
                i = i + gap;
603
1.49M
            }
604
2.49M
        }
605
1.22G
        else {
606
            /* skip: check if next character is part of pattern */
607
1.22G
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
1.21G
                i = i + m;
609
1.21G
            }
610
1.22G
        }
611
1.23G
    }
612
102k
    return mode == FAST_COUNT ? count : -1;
613
3.08M
}
unicodeobject.c:ucs4lib_default_find
Line
Count
Source
560
4.66M
{
561
4.66M
    const Py_ssize_t w = n - m;
562
4.66M
    Py_ssize_t mlast = m - 1, count = 0;
563
4.66M
    Py_ssize_t gap = mlast;
564
4.66M
    const STRINGLIB_CHAR last = p[mlast];
565
4.66M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
4.66M
    unsigned long mask = 0;
568
9.36M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
4.69M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
4.69M
        if (p[i] == last) {
571
25.8k
            gap = mlast - i - 1;
572
25.8k
        }
573
4.69M
    }
574
4.66M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
1.68G
    for (Py_ssize_t i = 0; i <= w; i++) {
577
1.68G
        if (ss[i] == last) {
578
            /* candidate match */
579
11.8M
            Py_ssize_t j;
580
21.1M
            for (j = 0; j < mlast; j++) {
581
11.9M
                if (s[i+j] != p[j]) {
582
2.67M
                    break;
583
2.67M
                }
584
11.9M
            }
585
11.8M
            if (j == mlast) {
586
                /* got a match! */
587
9.22M
                if (mode != FAST_COUNT) {
588
4.63M
                    return i;
589
4.63M
                }
590
4.58M
                count++;
591
4.58M
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
4.58M
                i = i + mlast;
595
4.58M
                continue;
596
4.58M
            }
597
            /* miss: check if next character is part of pattern */
598
2.67M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
1.69M
                i = i + m;
600
1.69M
            }
601
973k
            else {
602
973k
                i = i + gap;
603
973k
            }
604
2.67M
        }
605
1.67G
        else {
606
            /* skip: check if next character is part of pattern */
607
1.67G
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
1.65G
                i = i + m;
609
1.65G
            }
610
1.67G
        }
611
1.68G
    }
612
26.2k
    return mode == FAST_COUNT ? count : -1;
613
4.66M
}
bytes_methods.c:stringlib_default_find
Line
Count
Source
560
2.96k
{
561
2.96k
    const Py_ssize_t w = n - m;
562
2.96k
    Py_ssize_t mlast = m - 1, count = 0;
563
2.96k
    Py_ssize_t gap = mlast;
564
2.96k
    const STRINGLIB_CHAR last = p[mlast];
565
2.96k
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
2.96k
    unsigned long mask = 0;
568
11.8k
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
8.90k
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
8.90k
        if (p[i] == last) {
571
2.96k
            gap = mlast - i - 1;
572
2.96k
        }
573
8.90k
    }
574
2.96k
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
406k
    for (Py_ssize_t i = 0; i <= w; i++) {
577
405k
        if (ss[i] == last) {
578
            /* candidate match */
579
7.17k
            Py_ssize_t j;
580
15.6k
            for (j = 0; j < mlast; j++) {
581
12.9k
                if (s[i+j] != p[j]) {
582
4.43k
                    break;
583
4.43k
                }
584
12.9k
            }
585
7.17k
            if (j == mlast) {
586
                /* got a match! */
587
2.73k
                if (mode != FAST_COUNT) {
588
2.73k
                    return i;
589
2.73k
                }
590
0
                count++;
591
0
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
0
                i = i + mlast;
595
0
                continue;
596
0
            }
597
            /* miss: check if next character is part of pattern */
598
4.43k
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
557
                i = i + m;
600
557
            }
601
3.88k
            else {
602
3.88k
                i = i + gap;
603
3.88k
            }
604
4.43k
        }
605
398k
        else {
606
            /* skip: check if next character is part of pattern */
607
398k
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
24.1k
                i = i + m;
609
24.1k
            }
610
398k
        }
611
405k
    }
612
235
    return mode == FAST_COUNT ? count : -1;
613
2.96k
}
Unexecuted instantiation: bytearrayobject.c:stringlib_default_find
614
615
616
static Py_ssize_t
617
STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
618
                         const STRINGLIB_CHAR* p, Py_ssize_t m,
619
                         Py_ssize_t maxcount, int mode)
620
0
{
621
0
    const Py_ssize_t w = n - m;
622
0
    Py_ssize_t mlast = m - 1, count = 0;
623
0
    Py_ssize_t gap = mlast;
624
0
    Py_ssize_t hits = 0, res;
625
0
    const STRINGLIB_CHAR last = p[mlast];
626
0
    const STRINGLIB_CHAR *const ss = &s[mlast];
627
628
0
    unsigned long mask = 0;
629
0
    for (Py_ssize_t i = 0; i < mlast; i++) {
630
0
        STRINGLIB_BLOOM_ADD(mask, p[i]);
631
0
        if (p[i] == last) {
632
0
            gap = mlast - i - 1;
633
0
        }
634
0
    }
635
0
    STRINGLIB_BLOOM_ADD(mask, last);
636
637
0
    for (Py_ssize_t i = 0; i <= w; i++) {
638
0
        if (ss[i] == last) {
639
            /* candidate match */
640
0
            Py_ssize_t j;
641
0
            for (j = 0; j < mlast; j++) {
642
0
                if (s[i+j] != p[j]) {
643
0
                    break;
644
0
                }
645
0
            }
646
0
            if (j == mlast) {
647
                /* got a match! */
648
0
                if (mode != FAST_COUNT) {
649
0
                    return i;
650
0
                }
651
0
                count++;
652
0
                if (count == maxcount) {
653
0
                    return maxcount;
654
0
                }
655
0
                i = i + mlast;
656
0
                continue;
657
0
            }
658
0
            hits += j + 1;
659
0
            if (hits > m / 4 && w - i > 2000) {
660
0
                if (mode == FAST_SEARCH) {
661
0
                    res = STRINGLIB(_two_way_find)(s + i, n - i, p, m);
662
0
                    return res == -1 ? -1 : res + i;
663
0
                }
664
0
                else {
665
0
                    res = STRINGLIB(_two_way_count)(s + i, n - i, p, m,
666
0
                                                    maxcount - count);
667
0
                    return res + count;
668
0
                }
669
0
            }
670
            /* miss: check if next character is part of pattern */
671
0
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
672
0
                i = i + m;
673
0
            }
674
0
            else {
675
0
                i = i + gap;
676
0
            }
677
0
        }
678
0
        else {
679
            /* skip: check if next character is part of pattern */
680
0
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
681
0
                i = i + m;
682
0
            }
683
0
        }
684
0
    }
685
0
    return mode == FAST_COUNT ? count : -1;
686
0
}
Unexecuted instantiation: bytesobject.c:stringlib_adaptive_find
Unexecuted instantiation: unicodeobject.c:asciilib_adaptive_find
Unexecuted instantiation: unicodeobject.c:ucs1lib_adaptive_find
Unexecuted instantiation: unicodeobject.c:ucs2lib_adaptive_find
Unexecuted instantiation: unicodeobject.c:ucs4lib_adaptive_find
Unexecuted instantiation: bytes_methods.c:stringlib_adaptive_find
Unexecuted instantiation: bytearrayobject.c:stringlib_adaptive_find
687
688
689
static Py_ssize_t
690
STRINGLIB(default_rfind)(const STRINGLIB_CHAR* s, Py_ssize_t n,
691
                         const STRINGLIB_CHAR* p, Py_ssize_t m,
692
                         Py_ssize_t maxcount, int mode)
693
4
{
694
    /* create compressed boyer-moore delta 1 table */
695
4
    unsigned long mask = 0;
696
4
    Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m;
697
698
    /* process pattern[0] outside the loop */
699
4
    STRINGLIB_BLOOM_ADD(mask, p[0]);
700
    /* process pattern[:0:-1] */
701
16
    for (i = mlast; i > 0; i--) {
702
12
        STRINGLIB_BLOOM_ADD(mask, p[i]);
703
12
        if (p[i] == p[0]) {
704
0
            skip = i - 1;
705
0
        }
706
12
    }
707
708
356
    for (i = w; i >= 0; i--) {
709
352
        if (s[i] == p[0]) {
710
            /* candidate match */
711
8
            for (j = mlast; j > 0; j--) {
712
8
                if (s[i+j] != p[j]) {
713
8
                    break;
714
8
                }
715
8
            }
716
8
            if (j == 0) {
717
                /* got a match! */
718
0
                return i;
719
0
            }
720
            /* miss: check if previous character is part of pattern */
721
8
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
722
8
                i = i - m;
723
8
            }
724
0
            else {
725
0
                i = i - skip;
726
0
            }
727
8
        }
728
344
        else {
729
            /* skip: check if previous character is part of pattern */
730
344
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
731
336
                i = i - m;
732
336
            }
733
344
        }
734
352
    }
735
4
    return -1;
736
4
}
Unexecuted instantiation: bytesobject.c:stringlib_default_rfind
Unexecuted instantiation: unicodeobject.c:asciilib_default_rfind
Unexecuted instantiation: unicodeobject.c:ucs1lib_default_rfind
Unexecuted instantiation: unicodeobject.c:ucs2lib_default_rfind
Unexecuted instantiation: unicodeobject.c:ucs4lib_default_rfind
bytes_methods.c:stringlib_default_rfind
Line
Count
Source
693
4
{
694
    /* create compressed boyer-moore delta 1 table */
695
4
    unsigned long mask = 0;
696
4
    Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m;
697
698
    /* process pattern[0] outside the loop */
699
4
    STRINGLIB_BLOOM_ADD(mask, p[0]);
700
    /* process pattern[:0:-1] */
701
16
    for (i = mlast; i > 0; i--) {
702
12
        STRINGLIB_BLOOM_ADD(mask, p[i]);
703
12
        if (p[i] == p[0]) {
704
0
            skip = i - 1;
705
0
        }
706
12
    }
707
708
356
    for (i = w; i >= 0; i--) {
709
352
        if (s[i] == p[0]) {
710
            /* candidate match */
711
8
            for (j = mlast; j > 0; j--) {
712
8
                if (s[i+j] != p[j]) {
713
8
                    break;
714
8
                }
715
8
            }
716
8
            if (j == 0) {
717
                /* got a match! */
718
0
                return i;
719
0
            }
720
            /* miss: check if previous character is part of pattern */
721
8
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
722
8
                i = i - m;
723
8
            }
724
0
            else {
725
0
                i = i - skip;
726
0
            }
727
8
        }
728
344
        else {
729
            /* skip: check if previous character is part of pattern */
730
344
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
731
336
                i = i - m;
732
336
            }
733
344
        }
734
352
    }
735
4
    return -1;
736
4
}
Unexecuted instantiation: bytearrayobject.c:stringlib_default_rfind
737
738
739
static inline Py_ssize_t
740
STRINGLIB(count_char)(const STRINGLIB_CHAR *s, Py_ssize_t n,
741
                      const STRINGLIB_CHAR p0, Py_ssize_t maxcount)
742
0
{
743
0
    Py_ssize_t i, count = 0;
744
0
    for (i = 0; i < n; i++) {
745
0
        if (s[i] == p0) {
746
0
            count++;
747
0
            if (count == maxcount) {
748
0
                return maxcount;
749
0
            }
750
0
        }
751
0
    }
752
0
    return count;
753
0
}
Unexecuted instantiation: bytesobject.c:stringlib_count_char
Unexecuted instantiation: unicodeobject.c:asciilib_count_char
Unexecuted instantiation: unicodeobject.c:ucs1lib_count_char
Unexecuted instantiation: unicodeobject.c:ucs2lib_count_char
Unexecuted instantiation: unicodeobject.c:ucs4lib_count_char
Unexecuted instantiation: bytes_methods.c:stringlib_count_char
Unexecuted instantiation: bytearrayobject.c:stringlib_count_char
754
755
756
static inline Py_ssize_t
757
STRINGLIB(count_char_no_maxcount)(const STRINGLIB_CHAR *s, Py_ssize_t n,
758
                                  const STRINGLIB_CHAR p0)
759
/* A specialized function of count_char that does not cut off at a maximum.
760
   As a result, the compiler is able to vectorize the loop. */
761
68.6M
{
762
68.6M
    Py_ssize_t count = 0;
763
6.62G
    for (Py_ssize_t i = 0; i < n; i++) {
764
6.55G
        if (s[i] == p0) {
765
207M
            count++;
766
207M
        }
767
6.55G
    }
768
68.6M
    return count;
769
68.6M
}
Unexecuted instantiation: bytesobject.c:stringlib_count_char_no_maxcount
Unexecuted instantiation: unicodeobject.c:asciilib_count_char_no_maxcount
unicodeobject.c:ucs1lib_count_char_no_maxcount
Line
Count
Source
761
54.7M
{
762
54.7M
    Py_ssize_t count = 0;
763
1.76G
    for (Py_ssize_t i = 0; i < n; i++) {
764
1.70G
        if (s[i] == p0) {
765
52.2M
            count++;
766
52.2M
        }
767
1.70G
    }
768
54.7M
    return count;
769
54.7M
}
unicodeobject.c:ucs2lib_count_char_no_maxcount
Line
Count
Source
761
12.4M
{
762
12.4M
    Py_ssize_t count = 0;
763
2.41G
    for (Py_ssize_t i = 0; i < n; i++) {
764
2.40G
        if (s[i] == p0) {
765
81.5M
            count++;
766
81.5M
        }
767
2.40G
    }
768
12.4M
    return count;
769
12.4M
}
unicodeobject.c:ucs4lib_count_char_no_maxcount
Line
Count
Source
761
1.39M
{
762
1.39M
    Py_ssize_t count = 0;
763
2.44G
    for (Py_ssize_t i = 0; i < n; i++) {
764
2.43G
        if (s[i] == p0) {
765
73.6M
            count++;
766
73.6M
        }
767
2.43G
    }
768
1.39M
    return count;
769
1.39M
}
Unexecuted instantiation: bytes_methods.c:stringlib_count_char_no_maxcount
Unexecuted instantiation: bytearrayobject.c:stringlib_count_char_no_maxcount
770
771
772
Py_LOCAL_INLINE(Py_ssize_t)
773
FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
774
           const STRINGLIB_CHAR* p, Py_ssize_t m,
775
           Py_ssize_t maxcount, int mode)
776
256M
{
777
256M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
15.3k
        return -1;
779
15.3k
    }
780
781
    /* look for special cases */
782
256M
    if (m <= 1) {
783
240M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
240M
        if (mode == FAST_SEARCH)
788
171M
            return STRINGLIB(find_char)(s, n, p[0]);
789
68.6M
        else if (mode == FAST_RSEARCH)
790
11.1k
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
68.6M
        else {
792
68.6M
            if (maxcount == PY_SSIZE_T_MAX) {
793
68.6M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
68.6M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
68.6M
        }
797
240M
    }
798
799
16.1M
    if (mode != FAST_RSEARCH) {
800
16.1M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
16.1M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
16.1M
        }
803
22
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
22
            if (mode == FAST_SEARCH) {
810
22
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
22
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
22
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
16.1M
    }
825
4
    else {
826
        /* FAST_RSEARCH */
827
4
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
4
    }
829
16.1M
}
Unexecuted instantiation: bytesobject.c:fastsearch
unicodeobject.c:asciilib_fastsearch
Line
Count
Source
776
31.7M
{
777
31.7M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
0
        return -1;
779
0
    }
780
781
    /* look for special cases */
782
31.7M
    if (m <= 1) {
783
30.1M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
30.1M
        if (mode == FAST_SEARCH)
788
30.1M
            return STRINGLIB(find_char)(s, n, p[0]);
789
11.1k
        else if (mode == FAST_RSEARCH)
790
11.1k
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
0
        else {
792
0
            if (maxcount == PY_SSIZE_T_MAX) {
793
0
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
0
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
0
        }
797
30.1M
    }
798
799
1.63M
    if (mode != FAST_RSEARCH) {
800
1.63M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
1.63M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
1.63M
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
1.63M
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
1.63M
}
unicodeobject.c:ucs1lib_fastsearch
Line
Count
Source
776
69.8M
{
777
69.8M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
0
        return -1;
779
0
    }
780
781
    /* look for special cases */
782
69.8M
    if (m <= 1) {
783
63.1M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
63.1M
        if (mode == FAST_SEARCH)
788
8.33M
            return STRINGLIB(find_char)(s, n, p[0]);
789
54.7M
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
54.7M
        else {
792
54.7M
            if (maxcount == PY_SSIZE_T_MAX) {
793
54.7M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
54.7M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
54.7M
        }
797
63.1M
    }
798
799
6.75M
    if (mode != FAST_RSEARCH) {
800
6.75M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
6.75M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
6.75M
        }
803
22
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
22
            if (mode == FAST_SEARCH) {
810
22
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
22
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
22
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
6.75M
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
6.75M
}
unicodeobject.c:ucs2lib_fastsearch
Line
Count
Source
776
85.6M
{
777
85.6M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
15.3k
        return -1;
779
15.3k
    }
780
781
    /* look for special cases */
782
85.6M
    if (m <= 1) {
783
82.5M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
82.5M
        if (mode == FAST_SEARCH)
788
70.1M
            return STRINGLIB(find_char)(s, n, p[0]);
789
12.4M
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
12.4M
        else {
792
12.4M
            if (maxcount == PY_SSIZE_T_MAX) {
793
12.4M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
12.4M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
12.4M
        }
797
82.5M
    }
798
799
3.08M
    if (mode != FAST_RSEARCH) {
800
3.08M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
3.08M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
3.08M
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
3.08M
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
3.08M
}
unicodeobject.c:ucs4lib_fastsearch
Line
Count
Source
776
69.1M
{
777
69.1M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
0
        return -1;
779
0
    }
780
781
    /* look for special cases */
782
69.1M
    if (m <= 1) {
783
64.5M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
64.5M
        if (mode == FAST_SEARCH)
788
63.1M
            return STRINGLIB(find_char)(s, n, p[0]);
789
1.39M
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
1.39M
        else {
792
1.39M
            if (maxcount == PY_SSIZE_T_MAX) {
793
1.39M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
1.39M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
1.39M
        }
797
64.5M
    }
798
799
4.66M
    if (mode != FAST_RSEARCH) {
800
4.66M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
4.66M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
4.66M
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
4.66M
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
4.66M
}
bytes_methods.c:fastsearch
Line
Count
Source
776
2.98k
{
777
2.98k
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
13
        return -1;
779
13
    }
780
781
    /* look for special cases */
782
2.97k
    if (m <= 1) {
783
0
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
0
        if (mode == FAST_SEARCH)
788
0
            return STRINGLIB(find_char)(s, n, p[0]);
789
0
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
0
        else {
792
0
            if (maxcount == PY_SSIZE_T_MAX) {
793
0
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
0
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
0
        }
797
0
    }
798
799
2.97k
    if (mode != FAST_RSEARCH) {
800
2.96k
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
2.96k
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
2.96k
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
2.96k
    }
825
4
    else {
826
        /* FAST_RSEARCH */
827
4
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
4
    }
829
2.97k
}
Unexecuted instantiation: bytearrayobject.c:fastsearch
830