Coverage Report

Created: 2025-07-11 06:24

/src/cpython/Objects/stringlib/fastsearch.h
Line
Count
Source (jump to first uncovered line)
1
/* stringlib: fastsearch implementation */
2
3
#define STRINGLIB_FASTSEARCH_H
4
5
/* fast search/count implementation, based on a mix between boyer-
6
   moore and horspool, with a few more bells and whistles on the top.
7
   for some more background, see:
8
   https://web.archive.org/web/20201107074620/http://effbot.org/zone/stringlib.htm */
9
10
/* note: fastsearch may access s[n], which isn't a problem when using
11
   Python's ordinary string types, but may cause problems if you're
12
   using this code in other contexts.  also, the count mode returns -1
13
   if there cannot possibly be a match in the target string, and 0 if
14
   it has actually checked for matches, but didn't find any.  callers
15
   beware! */
16
17
/* If the strings are long enough, use Crochemore and Perrin's Two-Way
18
   algorithm, which has worst-case O(n) runtime and best-case O(n/k).
19
   Also compute a table of shifts to achieve O(n/k) in more cases,
20
   and often (data dependent) deduce larger shifts than pure C&P can
21
   deduce. See stringlib_find_two_way_notes.txt in this folder for a
22
   detailed explanation. */
23
24
501M
#define FAST_COUNT 0
25
346M
#define FAST_SEARCH 1
26
68.4M
#define FAST_RSEARCH 2
27
28
#if LONG_BIT >= 128
29
#define STRINGLIB_BLOOM_WIDTH 128
30
#elif LONG_BIT >= 64
31
4.93G
#define STRINGLIB_BLOOM_WIDTH 64
32
#elif LONG_BIT >= 32
33
#define STRINGLIB_BLOOM_WIDTH 32
34
#else
35
#error "LONG_BIT is smaller than 32"
36
#endif
37
38
#define STRINGLIB_BLOOM_ADD(mask, ch) \
39
28.0M
    ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
40
#define STRINGLIB_BLOOM(mask, ch)     \
41
4.90G
    ((mask &  (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
42
43
#ifdef STRINGLIB_FAST_MEMCHR
44
189M
#  define MEMCHR_CUT_OFF 15
45
#else
46
54.1M
#  define MEMCHR_CUT_OFF 40
47
#endif
48
49
Py_LOCAL_INLINE(Py_ssize_t)
50
STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
51
243M
{
52
243M
    const STRINGLIB_CHAR *p, *e;
53
54
243M
    p = s;
55
243M
    e = s + n;
56
243M
    if (n > MEMCHR_CUT_OFF) {
57
#ifdef STRINGLIB_FAST_MEMCHR
58
98.5M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
98.5M
        if (p != NULL)
60
97.1M
            return (p - s);
61
1.37M
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
46.8M
        if (needle != 0) {
71
46.5M
            do {
72
46.5M
                void *candidate = memchr(p, needle,
73
46.5M
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
46.5M
                if (candidate == NULL)
75
368k
                    return -1;
76
46.1M
                s1 = p;
77
46.1M
                p = (const STRINGLIB_CHAR *)
78
46.1M
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
46.1M
                if (*p == ch)
80
46.1M
                    return (p - s);
81
                /* False positive */
82
52.9k
                p++;
83
52.9k
                if (p - s1 > MEMCHR_CUT_OFF)
84
24.1k
                    continue;
85
28.7k
                if (e - p <= MEMCHR_CUT_OFF)
86
3.12k
                    break;
87
25.6k
                e1 = p + MEMCHR_CUT_OFF;
88
866k
                while (p != e1) {
89
847k
                    if (*p == ch)
90
6.65k
                        return (p - s);
91
840k
                    p++;
92
840k
                }
93
25.6k
            }
94
46.5M
            while (e - p > MEMCHR_CUT_OFF);
95
46.5M
        }
96
#endif
97
145M
    }
98
372M
    while (p < e) {
99
287M
        if (*p == ch)
100
13.3M
            return (p - s);
101
273M
        p++;
102
273M
    }
103
84.8M
    return -1;
104
98.2M
}
Unexecuted instantiation: bytesobject.c:stringlib_find_char
unicodeobject.c:ucs1lib_find_char
Line
Count
Source
51
101M
{
52
101M
    const STRINGLIB_CHAR *p, *e;
53
54
101M
    p = s;
55
101M
    e = s + n;
56
101M
    if (n > MEMCHR_CUT_OFF) {
57
14.7M
#ifdef STRINGLIB_FAST_MEMCHR
58
14.7M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
14.7M
        if (p != NULL)
60
14.0M
            return (p - s);
61
728k
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                void *candidate = memchr(p, needle,
73
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
14.7M
    }
98
242M
    while (p < e) {
99
159M
        if (*p == ch)
100
3.64M
            return (p - s);
101
156M
        p++;
102
156M
    }
103
82.6M
    return -1;
104
86.3M
}
unicodeobject.c:ucs2lib_find_char
Line
Count
Source
51
54.0M
{
52
54.0M
    const STRINGLIB_CHAR *p, *e;
53
54
54.0M
    p = s;
55
54.0M
    e = s + n;
56
54.0M
    if (n > MEMCHR_CUT_OFF) {
57
#ifdef STRINGLIB_FAST_MEMCHR
58
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
        if (p != NULL)
60
            return (p - s);
61
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
46.8M
        const STRINGLIB_CHAR *s1, *e1;
66
46.8M
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
46.8M
        if (needle != 0) {
71
46.5M
            do {
72
46.5M
                void *candidate = memchr(p, needle,
73
46.5M
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
46.5M
                if (candidate == NULL)
75
368k
                    return -1;
76
46.1M
                s1 = p;
77
46.1M
                p = (const STRINGLIB_CHAR *)
78
46.1M
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
46.1M
                if (*p == ch)
80
46.1M
                    return (p - s);
81
                /* False positive */
82
52.9k
                p++;
83
52.9k
                if (p - s1 > MEMCHR_CUT_OFF)
84
24.1k
                    continue;
85
28.7k
                if (e - p <= MEMCHR_CUT_OFF)
86
3.12k
                    break;
87
25.6k
                e1 = p + MEMCHR_CUT_OFF;
88
866k
                while (p != e1) {
89
847k
                    if (*p == ch)
90
6.65k
                        return (p - s);
91
840k
                    p++;
92
840k
                }
93
25.6k
            }
94
46.5M
            while (e - p > MEMCHR_CUT_OFF);
95
46.5M
        }
96
46.8M
#endif
97
46.8M
    }
98
116M
    while (p < e) {
99
114M
        if (*p == ch)
100
5.44M
            return (p - s);
101
109M
        p++;
102
109M
    }
103
2.10M
    return -1;
104
7.54M
}
unicodeobject.c:ucs4lib_find_char
Line
Count
Source
51
71.4M
{
52
71.4M
    const STRINGLIB_CHAR *p, *e;
53
54
71.4M
    p = s;
55
71.4M
    e = s + n;
56
71.4M
    if (n > MEMCHR_CUT_OFF) {
57
71.3M
#ifdef STRINGLIB_FAST_MEMCHR
58
71.3M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
71.3M
        if (p != NULL)
60
71.3M
            return (p - s);
61
26.8k
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                void *candidate = memchr(p, needle,
73
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
71.3M
    }
98
243k
    while (p < e) {
99
213k
        if (*p == ch)
100
27.1k
            return (p - s);
101
186k
        p++;
102
186k
    }
103
30.8k
    return -1;
104
57.9k
}
unicodeobject.c:asciilib_find_char
Line
Count
Source
51
16.6M
{
52
16.6M
    const STRINGLIB_CHAR *p, *e;
53
54
16.6M
    p = s;
55
16.6M
    e = s + n;
56
16.6M
    if (n > MEMCHR_CUT_OFF) {
57
12.3M
#ifdef STRINGLIB_FAST_MEMCHR
58
12.3M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
12.3M
        if (p != NULL)
60
11.7M
            return (p - s);
61
616k
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                void *candidate = memchr(p, needle,
73
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
12.3M
    }
98
12.7M
    while (p < e) {
99
12.6M
        if (*p == ch)
100
4.25M
            return (p - s);
101
8.44M
        p++;
102
8.44M
    }
103
58.2k
    return -1;
104
4.31M
}
bytes_methods.c:stringlib_find_char
Line
Count
Source
51
1.59k
{
52
1.59k
    const STRINGLIB_CHAR *p, *e;
53
54
1.59k
    p = s;
55
1.59k
    e = s + n;
56
1.59k
    if (n > MEMCHR_CUT_OFF) {
57
1.59k
#ifdef STRINGLIB_FAST_MEMCHR
58
1.59k
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
1.59k
        if (p != NULL)
60
1.35k
            return (p - s);
61
242
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                void *candidate = memchr(p, needle,
73
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
1.59k
    }
98
0
    while (p < e) {
99
0
        if (*p == ch)
100
0
            return (p - s);
101
0
        p++;
102
0
    }
103
0
    return -1;
104
0
}
Unexecuted instantiation: bytearrayobject.c:stringlib_find_char
105
106
#undef MEMCHR_CUT_OFF
107
108
#if STRINGLIB_SIZEOF_CHAR == 1
109
35.3k
#  define MEMRCHR_CUT_OFF 15
110
#else
111
224k
#  define MEMRCHR_CUT_OFF 40
112
#endif
113
114
115
Py_LOCAL_INLINE(Py_ssize_t)
116
STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
117
221k
{
118
221k
    const STRINGLIB_CHAR *p;
119
221k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
221k
    if (n > MEMRCHR_CUT_OFF) {
126
#if STRINGLIB_SIZEOF_CHAR == 1
127
        p = memrchr(s, ch, n);
128
9.60k
        if (p != NULL)
129
6.81k
            return (p - s);
130
2.78k
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
94.6k
        if (needle != 0) {
141
102k
            do {
142
102k
                void *candidate = memrchr(s, needle,
143
102k
                                          n * sizeof(STRINGLIB_CHAR));
144
102k
                if (candidate == NULL)
145
597
                    return -1;
146
102k
                n1 = n;
147
102k
                p = (const STRINGLIB_CHAR *)
148
102k
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
102k
                n = p - s;
150
102k
                if (*p == ch)
151
89.1k
                    return n;
152
                /* False positive */
153
13.1k
                if (n1 - n > MEMRCHR_CUT_OFF)
154
4.95k
                    continue;
155
8.15k
                if (n <= MEMRCHR_CUT_OFF)
156
1.26k
                    break;
157
6.89k
                s1 = p - MEMRCHR_CUT_OFF;
158
235k
                while (p > s1) {
159
230k
                    p--;
160
230k
                    if (*p == ch)
161
1.89k
                        return (p - s);
162
230k
                }
163
5.00k
                n = p - s;
164
5.00k
            }
165
94.6k
            while (n > MEMRCHR_CUT_OFF);
166
94.6k
        }
167
#endif
168
104k
    }
169
120k
#endif  /* HAVE_MEMRCHR */
170
120k
    p = s + n;
171
549k
    while (p > s) {
172
532k
        p--;
173
532k
        if (*p == ch)
174
103k
            return (p - s);
175
532k
    }
176
16.5k
    return -1;
177
120k
}
Unexecuted instantiation: bytesobject.c:stringlib_rfind_char
unicodeobject.c:ucs1lib_rfind_char
Line
Count
Source
117
8.03k
{
118
8.03k
    const STRINGLIB_CHAR *p;
119
8.03k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
8.03k
    if (n > MEMRCHR_CUT_OFF) {
126
3.86k
#if STRINGLIB_SIZEOF_CHAR == 1
127
3.86k
        p = memrchr(s, ch, n);
128
3.86k
        if (p != NULL)
129
2.93k
            return (p - s);
130
927
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
        if (needle != 0) {
141
            do {
142
                void *candidate = memrchr(s, needle,
143
                                          n * sizeof(STRINGLIB_CHAR));
144
                if (candidate == NULL)
145
                    return -1;
146
                n1 = n;
147
                p = (const STRINGLIB_CHAR *)
148
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
                n = p - s;
150
                if (*p == ch)
151
                    return n;
152
                /* False positive */
153
                if (n1 - n > MEMRCHR_CUT_OFF)
154
                    continue;
155
                if (n <= MEMRCHR_CUT_OFF)
156
                    break;
157
                s1 = p - MEMRCHR_CUT_OFF;
158
                while (p > s1) {
159
                    p--;
160
                    if (*p == ch)
161
                        return (p - s);
162
                }
163
                n = p - s;
164
            }
165
            while (n > MEMRCHR_CUT_OFF);
166
        }
167
#endif
168
3.86k
    }
169
4.17k
#endif  /* HAVE_MEMRCHR */
170
4.17k
    p = s + n;
171
12.9k
    while (p > s) {
172
12.1k
        p--;
173
12.1k
        if (*p == ch)
174
3.45k
            return (p - s);
175
12.1k
    }
176
714
    return -1;
177
4.17k
}
unicodeobject.c:ucs2lib_rfind_char
Line
Count
Source
117
35.1k
{
118
35.1k
    const STRINGLIB_CHAR *p;
119
35.1k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
35.1k
    if (n > MEMRCHR_CUT_OFF) {
126
#if STRINGLIB_SIZEOF_CHAR == 1
127
        p = memrchr(s, ch, n);
128
        if (p != NULL)
129
            return (p - s);
130
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
14.2k
        const STRINGLIB_CHAR *s1;
135
14.2k
        Py_ssize_t n1;
136
14.2k
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
14.2k
        if (needle != 0) {
141
15.9k
            do {
142
15.9k
                void *candidate = memrchr(s, needle,
143
15.9k
                                          n * sizeof(STRINGLIB_CHAR));
144
15.9k
                if (candidate == NULL)
145
331
                    return -1;
146
15.6k
                n1 = n;
147
15.6k
                p = (const STRINGLIB_CHAR *)
148
15.6k
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
15.6k
                n = p - s;
150
15.6k
                if (*p == ch)
151
12.3k
                    return n;
152
                /* False positive */
153
3.34k
                if (n1 - n > MEMRCHR_CUT_OFF)
154
1.23k
                    continue;
155
2.10k
                if (n <= MEMRCHR_CUT_OFF)
156
419
                    break;
157
1.68k
                s1 = p - MEMRCHR_CUT_OFF;
158
62.9k
                while (p > s1) {
159
61.4k
                    p--;
160
61.4k
                    if (*p == ch)
161
220
                        return (p - s);
162
61.4k
                }
163
1.46k
                n = p - s;
164
1.46k
            }
165
14.2k
            while (n > MEMRCHR_CUT_OFF);
166
14.2k
        }
167
14.2k
#endif
168
14.2k
    }
169
22.2k
#endif  /* HAVE_MEMRCHR */
170
22.2k
    p = s + n;
171
111k
    while (p > s) {
172
109k
        p--;
173
109k
        if (*p == ch)
174
20.6k
            return (p - s);
175
109k
    }
176
1.58k
    return -1;
177
22.2k
}
unicodeobject.c:ucs4lib_rfind_char
Line
Count
Source
117
151k
{
118
151k
    const STRINGLIB_CHAR *p;
119
151k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
151k
    if (n > MEMRCHR_CUT_OFF) {
126
#if STRINGLIB_SIZEOF_CHAR == 1
127
        p = memrchr(s, ch, n);
128
        if (p != NULL)
129
            return (p - s);
130
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
80.4k
        const STRINGLIB_CHAR *s1;
135
80.4k
        Py_ssize_t n1;
136
80.4k
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
80.4k
        if (needle != 0) {
141
86.8k
            do {
142
86.8k
                void *candidate = memrchr(s, needle,
143
86.8k
                                          n * sizeof(STRINGLIB_CHAR));
144
86.8k
                if (candidate == NULL)
145
266
                    return -1;
146
86.6k
                n1 = n;
147
86.6k
                p = (const STRINGLIB_CHAR *)
148
86.6k
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
86.6k
                n = p - s;
150
86.6k
                if (*p == ch)
151
76.8k
                    return n;
152
                /* False positive */
153
9.76k
                if (n1 - n > MEMRCHR_CUT_OFF)
154
3.71k
                    continue;
155
6.05k
                if (n <= MEMRCHR_CUT_OFF)
156
844
                    break;
157
5.20k
                s1 = p - MEMRCHR_CUT_OFF;
158
172k
                while (p > s1) {
159
169k
                    p--;
160
169k
                    if (*p == ch)
161
1.67k
                        return (p - s);
162
169k
                }
163
3.53k
                n = p - s;
164
3.53k
            }
165
80.4k
            while (n > MEMRCHR_CUT_OFF);
166
80.4k
        }
167
80.4k
#endif
168
80.4k
    }
169
72.4k
#endif  /* HAVE_MEMRCHR */
170
72.4k
    p = s + n;
171
309k
    while (p > s) {
172
307k
        p--;
173
307k
        if (*p == ch)
174
71.2k
            return (p - s);
175
307k
    }
176
1.22k
    return -1;
177
72.4k
}
unicodeobject.c:asciilib_rfind_char
Line
Count
Source
117
9.02k
{
118
9.02k
    const STRINGLIB_CHAR *p;
119
9.02k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
9.02k
    if (n > MEMRCHR_CUT_OFF) {
126
3.13k
#if STRINGLIB_SIZEOF_CHAR == 1
127
3.13k
        p = memrchr(s, ch, n);
128
3.13k
        if (p != NULL)
129
3.05k
            return (p - s);
130
80
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
        if (needle != 0) {
141
            do {
142
                void *candidate = memrchr(s, needle,
143
                                          n * sizeof(STRINGLIB_CHAR));
144
                if (candidate == NULL)
145
                    return -1;
146
                n1 = n;
147
                p = (const STRINGLIB_CHAR *)
148
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
                n = p - s;
150
                if (*p == ch)
151
                    return n;
152
                /* False positive */
153
                if (n1 - n > MEMRCHR_CUT_OFF)
154
                    continue;
155
                if (n <= MEMRCHR_CUT_OFF)
156
                    break;
157
                s1 = p - MEMRCHR_CUT_OFF;
158
                while (p > s1) {
159
                    p--;
160
                    if (*p == ch)
161
                        return (p - s);
162
                }
163
                n = p - s;
164
            }
165
            while (n > MEMRCHR_CUT_OFF);
166
        }
167
#endif
168
3.13k
    }
169
5.89k
#endif  /* HAVE_MEMRCHR */
170
5.89k
    p = s + n;
171
33.4k
    while (p > s) {
172
31.4k
        p--;
173
31.4k
        if (*p == ch)
174
3.89k
            return (p - s);
175
31.4k
    }
176
2.00k
    return -1;
177
5.89k
}
bytes_methods.c:stringlib_rfind_char
Line
Count
Source
117
18.2k
{
118
18.2k
    const STRINGLIB_CHAR *p;
119
18.2k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
18.2k
    if (n > MEMRCHR_CUT_OFF) {
126
2.60k
#if STRINGLIB_SIZEOF_CHAR == 1
127
2.60k
        p = memrchr(s, ch, n);
128
2.60k
        if (p != NULL)
129
828
            return (p - s);
130
1.77k
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
        if (needle != 0) {
141
            do {
142
                void *candidate = memrchr(s, needle,
143
                                          n * sizeof(STRINGLIB_CHAR));
144
                if (candidate == NULL)
145
                    return -1;
146
                n1 = n;
147
                p = (const STRINGLIB_CHAR *)
148
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
                n = p - s;
150
                if (*p == ch)
151
                    return n;
152
                /* False positive */
153
                if (n1 - n > MEMRCHR_CUT_OFF)
154
                    continue;
155
                if (n <= MEMRCHR_CUT_OFF)
156
                    break;
157
                s1 = p - MEMRCHR_CUT_OFF;
158
                while (p > s1) {
159
                    p--;
160
                    if (*p == ch)
161
                        return (p - s);
162
                }
163
                n = p - s;
164
            }
165
            while (n > MEMRCHR_CUT_OFF);
166
        }
167
#endif
168
2.60k
    }
169
15.6k
#endif  /* HAVE_MEMRCHR */
170
15.6k
    p = s + n;
171
82.1k
    while (p > s) {
172
71.1k
        p--;
173
71.1k
        if (*p == ch)
174
4.64k
            return (p - s);
175
71.1k
    }
176
11.0k
    return -1;
177
15.6k
}
Unexecuted instantiation: bytearrayobject.c:stringlib_rfind_char
178
179
#undef MEMRCHR_CUT_OFF
180
181
/* Change to a 1 to see logging comments walk through the algorithm. */
182
#if 0 && STRINGLIB_SIZEOF_CHAR == 1
183
# define LOG(...) printf(__VA_ARGS__)
184
# define LOG_STRING(s, n) printf("\"%.*s\"", (int)(n), s)
185
# define LOG_LINEUP() do {                                         \
186
    LOG("> "); LOG_STRING(haystack, len_haystack); LOG("\n> ");    \
187
    LOG("%*s",(int)(window_last - haystack + 1 - len_needle), ""); \
188
    LOG_STRING(needle, len_needle); LOG("\n");                     \
189
} while(0)
190
#else
191
# define LOG(...)
192
# define LOG_STRING(s, n)
193
# define LOG_LINEUP()
194
#endif
195
196
Py_LOCAL_INLINE(Py_ssize_t)
197
STRINGLIB(_lex_search)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
198
                       Py_ssize_t *return_period, int invert_alphabet)
199
36
{
200
    /* Do a lexicographic search. Essentially this:
201
           >>> max(needle[i:] for i in range(len(needle)+1))
202
       Also find the period of the right half.   */
203
36
    Py_ssize_t max_suffix = 0;
204
36
    Py_ssize_t candidate = 1;
205
36
    Py_ssize_t k = 0;
206
    // The period of the right half.
207
36
    Py_ssize_t period = 1;
208
209
360
    while (candidate + k < len_needle) {
210
        // each loop increases candidate + k + max_suffix
211
324
        STRINGLIB_CHAR a = needle[candidate + k];
212
324
        STRINGLIB_CHAR b = needle[max_suffix + k];
213
        // check if the suffix at candidate is better than max_suffix
214
324
        if (invert_alphabet ? (b < a) : (a < b)) {
215
            // Fell short of max_suffix.
216
            // The next k + 1 characters are non-increasing
217
            // from candidate, so they won't start a maximal suffix.
218
234
            candidate += k + 1;
219
234
            k = 0;
220
            // We've ruled out any period smaller than what's
221
            // been scanned since max_suffix.
222
234
            period = candidate - max_suffix;
223
234
        }
224
90
        else if (a == b) {
225
18
            if (k + 1 != period) {
226
                // Keep scanning the equal strings
227
0
                k++;
228
0
            }
229
18
            else {
230
                // Matched a whole period.
231
                // Start matching the next period.
232
18
                candidate += period;
233
18
                k = 0;
234
18
            }
235
18
        }
236
72
        else {
237
            // Did better than max_suffix, so replace it.
238
72
            max_suffix = candidate;
239
72
            candidate++;
240
72
            k = 0;
241
72
            period = 1;
242
72
        }
243
324
    }
244
36
    *return_period = period;
245
36
    return max_suffix;
246
36
}
Unexecuted instantiation: bytesobject.c:stringlib__lex_search
Unexecuted instantiation: unicodeobject.c:asciilib__lex_search
unicodeobject.c:ucs1lib__lex_search
Line
Count
Source
199
36
{
200
    /* Do a lexicographic search. Essentially this:
201
           >>> max(needle[i:] for i in range(len(needle)+1))
202
       Also find the period of the right half.   */
203
36
    Py_ssize_t max_suffix = 0;
204
36
    Py_ssize_t candidate = 1;
205
36
    Py_ssize_t k = 0;
206
    // The period of the right half.
207
36
    Py_ssize_t period = 1;
208
209
360
    while (candidate + k < len_needle) {
210
        // each loop increases candidate + k + max_suffix
211
324
        STRINGLIB_CHAR a = needle[candidate + k];
212
324
        STRINGLIB_CHAR b = needle[max_suffix + k];
213
        // check if the suffix at candidate is better than max_suffix
214
324
        if (invert_alphabet ? (b < a) : (a < b)) {
215
            // Fell short of max_suffix.
216
            // The next k + 1 characters are non-increasing
217
            // from candidate, so they won't start a maximal suffix.
218
234
            candidate += k + 1;
219
234
            k = 0;
220
            // We've ruled out any period smaller than what's
221
            // been scanned since max_suffix.
222
234
            period = candidate - max_suffix;
223
234
        }
224
90
        else if (a == b) {
225
18
            if (k + 1 != period) {
226
                // Keep scanning the equal strings
227
0
                k++;
228
0
            }
229
18
            else {
230
                // Matched a whole period.
231
                // Start matching the next period.
232
18
                candidate += period;
233
18
                k = 0;
234
18
            }
235
18
        }
236
72
        else {
237
            // Did better than max_suffix, so replace it.
238
72
            max_suffix = candidate;
239
72
            candidate++;
240
72
            k = 0;
241
72
            period = 1;
242
72
        }
243
324
    }
244
36
    *return_period = period;
245
36
    return max_suffix;
246
36
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__lex_search
Unexecuted instantiation: unicodeobject.c:ucs4lib__lex_search
Unexecuted instantiation: bytes_methods.c:stringlib__lex_search
Unexecuted instantiation: bytearrayobject.c:stringlib__lex_search
247
248
Py_LOCAL_INLINE(Py_ssize_t)
249
STRINGLIB(_factorize)(const STRINGLIB_CHAR *needle,
250
                      Py_ssize_t len_needle,
251
                      Py_ssize_t *return_period)
252
18
{
253
    /* Do a "critical factorization", making it so that:
254
       >>> needle = (left := needle[:cut]) + (right := needle[cut:])
255
       where the "local period" of the cut is maximal.
256
257
       The local period of the cut is the minimal length of a string w
258
       such that (left endswith w or w endswith left)
259
       and (right startswith w or w startswith right).
260
261
       The Critical Factorization Theorem says that this maximal local
262
       period is the global period of the string.
263
264
       Crochemore and Perrin (1991) show that this cut can be computed
265
       as the later of two cuts: one that gives a lexicographically
266
       maximal right half, and one that gives the same with the
267
       with respect to a reversed alphabet-ordering.
268
269
       This is what we want to happen:
270
           >>> x = "GCAGAGAG"
271
           >>> cut, period = factorize(x)
272
           >>> x[:cut], (right := x[cut:])
273
           ('GC', 'AGAGAG')
274
           >>> period  # right half period
275
           2
276
           >>> right[period:] == right[:-period]
277
           True
278
279
       This is how the local period lines up in the above example:
280
                GC | AGAGAG
281
           AGAGAGC = AGAGAGC
282
       The length of this minimal repetition is 7, which is indeed the
283
       period of the original string. */
284
285
18
    Py_ssize_t cut1, period1, cut2, period2, cut, period;
286
18
    cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0);
287
18
    cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1);
288
289
    // Take the later cut.
290
18
    if (cut1 > cut2) {
291
18
        period = period1;
292
18
        cut = cut1;
293
18
    }
294
0
    else {
295
0
        period = period2;
296
0
        cut = cut2;
297
0
    }
298
299
18
    LOG("split: "); LOG_STRING(needle, cut);
300
18
    LOG(" + "); LOG_STRING(needle + cut, len_needle - cut);
301
18
    LOG("\n");
302
303
18
    *return_period = period;
304
18
    return cut;
305
18
}
Unexecuted instantiation: bytesobject.c:stringlib__factorize
Unexecuted instantiation: unicodeobject.c:asciilib__factorize
unicodeobject.c:ucs1lib__factorize
Line
Count
Source
252
18
{
253
    /* Do a "critical factorization", making it so that:
254
       >>> needle = (left := needle[:cut]) + (right := needle[cut:])
255
       where the "local period" of the cut is maximal.
256
257
       The local period of the cut is the minimal length of a string w
258
       such that (left endswith w or w endswith left)
259
       and (right startswith w or w startswith right).
260
261
       The Critical Factorization Theorem says that this maximal local
262
       period is the global period of the string.
263
264
       Crochemore and Perrin (1991) show that this cut can be computed
265
       as the later of two cuts: one that gives a lexicographically
266
       maximal right half, and one that gives the same with the
267
       with respect to a reversed alphabet-ordering.
268
269
       This is what we want to happen:
270
           >>> x = "GCAGAGAG"
271
           >>> cut, period = factorize(x)
272
           >>> x[:cut], (right := x[cut:])
273
           ('GC', 'AGAGAG')
274
           >>> period  # right half period
275
           2
276
           >>> right[period:] == right[:-period]
277
           True
278
279
       This is how the local period lines up in the above example:
280
                GC | AGAGAG
281
           AGAGAGC = AGAGAGC
282
       The length of this minimal repetition is 7, which is indeed the
283
       period of the original string. */
284
285
18
    Py_ssize_t cut1, period1, cut2, period2, cut, period;
286
18
    cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0);
287
18
    cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1);
288
289
    // Take the later cut.
290
18
    if (cut1 > cut2) {
291
18
        period = period1;
292
18
        cut = cut1;
293
18
    }
294
0
    else {
295
0
        period = period2;
296
0
        cut = cut2;
297
0
    }
298
299
18
    LOG("split: "); LOG_STRING(needle, cut);
300
18
    LOG(" + "); LOG_STRING(needle + cut, len_needle - cut);
301
18
    LOG("\n");
302
303
18
    *return_period = period;
304
18
    return cut;
305
18
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__factorize
Unexecuted instantiation: unicodeobject.c:ucs4lib__factorize
Unexecuted instantiation: bytes_methods.c:stringlib__factorize
Unexecuted instantiation: bytearrayobject.c:stringlib__factorize
306
307
308
198
#define SHIFT_TYPE uint8_t
309
#define MAX_SHIFT UINT8_MAX
310
311
64.4k
#define TABLE_SIZE_BITS 6u
312
64.4k
#define TABLE_SIZE (1U << TABLE_SIZE_BITS)
313
63.2k
#define TABLE_MASK (TABLE_SIZE - 1U)
314
315
typedef struct STRINGLIB(_pre) {
316
    const STRINGLIB_CHAR *needle;
317
    Py_ssize_t len_needle;
318
    Py_ssize_t cut;
319
    Py_ssize_t period;
320
    Py_ssize_t gap;
321
    int is_periodic;
322
    SHIFT_TYPE table[TABLE_SIZE];
323
} STRINGLIB(prework);
324
325
326
static void
327
STRINGLIB(_preprocess)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
328
                       STRINGLIB(prework) *p)
329
18
{
330
18
    p->needle = needle;
331
18
    p->len_needle = len_needle;
332
18
    p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period));
333
18
    assert(p->period + p->cut <= len_needle);
334
18
    p->is_periodic = (0 == memcmp(needle,
335
18
                                  needle + p->period,
336
18
                                  p->cut * STRINGLIB_SIZEOF_CHAR));
337
18
    if (p->is_periodic) {
338
0
        assert(p->cut <= len_needle/2);
339
0
        assert(p->cut < p->period);
340
0
    }
341
18
    else {
342
        // A lower bound on the period
343
18
        p->period = Py_MAX(p->cut, len_needle - p->cut) + 1;
344
18
    }
345
    // The gap between the last character and the previous
346
    // occurrence of an equivalent character (modulo TABLE_SIZE)
347
18
    p->gap = len_needle;
348
18
    STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK;
349
126
    for (Py_ssize_t i = len_needle - 2; i >= 0; i--) {
350
126
        STRINGLIB_CHAR x = needle[i] & TABLE_MASK;
351
126
        if (x == last) {
352
18
            p->gap = len_needle - 1 - i;
353
18
            break;
354
18
        }
355
126
    }
356
    // Fill up a compressed Boyer-Moore "Bad Character" table
357
18
    Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT);
358
1.17k
    for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; i++) {
359
1.15k
        p->table[i] = Py_SAFE_DOWNCAST(not_found_shift,
360
1.15k
                                       Py_ssize_t, SHIFT_TYPE);
361
1.15k
    }
362
198
    for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; i++) {
363
180
        SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i,
364
180
                                            Py_ssize_t, SHIFT_TYPE);
365
180
        p->table[needle[i] & TABLE_MASK] = shift;
366
180
    }
367
18
}
Unexecuted instantiation: bytesobject.c:stringlib__preprocess
Unexecuted instantiation: unicodeobject.c:asciilib__preprocess
unicodeobject.c:ucs1lib__preprocess
Line
Count
Source
329
18
{
330
18
    p->needle = needle;
331
18
    p->len_needle = len_needle;
332
18
    p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period));
333
18
    assert(p->period + p->cut <= len_needle);
334
18
    p->is_periodic = (0 == memcmp(needle,
335
18
                                  needle + p->period,
336
18
                                  p->cut * STRINGLIB_SIZEOF_CHAR));
337
18
    if (p->is_periodic) {
338
0
        assert(p->cut <= len_needle/2);
339
0
        assert(p->cut < p->period);
340
0
    }
341
18
    else {
342
        // A lower bound on the period
343
18
        p->period = Py_MAX(p->cut, len_needle - p->cut) + 1;
344
18
    }
345
    // The gap between the last character and the previous
346
    // occurrence of an equivalent character (modulo TABLE_SIZE)
347
18
    p->gap = len_needle;
348
18
    STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK;
349
126
    for (Py_ssize_t i = len_needle - 2; i >= 0; i--) {
350
126
        STRINGLIB_CHAR x = needle[i] & TABLE_MASK;
351
126
        if (x == last) {
352
18
            p->gap = len_needle - 1 - i;
353
18
            break;
354
18
        }
355
126
    }
356
    // Fill up a compressed Boyer-Moore "Bad Character" table
357
18
    Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT);
358
1.17k
    for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; i++) {
359
1.15k
        p->table[i] = Py_SAFE_DOWNCAST(not_found_shift,
360
1.15k
                                       Py_ssize_t, SHIFT_TYPE);
361
1.15k
    }
362
198
    for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; i++) {
363
180
        SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i,
364
180
                                            Py_ssize_t, SHIFT_TYPE);
365
180
        p->table[needle[i] & TABLE_MASK] = shift;
366
180
    }
367
18
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__preprocess
Unexecuted instantiation: unicodeobject.c:ucs4lib__preprocess
Unexecuted instantiation: bytes_methods.c:stringlib__preprocess
Unexecuted instantiation: bytearrayobject.c:stringlib__preprocess
368
369
static Py_ssize_t
370
STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
371
                    STRINGLIB(prework) *p)
372
18
{
373
    // Crochemore and Perrin's (1991) Two-Way algorithm.
374
    // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
375
18
    const Py_ssize_t len_needle = p->len_needle;
376
18
    const Py_ssize_t cut = p->cut;
377
18
    Py_ssize_t period = p->period;
378
18
    const STRINGLIB_CHAR *const needle = p->needle;
379
18
    const STRINGLIB_CHAR *window_last = haystack + len_needle - 1;
380
18
    const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack;
381
18
    SHIFT_TYPE *table = p->table;
382
18
    const STRINGLIB_CHAR *window;
383
18
    LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack);
384
385
18
    Py_ssize_t gap = p->gap;
386
18
    Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap);
387
18
    if (p->is_periodic) {
388
0
        LOG("Needle is periodic.\n");
389
0
        Py_ssize_t memory = 0;
390
0
      periodicwindowloop:
391
0
        while (window_last < haystack_end) {
392
0
            assert(memory == 0);
393
0
            for (;;) {
394
0
                LOG_LINEUP();
395
0
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
396
0
                window_last += shift;
397
0
                if (shift == 0) {
398
0
                    break;
399
0
                }
400
0
                if (window_last >= haystack_end) {
401
0
                    return -1;
402
0
                }
403
0
                LOG("Horspool skip\n");
404
0
            }
405
0
          no_shift:
406
0
            window = window_last - len_needle + 1;
407
0
            assert((window[len_needle - 1] & TABLE_MASK) ==
408
0
                   (needle[len_needle - 1] & TABLE_MASK));
409
0
            Py_ssize_t i = Py_MAX(cut, memory);
410
0
            for (; i < len_needle; i++) {
411
0
                if (needle[i] != window[i]) {
412
0
                    if (i < gap_jump_end) {
413
0
                        LOG("Early right half mismatch: jump by gap.\n");
414
0
                        assert(gap >= i - cut + 1);
415
0
                        window_last += gap;
416
0
                    }
417
0
                    else {
418
0
                        LOG("Late right half mismatch: jump by n (>gap)\n");
419
0
                        assert(i - cut + 1 > gap);
420
0
                        window_last += i - cut + 1;
421
0
                    }
422
0
                    memory = 0;
423
0
                    goto periodicwindowloop;
424
0
                }
425
0
            }
426
0
            for (i = memory; i < cut; i++) {
427
0
                if (needle[i] != window[i]) {
428
0
                    LOG("Left half does not match.\n");
429
0
                    window_last += period;
430
0
                    memory = len_needle - period;
431
0
                    if (window_last >= haystack_end) {
432
0
                        return -1;
433
0
                    }
434
0
                    Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
435
0
                    if (shift) {
436
                        // A mismatch has been identified to the right
437
                        // of where i will next start, so we can jump
438
                        // at least as far as if the mismatch occurred
439
                        // on the first comparison.
440
0
                        Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1;
441
0
                        LOG("Skip with Memory.\n");
442
0
                        memory = 0;
443
0
                        window_last += Py_MAX(shift, mem_jump);
444
0
                        goto periodicwindowloop;
445
0
                    }
446
0
                    goto no_shift;
447
0
                }
448
0
            }
449
0
            LOG("Found a match!\n");
450
0
            return window - haystack;
451
0
        }
452
0
    }
453
18
    else {
454
18
        period = Py_MAX(gap, period);
455
18
        LOG("Needle is not periodic.\n");
456
10.6k
      windowloop:
457
10.6k
        while (window_last < haystack_end) {
458
62.9k
            for (;;) {
459
62.9k
                LOG_LINEUP();
460
62.9k
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
461
62.9k
                window_last += shift;
462
62.9k
                if (shift == 0) {
463
10.6k
                    break;
464
10.6k
                }
465
52.3k
                if (window_last >= haystack_end) {
466
15
                    return -1;
467
15
                }
468
52.3k
                LOG("Horspool skip\n");
469
52.3k
            }
470
10.6k
            window = window_last - len_needle + 1;
471
10.6k
            assert((window[len_needle - 1] & TABLE_MASK) ==
472
10.6k
                   (needle[len_needle - 1] & TABLE_MASK));
473
10.6k
            Py_ssize_t i = cut;
474
10.8k
            for (; i < len_needle; i++) {
475
10.7k
                if (needle[i] != window[i]) {
476
10.5k
                    if (i < gap_jump_end) {
477
10.5k
                        LOG("Early right half mismatch: jump by gap.\n");
478
10.5k
                        assert(gap >= i - cut + 1);
479
10.5k
                        window_last += gap;
480
10.5k
                    }
481
0
                    else {
482
0
                        LOG("Late right half mismatch: jump by n (>gap)\n");
483
0
                        assert(i - cut + 1 > gap);
484
0
                        window_last += i - cut + 1;
485
0
                    }
486
10.5k
                    goto windowloop;
487
10.5k
                }
488
10.7k
            }
489
98
            for (Py_ssize_t i = 0; i < cut; i++) {
490
96
                if (needle[i] != window[i]) {
491
80
                    LOG("Left half does not match.\n");
492
80
                    window_last += period;
493
80
                    goto windowloop;
494
80
                }
495
96
            }
496
2
            LOG("Found a match!\n");
497
2
            return window - haystack;
498
82
        }
499
10.6k
    }
500
1
    LOG("Not found. Returning -1.\n");
501
1
    return -1;
502
18
}
Unexecuted instantiation: bytesobject.c:stringlib__two_way
Unexecuted instantiation: unicodeobject.c:asciilib__two_way
unicodeobject.c:ucs1lib__two_way
Line
Count
Source
372
18
{
373
    // Crochemore and Perrin's (1991) Two-Way algorithm.
374
    // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
375
18
    const Py_ssize_t len_needle = p->len_needle;
376
18
    const Py_ssize_t cut = p->cut;
377
18
    Py_ssize_t period = p->period;
378
18
    const STRINGLIB_CHAR *const needle = p->needle;
379
18
    const STRINGLIB_CHAR *window_last = haystack + len_needle - 1;
380
18
    const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack;
381
18
    SHIFT_TYPE *table = p->table;
382
18
    const STRINGLIB_CHAR *window;
383
18
    LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack);
384
385
18
    Py_ssize_t gap = p->gap;
386
18
    Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap);
387
18
    if (p->is_periodic) {
388
0
        LOG("Needle is periodic.\n");
389
0
        Py_ssize_t memory = 0;
390
0
      periodicwindowloop:
391
0
        while (window_last < haystack_end) {
392
0
            assert(memory == 0);
393
0
            for (;;) {
394
0
                LOG_LINEUP();
395
0
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
396
0
                window_last += shift;
397
0
                if (shift == 0) {
398
0
                    break;
399
0
                }
400
0
                if (window_last >= haystack_end) {
401
0
                    return -1;
402
0
                }
403
0
                LOG("Horspool skip\n");
404
0
            }
405
0
          no_shift:
406
0
            window = window_last - len_needle + 1;
407
0
            assert((window[len_needle - 1] & TABLE_MASK) ==
408
0
                   (needle[len_needle - 1] & TABLE_MASK));
409
0
            Py_ssize_t i = Py_MAX(cut, memory);
410
0
            for (; i < len_needle; i++) {
411
0
                if (needle[i] != window[i]) {
412
0
                    if (i < gap_jump_end) {
413
0
                        LOG("Early right half mismatch: jump by gap.\n");
414
0
                        assert(gap >= i - cut + 1);
415
0
                        window_last += gap;
416
0
                    }
417
0
                    else {
418
0
                        LOG("Late right half mismatch: jump by n (>gap)\n");
419
0
                        assert(i - cut + 1 > gap);
420
0
                        window_last += i - cut + 1;
421
0
                    }
422
0
                    memory = 0;
423
0
                    goto periodicwindowloop;
424
0
                }
425
0
            }
426
0
            for (i = memory; i < cut; i++) {
427
0
                if (needle[i] != window[i]) {
428
0
                    LOG("Left half does not match.\n");
429
0
                    window_last += period;
430
0
                    memory = len_needle - period;
431
0
                    if (window_last >= haystack_end) {
432
0
                        return -1;
433
0
                    }
434
0
                    Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
435
0
                    if (shift) {
436
                        // A mismatch has been identified to the right
437
                        // of where i will next start, so we can jump
438
                        // at least as far as if the mismatch occurred
439
                        // on the first comparison.
440
0
                        Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1;
441
0
                        LOG("Skip with Memory.\n");
442
0
                        memory = 0;
443
0
                        window_last += Py_MAX(shift, mem_jump);
444
0
                        goto periodicwindowloop;
445
0
                    }
446
0
                    goto no_shift;
447
0
                }
448
0
            }
449
0
            LOG("Found a match!\n");
450
0
            return window - haystack;
451
0
        }
452
0
    }
453
18
    else {
454
18
        period = Py_MAX(gap, period);
455
18
        LOG("Needle is not periodic.\n");
456
10.6k
      windowloop:
457
10.6k
        while (window_last < haystack_end) {
458
62.9k
            for (;;) {
459
62.9k
                LOG_LINEUP();
460
62.9k
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
461
62.9k
                window_last += shift;
462
62.9k
                if (shift == 0) {
463
10.6k
                    break;
464
10.6k
                }
465
52.3k
                if (window_last >= haystack_end) {
466
15
                    return -1;
467
15
                }
468
52.3k
                LOG("Horspool skip\n");
469
52.3k
            }
470
10.6k
            window = window_last - len_needle + 1;
471
10.6k
            assert((window[len_needle - 1] & TABLE_MASK) ==
472
10.6k
                   (needle[len_needle - 1] & TABLE_MASK));
473
10.6k
            Py_ssize_t i = cut;
474
10.8k
            for (; i < len_needle; i++) {
475
10.7k
                if (needle[i] != window[i]) {
476
10.5k
                    if (i < gap_jump_end) {
477
10.5k
                        LOG("Early right half mismatch: jump by gap.\n");
478
10.5k
                        assert(gap >= i - cut + 1);
479
10.5k
                        window_last += gap;
480
10.5k
                    }
481
0
                    else {
482
0
                        LOG("Late right half mismatch: jump by n (>gap)\n");
483
0
                        assert(i - cut + 1 > gap);
484
0
                        window_last += i - cut + 1;
485
0
                    }
486
10.5k
                    goto windowloop;
487
10.5k
                }
488
10.7k
            }
489
98
            for (Py_ssize_t i = 0; i < cut; i++) {
490
96
                if (needle[i] != window[i]) {
491
80
                    LOG("Left half does not match.\n");
492
80
                    window_last += period;
493
80
                    goto windowloop;
494
80
                }
495
96
            }
496
2
            LOG("Found a match!\n");
497
2
            return window - haystack;
498
82
        }
499
10.6k
    }
500
1
    LOG("Not found. Returning -1.\n");
501
1
    return -1;
502
18
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__two_way
Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way
Unexecuted instantiation: bytes_methods.c:stringlib__two_way
Unexecuted instantiation: bytearrayobject.c:stringlib__two_way
503
504
505
static Py_ssize_t
506
STRINGLIB(_two_way_find)(const STRINGLIB_CHAR *haystack,
507
                         Py_ssize_t len_haystack,
508
                         const STRINGLIB_CHAR *needle,
509
                         Py_ssize_t len_needle)
510
18
{
511
18
    LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack);
512
18
    STRINGLIB(prework) p;
513
18
    STRINGLIB(_preprocess)(needle, len_needle, &p);
514
18
    return STRINGLIB(_two_way)(haystack, len_haystack, &p);
515
18
}
Unexecuted instantiation: bytesobject.c:stringlib__two_way_find
Unexecuted instantiation: unicodeobject.c:asciilib__two_way_find
unicodeobject.c:ucs1lib__two_way_find
Line
Count
Source
510
18
{
511
18
    LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack);
512
18
    STRINGLIB(prework) p;
513
18
    STRINGLIB(_preprocess)(needle, len_needle, &p);
514
18
    return STRINGLIB(_two_way)(haystack, len_haystack, &p);
515
18
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__two_way_find
Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way_find
Unexecuted instantiation: bytes_methods.c:stringlib__two_way_find
Unexecuted instantiation: bytearrayobject.c:stringlib__two_way_find
516
517
518
static Py_ssize_t
519
STRINGLIB(_two_way_count)(const STRINGLIB_CHAR *haystack,
520
                          Py_ssize_t len_haystack,
521
                          const STRINGLIB_CHAR *needle,
522
                          Py_ssize_t len_needle,
523
                          Py_ssize_t maxcount)
524
0
{
525
0
    LOG("###### Counting \"%s\" in \"%s\".\n", needle, haystack);
526
0
    STRINGLIB(prework) p;
527
0
    STRINGLIB(_preprocess)(needle, len_needle, &p);
528
0
    Py_ssize_t index = 0, count = 0;
529
0
    while (1) {
530
0
        Py_ssize_t result;
531
0
        result = STRINGLIB(_two_way)(haystack + index,
532
0
                                     len_haystack - index, &p);
533
0
        if (result == -1) {
534
0
            return count;
535
0
        }
536
0
        count++;
537
0
        if (count == maxcount) {
538
0
            return maxcount;
539
0
        }
540
0
        index += result + len_needle;
541
0
    }
542
0
    return count;
543
0
}
Unexecuted instantiation: bytesobject.c:stringlib__two_way_count
Unexecuted instantiation: unicodeobject.c:asciilib__two_way_count
Unexecuted instantiation: unicodeobject.c:ucs1lib__two_way_count
Unexecuted instantiation: unicodeobject.c:ucs2lib__two_way_count
Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way_count
Unexecuted instantiation: bytes_methods.c:stringlib__two_way_count
Unexecuted instantiation: bytearrayobject.c:stringlib__two_way_count
544
545
#undef SHIFT_TYPE
546
#undef NOT_FOUND
547
#undef SHIFT_OVERFLOW
548
#undef TABLE_SIZE_BITS
549
#undef TABLE_SIZE
550
#undef TABLE_MASK
551
552
#undef LOG
553
#undef LOG_STRING
554
#undef LOG_LINEUP
555
556
static inline Py_ssize_t
557
STRINGLIB(default_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
558
                        const STRINGLIB_CHAR* p, Py_ssize_t m,
559
                        Py_ssize_t maxcount, int mode)
560
14.0M
{
561
14.0M
    const Py_ssize_t w = n - m;
562
14.0M
    Py_ssize_t mlast = m - 1, count = 0;
563
14.0M
    Py_ssize_t gap = mlast;
564
14.0M
    const STRINGLIB_CHAR last = p[mlast];
565
14.0M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
14.0M
    unsigned long mask = 0;
568
28.0M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
14.0M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
14.0M
        if (p[i] == last) {
571
297k
            gap = mlast - i - 1;
572
297k
        }
573
14.0M
    }
574
14.0M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
4.93G
    for (Py_ssize_t i = 0; i <= w; i++) {
577
4.92G
        if (ss[i] == last) {
578
            /* candidate match */
579
61.0M
            Py_ssize_t j;
580
81.7M
            for (j = 0; j < mlast; j++) {
581
61.1M
                if (s[i+j] != p[j]) {
582
40.4M
                    break;
583
40.4M
                }
584
61.1M
            }
585
61.0M
            if (j == mlast) {
586
                /* got a match! */
587
20.6M
                if (mode != FAST_COUNT) {
588
10.3M
                    return i;
589
10.3M
                }
590
10.2M
                count++;
591
10.2M
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
10.2M
                i = i + mlast;
595
10.2M
                continue;
596
10.2M
            }
597
            /* miss: check if next character is part of pattern */
598
40.4M
            if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
599
7.02M
                i = i + m;
600
7.02M
            }
601
33.4M
            else {
602
33.4M
                i = i + gap;
603
33.4M
            }
604
40.4M
        }
605
4.86G
        else {
606
            /* skip: check if next character is part of pattern */
607
4.86G
            if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
608
4.81G
                i = i + m;
609
4.81G
            }
610
4.86G
        }
611
4.92G
    }
612
3.64M
    return mode == FAST_COUNT ? count : -1;
613
14.0M
}
Unexecuted instantiation: bytesobject.c:stringlib_default_find
unicodeobject.c:asciilib_default_find
Line
Count
Source
560
1.62M
{
561
1.62M
    const Py_ssize_t w = n - m;
562
1.62M
    Py_ssize_t mlast = m - 1, count = 0;
563
1.62M
    Py_ssize_t gap = mlast;
564
1.62M
    const STRINGLIB_CHAR last = p[mlast];
565
1.62M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
1.62M
    unsigned long mask = 0;
568
3.25M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
1.62M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
1.62M
        if (p[i] == last) {
571
3.52k
            gap = mlast - i - 1;
572
3.52k
        }
573
1.62M
    }
574
1.62M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
137M
    for (Py_ssize_t i = 0; i <= w; i++) {
577
137M
        if (ss[i] == last) {
578
            /* candidate match */
579
3.52M
            Py_ssize_t j;
580
5.11M
            for (j = 0; j < mlast; j++) {
581
3.52M
                if (s[i+j] != p[j]) {
582
1.93M
                    break;
583
1.93M
                }
584
3.52M
            }
585
3.52M
            if (j == mlast) {
586
                /* got a match! */
587
1.59M
                if (mode != FAST_COUNT) {
588
1.59M
                    return i;
589
1.59M
                }
590
0
                count++;
591
0
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
0
                i = i + mlast;
595
0
                continue;
596
0
            }
597
            /* miss: check if next character is part of pattern */
598
1.93M
            if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
599
24.3k
                i = i + m;
600
24.3k
            }
601
1.90M
            else {
602
1.90M
                i = i + gap;
603
1.90M
            }
604
1.93M
        }
605
134M
        else {
606
            /* skip: check if next character is part of pattern */
607
134M
            if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
608
128M
                i = i + m;
609
128M
            }
610
134M
        }
611
137M
    }
612
34.8k
    return mode == FAST_COUNT ? count : -1;
613
1.62M
}
unicodeobject.c:ucs1lib_default_find
Line
Count
Source
560
4.70M
{
561
4.70M
    const Py_ssize_t w = n - m;
562
4.70M
    Py_ssize_t mlast = m - 1, count = 0;
563
4.70M
    Py_ssize_t gap = mlast;
564
4.70M
    const STRINGLIB_CHAR last = p[mlast];
565
4.70M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
4.70M
    unsigned long mask = 0;
568
9.45M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
4.75M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
4.75M
        if (p[i] == last) {
571
238k
            gap = mlast - i - 1;
572
238k
        }
573
4.75M
    }
574
4.70M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
2.61G
    for (Py_ssize_t i = 0; i <= w; i++) {
577
2.60G
        if (ss[i] == last) {
578
            /* candidate match */
579
18.6M
            Py_ssize_t j;
580
22.7M
            for (j = 0; j < mlast; j++) {
581
18.6M
                if (s[i+j] != p[j]) {
582
14.5M
                    break;
583
14.5M
                }
584
18.6M
            }
585
18.6M
            if (j == mlast) {
586
                /* got a match! */
587
4.07M
                if (mode != FAST_COUNT) {
588
1.24M
                    return i;
589
1.24M
                }
590
2.82M
                count++;
591
2.82M
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
2.82M
                i = i + mlast;
595
2.82M
                continue;
596
2.82M
            }
597
            /* miss: check if next character is part of pattern */
598
14.5M
            if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
599
3.43M
                i = i + m;
600
3.43M
            }
601
11.1M
            else {
602
11.1M
                i = i + gap;
603
11.1M
            }
604
14.5M
        }
605
2.59G
        else {
606
            /* skip: check if next character is part of pattern */
607
2.59G
            if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
608
2.57G
                i = i + m;
609
2.57G
            }
610
2.59G
        }
611
2.60G
    }
612
3.45M
    return mode == FAST_COUNT ? count : -1;
613
4.70M
}
unicodeobject.c:ucs2lib_default_find
Line
Count
Source
560
3.12M
{
561
3.12M
    const Py_ssize_t w = n - m;
562
3.12M
    Py_ssize_t mlast = m - 1, count = 0;
563
3.12M
    Py_ssize_t gap = mlast;
564
3.12M
    const STRINGLIB_CHAR last = p[mlast];
565
3.12M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
3.12M
    unsigned long mask = 0;
568
6.26M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
3.14M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
3.14M
        if (p[i] == last) {
571
32.2k
            gap = mlast - i - 1;
572
32.2k
        }
573
3.14M
    }
574
3.12M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
917M
    for (Py_ssize_t i = 0; i <= w; i++) {
577
917M
        if (ss[i] == last) {
578
            /* candidate match */
579
11.6M
            Py_ssize_t j;
580
17.6M
            for (j = 0; j < mlast; j++) {
581
11.6M
                if (s[i+j] != p[j]) {
582
5.71M
                    break;
583
5.71M
                }
584
11.6M
            }
585
11.6M
            if (j == mlast) {
586
                /* got a match! */
587
5.96M
                if (mode != FAST_COUNT) {
588
3.01M
                    return i;
589
3.01M
                }
590
2.94M
                count++;
591
2.94M
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
2.94M
                i = i + mlast;
595
2.94M
                continue;
596
2.94M
            }
597
            /* miss: check if next character is part of pattern */
598
5.71M
            if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
599
1.59M
                i = i + m;
600
1.59M
            }
601
4.12M
            else {
602
4.12M
                i = i + gap;
603
4.12M
            }
604
5.71M
        }
605
906M
        else {
606
            /* skip: check if next character is part of pattern */
607
906M
            if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
608
896M
                i = i + m;
609
896M
            }
610
906M
        }
611
917M
    }
612
109k
    return mode == FAST_COUNT ? count : -1;
613
3.12M
}
unicodeobject.c:ucs4lib_default_find
Line
Count
Source
560
4.54M
{
561
4.54M
    const Py_ssize_t w = n - m;
562
4.54M
    Py_ssize_t mlast = m - 1, count = 0;
563
4.54M
    Py_ssize_t gap = mlast;
564
4.54M
    const STRINGLIB_CHAR last = p[mlast];
565
4.54M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
4.54M
    unsigned long mask = 0;
568
9.08M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
4.54M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
4.54M
        if (p[i] == last) {
571
20.8k
            gap = mlast - i - 1;
572
20.8k
        }
573
4.54M
    }
574
4.54M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
1.26G
    for (Py_ssize_t i = 0; i <= w; i++) {
577
1.26G
        if (ss[i] == last) {
578
            /* candidate match */
579
27.2M
            Py_ssize_t j;
580
36.2M
            for (j = 0; j < mlast; j++) {
581
27.2M
                if (s[i+j] != p[j]) {
582
18.2M
                    break;
583
18.2M
                }
584
27.2M
            }
585
27.2M
            if (j == mlast) {
586
                /* got a match! */
587
8.98M
                if (mode != FAST_COUNT) {
588
4.50M
                    return i;
589
4.50M
                }
590
4.48M
                count++;
591
4.48M
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
4.48M
                i = i + mlast;
595
4.48M
                continue;
596
4.48M
            }
597
            /* miss: check if next character is part of pattern */
598
18.2M
            if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
599
1.97M
                i = i + m;
600
1.97M
            }
601
16.2M
            else {
602
16.2M
                i = i + gap;
603
16.2M
            }
604
18.2M
        }
605
1.23G
        else {
606
            /* skip: check if next character is part of pattern */
607
1.23G
            if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
608
1.21G
                i = i + m;
609
1.21G
            }
610
1.23G
        }
611
1.26G
    }
612
42.2k
    return mode == FAST_COUNT ? count : -1;
613
4.54M
}
bytes_methods.c:stringlib_default_find
Line
Count
Source
560
2.81k
{
561
2.81k
    const Py_ssize_t w = n - m;
562
2.81k
    Py_ssize_t mlast = m - 1, count = 0;
563
2.81k
    Py_ssize_t gap = mlast;
564
2.81k
    const STRINGLIB_CHAR last = p[mlast];
565
2.81k
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
2.81k
    unsigned long mask = 0;
568
11.2k
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
8.43k
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
8.43k
        if (p[i] == last) {
571
2.81k
            gap = mlast - i - 1;
572
2.81k
        }
573
8.43k
    }
574
2.81k
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
1.61M
    for (Py_ssize_t i = 0; i <= w; i++) {
577
1.61M
        if (ss[i] == last) {
578
            /* candidate match */
579
8.39k
            Py_ssize_t j;
580
16.4k
            for (j = 0; j < mlast; j++) {
581
13.8k
                if (s[i+j] != p[j]) {
582
5.79k
                    break;
583
5.79k
                }
584
13.8k
            }
585
8.39k
            if (j == mlast) {
586
                /* got a match! */
587
2.60k
                if (mode != FAST_COUNT) {
588
2.60k
                    return i;
589
2.60k
                }
590
0
                count++;
591
0
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
0
                i = i + mlast;
595
0
                continue;
596
0
            }
597
            /* miss: check if next character is part of pattern */
598
5.79k
            if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
599
640
                i = i + m;
600
640
            }
601
5.15k
            else {
602
5.15k
                i = i + gap;
603
5.15k
            }
604
5.79k
        }
605
1.60M
        else {
606
            /* skip: check if next character is part of pattern */
607
1.60M
            if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
608
120k
                i = i + m;
609
120k
            }
610
1.60M
        }
611
1.61M
    }
612
212
    return mode == FAST_COUNT ? count : -1;
613
2.81k
}
Unexecuted instantiation: bytearrayobject.c:stringlib_default_find
614
615
616
static Py_ssize_t
617
STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
618
                         const STRINGLIB_CHAR* p, Py_ssize_t m,
619
                         Py_ssize_t maxcount, int mode)
620
0
{
621
0
    const Py_ssize_t w = n - m;
622
0
    Py_ssize_t mlast = m - 1, count = 0;
623
0
    Py_ssize_t gap = mlast;
624
0
    Py_ssize_t hits = 0, res;
625
0
    const STRINGLIB_CHAR last = p[mlast];
626
0
    const STRINGLIB_CHAR *const ss = &s[mlast];
627
628
0
    unsigned long mask = 0;
629
0
    for (Py_ssize_t i = 0; i < mlast; i++) {
630
0
        STRINGLIB_BLOOM_ADD(mask, p[i]);
631
0
        if (p[i] == last) {
632
0
            gap = mlast - i - 1;
633
0
        }
634
0
    }
635
0
    STRINGLIB_BLOOM_ADD(mask, last);
636
637
0
    for (Py_ssize_t i = 0; i <= w; i++) {
638
0
        if (ss[i] == last) {
639
            /* candidate match */
640
0
            Py_ssize_t j;
641
0
            for (j = 0; j < mlast; j++) {
642
0
                if (s[i+j] != p[j]) {
643
0
                    break;
644
0
                }
645
0
            }
646
0
            if (j == mlast) {
647
                /* got a match! */
648
0
                if (mode != FAST_COUNT) {
649
0
                    return i;
650
0
                }
651
0
                count++;
652
0
                if (count == maxcount) {
653
0
                    return maxcount;
654
0
                }
655
0
                i = i + mlast;
656
0
                continue;
657
0
            }
658
0
            hits += j + 1;
659
0
            if (hits > m / 4 && w - i > 2000) {
660
0
                if (mode == FAST_SEARCH) {
661
0
                    res = STRINGLIB(_two_way_find)(s + i, n - i, p, m);
662
0
                    return res == -1 ? -1 : res + i;
663
0
                }
664
0
                else {
665
0
                    res = STRINGLIB(_two_way_count)(s + i, n - i, p, m,
666
0
                                                    maxcount - count);
667
0
                    return res + count;
668
0
                }
669
0
            }
670
            /* miss: check if next character is part of pattern */
671
0
            if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
672
0
                i = i + m;
673
0
            }
674
0
            else {
675
0
                i = i + gap;
676
0
            }
677
0
        }
678
0
        else {
679
            /* skip: check if next character is part of pattern */
680
0
            if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
681
0
                i = i + m;
682
0
            }
683
0
        }
684
0
    }
685
0
    return mode == FAST_COUNT ? count : -1;
686
0
}
Unexecuted instantiation: bytesobject.c:stringlib_adaptive_find
Unexecuted instantiation: unicodeobject.c:asciilib_adaptive_find
Unexecuted instantiation: unicodeobject.c:ucs1lib_adaptive_find
Unexecuted instantiation: unicodeobject.c:ucs2lib_adaptive_find
Unexecuted instantiation: unicodeobject.c:ucs4lib_adaptive_find
Unexecuted instantiation: bytes_methods.c:stringlib_adaptive_find
Unexecuted instantiation: bytearrayobject.c:stringlib_adaptive_find
687
688
689
static Py_ssize_t
690
STRINGLIB(default_rfind)(const STRINGLIB_CHAR* s, Py_ssize_t n,
691
                         const STRINGLIB_CHAR* p, Py_ssize_t m,
692
                         Py_ssize_t maxcount, int mode)
693
4
{
694
    /* create compressed boyer-moore delta 1 table */
695
4
    unsigned long mask = 0;
696
4
    Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m;
697
698
    /* process pattern[0] outside the loop */
699
4
    STRINGLIB_BLOOM_ADD(mask, p[0]);
700
    /* process pattern[:0:-1] */
701
16
    for (i = mlast; i > 0; i--) {
702
12
        STRINGLIB_BLOOM_ADD(mask, p[i]);
703
12
        if (p[i] == p[0]) {
704
0
            skip = i - 1;
705
0
        }
706
12
    }
707
708
356
    for (i = w; i >= 0; i--) {
709
352
        if (s[i] == p[0]) {
710
            /* candidate match */
711
8
            for (j = mlast; j > 0; j--) {
712
8
                if (s[i+j] != p[j]) {
713
8
                    break;
714
8
                }
715
8
            }
716
8
            if (j == 0) {
717
                /* got a match! */
718
0
                return i;
719
0
            }
720
            /* miss: check if previous character is part of pattern */
721
8
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
722
8
                i = i - m;
723
8
            }
724
0
            else {
725
0
                i = i - skip;
726
0
            }
727
8
        }
728
344
        else {
729
            /* skip: check if previous character is part of pattern */
730
344
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
731
336
                i = i - m;
732
336
            }
733
344
        }
734
352
    }
735
4
    return -1;
736
4
}
Unexecuted instantiation: bytesobject.c:stringlib_default_rfind
Unexecuted instantiation: unicodeobject.c:asciilib_default_rfind
Unexecuted instantiation: unicodeobject.c:ucs1lib_default_rfind
Unexecuted instantiation: unicodeobject.c:ucs2lib_default_rfind
Unexecuted instantiation: unicodeobject.c:ucs4lib_default_rfind
bytes_methods.c:stringlib_default_rfind
Line
Count
Source
693
4
{
694
    /* create compressed boyer-moore delta 1 table */
695
4
    unsigned long mask = 0;
696
4
    Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m;
697
698
    /* process pattern[0] outside the loop */
699
4
    STRINGLIB_BLOOM_ADD(mask, p[0]);
700
    /* process pattern[:0:-1] */
701
16
    for (i = mlast; i > 0; i--) {
702
12
        STRINGLIB_BLOOM_ADD(mask, p[i]);
703
12
        if (p[i] == p[0]) {
704
0
            skip = i - 1;
705
0
        }
706
12
    }
707
708
356
    for (i = w; i >= 0; i--) {
709
352
        if (s[i] == p[0]) {
710
            /* candidate match */
711
8
            for (j = mlast; j > 0; j--) {
712
8
                if (s[i+j] != p[j]) {
713
8
                    break;
714
8
                }
715
8
            }
716
8
            if (j == 0) {
717
                /* got a match! */
718
0
                return i;
719
0
            }
720
            /* miss: check if previous character is part of pattern */
721
8
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
722
8
                i = i - m;
723
8
            }
724
0
            else {
725
0
                i = i - skip;
726
0
            }
727
8
        }
728
344
        else {
729
            /* skip: check if previous character is part of pattern */
730
344
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
731
336
                i = i - m;
732
336
            }
733
344
        }
734
352
    }
735
4
    return -1;
736
4
}
Unexecuted instantiation: bytearrayobject.c:stringlib_default_rfind
737
738
739
static inline Py_ssize_t
740
STRINGLIB(count_char)(const STRINGLIB_CHAR *s, Py_ssize_t n,
741
                      const STRINGLIB_CHAR p0, Py_ssize_t maxcount)
742
0
{
743
0
    Py_ssize_t i, count = 0;
744
0
    for (i = 0; i < n; i++) {
745
0
        if (s[i] == p0) {
746
0
            count++;
747
0
            if (count == maxcount) {
748
0
                return maxcount;
749
0
            }
750
0
        }
751
0
    }
752
0
    return count;
753
0
}
Unexecuted instantiation: bytesobject.c:stringlib_count_char
Unexecuted instantiation: unicodeobject.c:asciilib_count_char
Unexecuted instantiation: unicodeobject.c:ucs1lib_count_char
Unexecuted instantiation: unicodeobject.c:ucs2lib_count_char
Unexecuted instantiation: unicodeobject.c:ucs4lib_count_char
Unexecuted instantiation: bytes_methods.c:stringlib_count_char
Unexecuted instantiation: bytearrayobject.c:stringlib_count_char
754
755
756
static inline Py_ssize_t
757
STRINGLIB(count_char_no_maxcount)(const STRINGLIB_CHAR *s, Py_ssize_t n,
758
                                  const STRINGLIB_CHAR p0)
759
/* A specialized function of count_char that does not cut off at a maximum.
760
   As a result, the compiler is able to vectorize the loop. */
761
54.3M
{
762
54.3M
    Py_ssize_t count = 0;
763
11.8G
    for (Py_ssize_t i = 0; i < n; i++) {
764
11.8G
        if (s[i] == p0) {
765
236M
            count++;
766
236M
        }
767
11.8G
    }
768
54.3M
    return count;
769
54.3M
}
Unexecuted instantiation: bytesobject.c:stringlib_count_char_no_maxcount
Unexecuted instantiation: unicodeobject.c:asciilib_count_char_no_maxcount
unicodeobject.c:ucs1lib_count_char_no_maxcount
Line
Count
Source
761
46.7M
{
762
46.7M
    Py_ssize_t count = 0;
763
7.97G
    for (Py_ssize_t i = 0; i < n; i++) {
764
7.92G
        if (s[i] == p0) {
765
55.7M
            count++;
766
55.7M
        }
767
7.92G
    }
768
46.7M
    return count;
769
46.7M
}
unicodeobject.c:ucs2lib_count_char_no_maxcount
Line
Count
Source
761
6.80M
{
762
6.80M
    Py_ssize_t count = 0;
763
1.96G
    for (Py_ssize_t i = 0; i < n; i++) {
764
1.95G
        if (s[i] == p0) {
765
61.2M
            count++;
766
61.2M
        }
767
1.95G
    }
768
6.80M
    return count;
769
6.80M
}
unicodeobject.c:ucs4lib_count_char_no_maxcount
Line
Count
Source
761
819k
{
762
819k
    Py_ssize_t count = 0;
763
1.95G
    for (Py_ssize_t i = 0; i < n; i++) {
764
1.95G
        if (s[i] == p0) {
765
119M
            count++;
766
119M
        }
767
1.95G
    }
768
819k
    return count;
769
819k
}
Unexecuted instantiation: bytes_methods.c:stringlib_count_char_no_maxcount
Unexecuted instantiation: bytearrayobject.c:stringlib_count_char_no_maxcount
770
771
772
Py_LOCAL_INLINE(Py_ssize_t)
773
FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
774
           const STRINGLIB_CHAR* p, Py_ssize_t m,
775
           Py_ssize_t maxcount, int mode)
776
209M
{
777
209M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
8.05k
        return -1;
779
8.05k
    }
780
781
    /* look for special cases */
782
209M
    if (m <= 1) {
783
195M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
195M
        if (mode == FAST_SEARCH)
788
141M
            return STRINGLIB(find_char)(s, n, p[0]);
789
54.3M
        else if (mode == FAST_RSEARCH)
790
9.02k
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
54.3M
        else {
792
54.3M
            if (maxcount == PY_SSIZE_T_MAX) {
793
54.3M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
54.3M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
54.3M
        }
797
195M
    }
798
799
14.0M
    if (mode != FAST_RSEARCH) {
800
14.0M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
14.0M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
14.0M
        }
803
18
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
18
            if (mode == FAST_SEARCH) {
810
18
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
18
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
18
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
14.0M
    }
825
4
    else {
826
        /* FAST_RSEARCH */
827
4
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
4
    }
829
14.0M
}
Unexecuted instantiation: bytesobject.c:fastsearch
unicodeobject.c:asciilib_fastsearch
Line
Count
Source
776
18.3M
{
777
18.3M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
0
        return -1;
779
0
    }
780
781
    /* look for special cases */
782
18.3M
    if (m <= 1) {
783
16.7M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
16.7M
        if (mode == FAST_SEARCH)
788
16.6M
            return STRINGLIB(find_char)(s, n, p[0]);
789
9.02k
        else if (mode == FAST_RSEARCH)
790
9.02k
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
0
        else {
792
0
            if (maxcount == PY_SSIZE_T_MAX) {
793
0
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
0
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
0
        }
797
16.7M
    }
798
799
1.62M
    if (mode != FAST_RSEARCH) {
800
1.62M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
1.62M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
1.62M
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
1.62M
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
1.62M
}
unicodeobject.c:ucs1lib_fastsearch
Line
Count
Source
776
56.8M
{
777
56.8M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
0
        return -1;
779
0
    }
780
781
    /* look for special cases */
782
56.8M
    if (m <= 1) {
783
52.1M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
52.1M
        if (mode == FAST_SEARCH)
788
5.40M
            return STRINGLIB(find_char)(s, n, p[0]);
789
46.7M
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
46.7M
        else {
792
46.7M
            if (maxcount == PY_SSIZE_T_MAX) {
793
46.7M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
46.7M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
46.7M
        }
797
52.1M
    }
798
799
4.70M
    if (mode != FAST_RSEARCH) {
800
4.70M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
4.70M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
4.70M
        }
803
18
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
18
            if (mode == FAST_SEARCH) {
810
18
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
18
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
18
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
4.70M
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
4.70M
}
unicodeobject.c:ucs2lib_fastsearch
Line
Count
Source
776
58.0M
{
777
58.0M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
8.04k
        return -1;
779
8.04k
    }
780
781
    /* look for special cases */
782
58.0M
    if (m <= 1) {
783
54.8M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
54.8M
        if (mode == FAST_SEARCH)
788
48.0M
            return STRINGLIB(find_char)(s, n, p[0]);
789
6.80M
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
6.80M
        else {
792
6.80M
            if (maxcount == PY_SSIZE_T_MAX) {
793
6.80M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
6.80M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
6.80M
        }
797
54.8M
    }
798
799
3.12M
    if (mode != FAST_RSEARCH) {
800
3.12M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
3.12M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
3.12M
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
3.12M
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
3.12M
}
unicodeobject.c:ucs4lib_fastsearch
Line
Count
Source
776
76.1M
{
777
76.1M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
0
        return -1;
779
0
    }
780
781
    /* look for special cases */
782
76.1M
    if (m <= 1) {
783
71.6M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
71.6M
        if (mode == FAST_SEARCH)
788
70.8M
            return STRINGLIB(find_char)(s, n, p[0]);
789
819k
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
819k
        else {
792
819k
            if (maxcount == PY_SSIZE_T_MAX) {
793
819k
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
819k
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
819k
        }
797
71.6M
    }
798
799
4.54M
    if (mode != FAST_RSEARCH) {
800
4.54M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
4.54M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
4.54M
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
4.54M
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
4.54M
}
bytes_methods.c:fastsearch
Line
Count
Source
776
2.82k
{
777
2.82k
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
9
        return -1;
779
9
    }
780
781
    /* look for special cases */
782
2.81k
    if (m <= 1) {
783
0
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
0
        if (mode == FAST_SEARCH)
788
0
            return STRINGLIB(find_char)(s, n, p[0]);
789
0
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
0
        else {
792
0
            if (maxcount == PY_SSIZE_T_MAX) {
793
0
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
0
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
0
        }
797
0
    }
798
799
2.81k
    if (mode != FAST_RSEARCH) {
800
2.81k
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
2.81k
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
2.81k
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
2.81k
    }
825
4
    else {
826
        /* FAST_RSEARCH */
827
4
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
4
    }
829
2.81k
}
Unexecuted instantiation: bytearrayobject.c:fastsearch
830