Coverage Report

Created: 2026-05-16 06:46

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Objects/stringlib/fastsearch.h
Line
Count
Source
1
/* stringlib: fastsearch implementation */
2
3
#define STRINGLIB_FASTSEARCH_H
4
5
/* fast search/count implementation, based on a mix between boyer-
6
   moore and horspool, with a few more bells and whistles on the top.
7
   for some more background, see:
8
   https://web.archive.org/web/20201107074620/http://effbot.org/zone/stringlib.htm */
9
10
/* note: fastsearch may access s[n], which isn't a problem when using
11
   Python's ordinary string types, but may cause problems if you're
12
   using this code in other contexts.  also, the count mode returns -1
13
   if there cannot possibly be a match in the target string, and 0 if
14
   it has actually checked for matches, but didn't find any.  callers
15
   beware! */
16
17
/* If the strings are long enough, use Crochemore and Perrin's Two-Way
18
   algorithm, which has worst-case O(n) runtime and best-case O(n/k).
19
   Also compute a table of shifts to achieve O(n/k) in more cases,
20
   and often (data dependent) deduce larger shifts than pure C&P can
21
   deduce. See stringlib_find_two_way_notes.txt in this folder for a
22
   detailed explanation. */
23
24
278M
#define FAST_COUNT 0
25
111M
#define FAST_SEARCH 1
26
73.9M
#define FAST_RSEARCH 2
27
28
#if LONG_BIT >= 128
29
#define STRINGLIB_BLOOM_WIDTH 128
30
#elif LONG_BIT >= 64
31
1.17G
#define STRINGLIB_BLOOM_WIDTH 64
32
#elif LONG_BIT >= 32
33
#define STRINGLIB_BLOOM_WIDTH 32
34
#else
35
#error "LONG_BIT is smaller than 32"
36
#endif
37
38
#define STRINGLIB_BLOOM_ADD(mask, ch) \
39
214M
    ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
40
#define STRINGLIB_BLOOM(mask, ch)     \
41
955M
    ((mask &  (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
42
43
#ifdef STRINGLIB_FAST_MEMCHR
44
230M
#  define MEMCHR_CUT_OFF 15
45
#else
46
8.53M
#  define MEMCHR_CUT_OFF 40
47
#endif
48
49
Py_LOCAL_INLINE(Py_ssize_t)
50
STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
51
239M
{
52
239M
    const STRINGLIB_CHAR *p, *e;
53
54
239M
    p = s;
55
239M
    e = s + n;
56
239M
    if (n > MEMCHR_CUT_OFF) {
57
#ifdef STRINGLIB_FAST_MEMCHR
58
34.4M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
34.4M
        if (p != NULL)
60
33.1M
            return (p - s);
61
1.36M
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
7.99M
        if (needle != 0) {
71
8.00M
            do {
72
8.00M
                const void *candidate = memchr(p, needle,
73
8.00M
                                               (e - p) * sizeof(STRINGLIB_CHAR));
74
8.00M
                if (candidate == NULL)
75
71.1k
                    return -1;
76
7.93M
                s1 = p;
77
7.93M
                p = (const STRINGLIB_CHAR *)
78
7.93M
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
7.93M
                if (*p == ch)
80
7.90M
                    return (p - s);
81
                /* False positive */
82
34.3k
                p++;
83
34.3k
                if (p - s1 > MEMCHR_CUT_OFF)
84
14.1k
                    continue;
85
20.2k
                if (e - p <= MEMCHR_CUT_OFF)
86
2.37k
                    break;
87
17.8k
                e1 = p + MEMCHR_CUT_OFF;
88
596k
                while (p != e1) {
89
583k
                    if (*p == ch)
90
3.98k
                        return (p - s);
91
579k
                    p++;
92
579k
                }
93
17.8k
            }
94
7.98M
            while (e - p > MEMCHR_CUT_OFF);
95
7.98M
        }
96
#endif
97
42.4M
    }
98
1.04G
    while (p < e) {
99
869M
        if (*p == ch)
100
20.0M
            return (p - s);
101
849M
        p++;
102
849M
    }
103
176M
    return -1;
104
196M
}
bytesobject.c:stringlib_find_char
Line
Count
Source
51
361k
{
52
361k
    const STRINGLIB_CHAR *p, *e;
53
54
361k
    p = s;
55
361k
    e = s + n;
56
361k
    if (n > MEMCHR_CUT_OFF) {
57
361k
#ifdef STRINGLIB_FAST_MEMCHR
58
361k
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
361k
        if (p != NULL)
60
355k
            return (p - s);
61
5.89k
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                const void *candidate = memchr(p, needle,
73
                                               (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
361k
    }
98
84
    while (p < e) {
99
84
        if (*p == ch)
100
14
            return (p - s);
101
70
        p++;
102
70
    }
103
0
    return -1;
104
14
}
unicodeobject.c:ucs1lib_find_char
Line
Count
Source
51
205M
{
52
205M
    const STRINGLIB_CHAR *p, *e;
53
54
205M
    p = s;
55
205M
    e = s + n;
56
205M
    if (n > MEMCHR_CUT_OFF) {
57
19.9M
#ifdef STRINGLIB_FAST_MEMCHR
58
19.9M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
19.9M
        if (p != NULL)
60
19.0M
            return (p - s);
61
846k
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                const void *candidate = memchr(p, needle,
73
                                               (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
19.9M
    }
98
961M
    while (p < e) {
99
788M
        if (*p == ch)
100
13.0M
            return (p - s);
101
775M
        p++;
102
775M
    }
103
172M
    return -1;
104
185M
}
unicodeobject.c:ucs2lib_find_char
Line
Count
Source
51
8.43M
{
52
8.43M
    const STRINGLIB_CHAR *p, *e;
53
54
8.43M
    p = s;
55
8.43M
    e = s + n;
56
8.43M
    if (n > MEMCHR_CUT_OFF) {
57
#ifdef STRINGLIB_FAST_MEMCHR
58
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
        if (p != NULL)
60
            return (p - s);
61
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
7.99M
        const STRINGLIB_CHAR *s1, *e1;
66
7.99M
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
7.99M
        if (needle != 0) {
71
8.00M
            do {
72
8.00M
                const void *candidate = memchr(p, needle,
73
8.00M
                                               (e - p) * sizeof(STRINGLIB_CHAR));
74
8.00M
                if (candidate == NULL)
75
71.1k
                    return -1;
76
7.93M
                s1 = p;
77
7.93M
                p = (const STRINGLIB_CHAR *)
78
7.93M
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
7.93M
                if (*p == ch)
80
7.90M
                    return (p - s);
81
                /* False positive */
82
34.3k
                p++;
83
34.3k
                if (p - s1 > MEMCHR_CUT_OFF)
84
14.1k
                    continue;
85
20.2k
                if (e - p <= MEMCHR_CUT_OFF)
86
2.37k
                    break;
87
17.8k
                e1 = p + MEMCHR_CUT_OFF;
88
596k
                while (p != e1) {
89
583k
                    if (*p == ch)
90
3.98k
                        return (p - s);
91
579k
                    p++;
92
579k
                }
93
17.8k
            }
94
7.98M
            while (e - p > MEMCHR_CUT_OFF);
95
7.98M
        }
96
7.99M
#endif
97
7.99M
    }
98
6.59M
    while (p < e) {
99
6.33M
        if (*p == ch)
100
200k
            return (p - s);
101
6.13M
        p++;
102
6.13M
    }
103
261k
    return -1;
104
461k
}
unicodeobject.c:ucs4lib_find_char
Line
Count
Source
51
5.94M
{
52
5.94M
    const STRINGLIB_CHAR *p, *e;
53
54
5.94M
    p = s;
55
5.94M
    e = s + n;
56
5.94M
    if (n > MEMCHR_CUT_OFF) {
57
5.89M
#ifdef STRINGLIB_FAST_MEMCHR
58
5.89M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
5.89M
        if (p != NULL)
60
5.88M
            return (p - s);
61
14.4k
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                const void *candidate = memchr(p, needle,
73
                                               (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
5.89M
    }
98
202k
    while (p < e) {
99
169k
        if (*p == ch)
100
19.1k
            return (p - s);
101
150k
        p++;
102
150k
    }
103
32.8k
    return -1;
104
51.9k
}
unicodeobject.c:asciilib_find_char
Line
Count
Source
51
5.34M
{
52
5.34M
    const STRINGLIB_CHAR *p, *e;
53
54
5.34M
    p = s;
55
5.34M
    e = s + n;
56
5.34M
    if (n > MEMCHR_CUT_OFF) {
57
4.25M
#ifdef STRINGLIB_FAST_MEMCHR
58
4.25M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
4.25M
        if (p != NULL)
60
3.81M
            return (p - s);
61
440k
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                const void *candidate = memchr(p, needle,
73
                                               (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
4.25M
    }
98
3.46M
    while (p < e) {
99
3.41M
        if (*p == ch)
100
1.03M
            return (p - s);
101
2.37M
        p++;
102
2.37M
    }
103
57.1k
    return -1;
104
1.09M
}
bytes_methods.c:stringlib_find_char
Line
Count
Source
51
13.4M
{
52
13.4M
    const STRINGLIB_CHAR *p, *e;
53
54
13.4M
    p = s;
55
13.4M
    e = s + n;
56
13.4M
    if (n > MEMCHR_CUT_OFF) {
57
4.05M
#ifdef STRINGLIB_FAST_MEMCHR
58
4.05M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
4.05M
        if (p != NULL)
60
3.99M
            return (p - s);
61
61.4k
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                const void *candidate = memchr(p, needle,
73
                                               (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
4.05M
    }
98
75.1M
    while (p < e) {
99
71.4M
        if (*p == ch)
100
5.75M
            return (p - s);
101
65.6M
        p++;
102
65.6M
    }
103
3.68M
    return -1;
104
9.43M
}
Unexecuted instantiation: bytearrayobject.c:stringlib_find_char
105
106
#undef MEMCHR_CUT_OFF
107
108
#if STRINGLIB_SIZEOF_CHAR == 1
109
398k
#  define MEMRCHR_CUT_OFF 15
110
#else
111
406k
#  define MEMRCHR_CUT_OFF 40
112
#endif
113
114
115
Py_LOCAL_INLINE(Py_ssize_t)
116
STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
117
759k
{
118
759k
    const STRINGLIB_CHAR *p;
119
759k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
759k
    if (n > MEMRCHR_CUT_OFF) {
126
#if STRINGLIB_SIZEOF_CHAR == 1
127
        p = memrchr(s, ch, n);
128
106k
        if (p != NULL)
129
100k
            return (p - s);
130
6.10k
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
283k
        if (needle != 0) {
141
296k
            do {
142
296k
                void *candidate = memrchr(s, needle,
143
296k
                                          n * sizeof(STRINGLIB_CHAR));
144
296k
                if (candidate == NULL)
145
1.42k
                    return -1;
146
295k
                n1 = n;
147
295k
                p = (const STRINGLIB_CHAR *)
148
295k
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
295k
                n = p - s;
150
295k
                if (*p == ch)
151
279k
                    return n;
152
                /* False positive */
153
15.4k
                if (n1 - n > MEMRCHR_CUT_OFF)
154
6.96k
                    continue;
155
8.45k
                if (n <= MEMRCHR_CUT_OFF)
156
963
                    break;
157
7.49k
                s1 = p - MEMRCHR_CUT_OFF;
158
291k
                while (p > s1) {
159
284k
                    p--;
160
284k
                    if (*p == ch)
161
643
                        return (p - s);
162
284k
                }
163
6.85k
                n = p - s;
164
6.85k
            }
165
283k
            while (n > MEMRCHR_CUT_OFF);
166
283k
        }
167
#endif
168
389k
    }
169
371k
#endif  /* HAVE_MEMRCHR */
170
371k
    p = s + n;
171
2.41M
    while (p > s) {
172
2.22M
        p--;
173
2.22M
        if (*p == ch)
174
179k
            return (p - s);
175
2.22M
    }
176
192k
    return -1;
177
371k
}
Unexecuted instantiation: bytesobject.c:stringlib_rfind_char
unicodeobject.c:ucs1lib_rfind_char
Line
Count
Source
117
70.8k
{
118
70.8k
    const STRINGLIB_CHAR *p;
119
70.8k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
70.8k
    if (n > MEMRCHR_CUT_OFF) {
126
65.9k
#if STRINGLIB_SIZEOF_CHAR == 1
127
65.9k
        p = memrchr(s, ch, n);
128
65.9k
        if (p != NULL)
129
63.9k
            return (p - s);
130
1.93k
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
        if (needle != 0) {
141
            do {
142
                void *candidate = memrchr(s, needle,
143
                                          n * sizeof(STRINGLIB_CHAR));
144
                if (candidate == NULL)
145
                    return -1;
146
                n1 = n;
147
                p = (const STRINGLIB_CHAR *)
148
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
                n = p - s;
150
                if (*p == ch)
151
                    return n;
152
                /* False positive */
153
                if (n1 - n > MEMRCHR_CUT_OFF)
154
                    continue;
155
                if (n <= MEMRCHR_CUT_OFF)
156
                    break;
157
                s1 = p - MEMRCHR_CUT_OFF;
158
                while (p > s1) {
159
                    p--;
160
                    if (*p == ch)
161
                        return (p - s);
162
                }
163
                n = p - s;
164
            }
165
            while (n > MEMRCHR_CUT_OFF);
166
        }
167
#endif
168
65.9k
    }
169
4.94k
#endif  /* HAVE_MEMRCHR */
170
4.94k
    p = s + n;
171
20.7k
    while (p > s) {
172
18.9k
        p--;
173
18.9k
        if (*p == ch)
174
3.09k
            return (p - s);
175
18.9k
    }
176
1.85k
    return -1;
177
4.94k
}
unicodeobject.c:ucs2lib_rfind_char
Line
Count
Source
117
232k
{
118
232k
    const STRINGLIB_CHAR *p;
119
232k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
232k
    if (n > MEMRCHR_CUT_OFF) {
126
#if STRINGLIB_SIZEOF_CHAR == 1
127
        p = memrchr(s, ch, n);
128
        if (p != NULL)
129
            return (p - s);
130
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
198k
        const STRINGLIB_CHAR *s1;
135
198k
        Py_ssize_t n1;
136
198k
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
198k
        if (needle != 0) {
141
201k
            do {
142
201k
                void *candidate = memrchr(s, needle,
143
201k
                                          n * sizeof(STRINGLIB_CHAR));
144
201k
                if (candidate == NULL)
145
838
                    return -1;
146
200k
                n1 = n;
147
200k
                p = (const STRINGLIB_CHAR *)
148
200k
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
200k
                n = p - s;
150
200k
                if (*p == ch)
151
196k
                    return n;
152
                /* False positive */
153
4.14k
                if (n1 - n > MEMRCHR_CUT_OFF)
154
1.76k
                    continue;
155
2.38k
                if (n <= MEMRCHR_CUT_OFF)
156
576
                    break;
157
1.80k
                s1 = p - MEMRCHR_CUT_OFF;
158
67.5k
                while (p > s1) {
159
66.0k
                    p--;
160
66.0k
                    if (*p == ch)
161
256
                        return (p - s);
162
66.0k
                }
163
1.55k
                n = p - s;
164
1.55k
            }
165
198k
            while (n > MEMRCHR_CUT_OFF);
166
198k
        }
167
198k
#endif
168
198k
    }
169
34.1k
#endif  /* HAVE_MEMRCHR */
170
34.1k
    p = s + n;
171
127k
    while (p > s) {
172
125k
        p--;
173
125k
        if (*p == ch)
174
31.7k
            return (p - s);
175
125k
    }
176
2.40k
    return -1;
177
34.1k
}
unicodeobject.c:ucs4lib_rfind_char
Line
Count
Source
117
129k
{
118
129k
    const STRINGLIB_CHAR *p;
119
129k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
129k
    if (n > MEMRCHR_CUT_OFF) {
126
#if STRINGLIB_SIZEOF_CHAR == 1
127
        p = memrchr(s, ch, n);
128
        if (p != NULL)
129
            return (p - s);
130
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
84.7k
        const STRINGLIB_CHAR *s1;
135
84.7k
        Py_ssize_t n1;
136
84.7k
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
84.7k
        if (needle != 0) {
141
94.9k
            do {
142
94.9k
                void *candidate = memrchr(s, needle,
143
94.9k
                                          n * sizeof(STRINGLIB_CHAR));
144
94.9k
                if (candidate == NULL)
145
590
                    return -1;
146
94.3k
                n1 = n;
147
94.3k
                p = (const STRINGLIB_CHAR *)
148
94.3k
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
94.3k
                n = p - s;
150
94.3k
                if (*p == ch)
151
83.0k
                    return n;
152
                /* False positive */
153
11.2k
                if (n1 - n > MEMRCHR_CUT_OFF)
154
5.20k
                    continue;
155
6.07k
                if (n <= MEMRCHR_CUT_OFF)
156
387
                    break;
157
5.68k
                s1 = p - MEMRCHR_CUT_OFF;
158
223k
                while (p > s1) {
159
218k
                    p--;
160
218k
                    if (*p == ch)
161
387
                        return (p - s);
162
218k
                }
163
5.30k
                n = p - s;
164
5.30k
            }
165
84.7k
            while (n > MEMRCHR_CUT_OFF);
166
84.7k
        }
167
84.7k
#endif
168
84.7k
    }
169
45.2k
#endif  /* HAVE_MEMRCHR */
170
45.2k
    p = s + n;
171
287k
    while (p > s) {
172
285k
        p--;
173
285k
        if (*p == ch)
174
43.3k
            return (p - s);
175
285k
    }
176
1.88k
    return -1;
177
45.2k
}
unicodeobject.c:asciilib_rfind_char
Line
Count
Source
117
56.6k
{
118
56.6k
    const STRINGLIB_CHAR *p;
119
56.6k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
56.6k
    if (n > MEMRCHR_CUT_OFF) {
126
18.5k
#if STRINGLIB_SIZEOF_CHAR == 1
127
18.5k
        p = memrchr(s, ch, n);
128
18.5k
        if (p != NULL)
129
18.2k
            return (p - s);
130
291
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
        if (needle != 0) {
141
            do {
142
                void *candidate = memrchr(s, needle,
143
                                          n * sizeof(STRINGLIB_CHAR));
144
                if (candidate == NULL)
145
                    return -1;
146
                n1 = n;
147
                p = (const STRINGLIB_CHAR *)
148
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
                n = p - s;
150
                if (*p == ch)
151
                    return n;
152
                /* False positive */
153
                if (n1 - n > MEMRCHR_CUT_OFF)
154
                    continue;
155
                if (n <= MEMRCHR_CUT_OFF)
156
                    break;
157
                s1 = p - MEMRCHR_CUT_OFF;
158
                while (p > s1) {
159
                    p--;
160
                    if (*p == ch)
161
                        return (p - s);
162
                }
163
                n = p - s;
164
            }
165
            while (n > MEMRCHR_CUT_OFF);
166
        }
167
#endif
168
18.5k
    }
169
38.0k
#endif  /* HAVE_MEMRCHR */
170
38.0k
    p = s + n;
171
219k
    while (p > s) {
172
191k
        p--;
173
191k
        if (*p == ch)
174
10.5k
            return (p - s);
175
191k
    }
176
27.5k
    return -1;
177
38.0k
}
bytes_methods.c:stringlib_rfind_char
Line
Count
Source
117
271k
{
118
271k
    const STRINGLIB_CHAR *p;
119
271k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
271k
    if (n > MEMRCHR_CUT_OFF) {
126
21.9k
#if STRINGLIB_SIZEOF_CHAR == 1
127
21.9k
        p = memrchr(s, ch, n);
128
21.9k
        if (p != NULL)
129
18.1k
            return (p - s);
130
3.88k
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
        if (needle != 0) {
141
            do {
142
                void *candidate = memrchr(s, needle,
143
                                          n * sizeof(STRINGLIB_CHAR));
144
                if (candidate == NULL)
145
                    return -1;
146
                n1 = n;
147
                p = (const STRINGLIB_CHAR *)
148
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
                n = p - s;
150
                if (*p == ch)
151
                    return n;
152
                /* False positive */
153
                if (n1 - n > MEMRCHR_CUT_OFF)
154
                    continue;
155
                if (n <= MEMRCHR_CUT_OFF)
156
                    break;
157
                s1 = p - MEMRCHR_CUT_OFF;
158
                while (p > s1) {
159
                    p--;
160
                    if (*p == ch)
161
                        return (p - s);
162
                }
163
                n = p - s;
164
            }
165
            while (n > MEMRCHR_CUT_OFF);
166
        }
167
#endif
168
21.9k
    }
169
249k
#endif  /* HAVE_MEMRCHR */
170
249k
    p = s + n;
171
1.76M
    while (p > s) {
172
1.60M
        p--;
173
1.60M
        if (*p == ch)
174
90.6k
            return (p - s);
175
1.60M
    }
176
158k
    return -1;
177
249k
}
Unexecuted instantiation: bytearrayobject.c:stringlib_rfind_char
178
179
#undef MEMRCHR_CUT_OFF
180
181
/* Change to a 1 to see logging comments walk through the algorithm. */
182
#if 0 && STRINGLIB_SIZEOF_CHAR == 1
183
# define LOG(...) printf(__VA_ARGS__)
184
# define LOG_STRING(s, n) printf("\"%.*s\"", (int)(n), s)
185
# define LOG_LINEUP() do {                                         \
186
    LOG("> "); LOG_STRING(haystack, len_haystack); LOG("\n> ");    \
187
    LOG("%*s",(int)(window_last - haystack + 1 - len_needle), ""); \
188
    LOG_STRING(needle, len_needle); LOG("\n");                     \
189
} while(0)
190
#else
191
# define LOG(...)
192
# define LOG_STRING(s, n)
193
# define LOG_LINEUP()
194
#endif
195
196
Py_LOCAL_INLINE(Py_ssize_t)
197
STRINGLIB(_lex_search)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
198
                       Py_ssize_t *return_period, int invert_alphabet)
199
40
{
200
    /* Do a lexicographic search. Essentially this:
201
           >>> max(needle[i:] for i in range(len(needle)+1))
202
       Also find the period of the right half.   */
203
40
    Py_ssize_t max_suffix = 0;
204
40
    Py_ssize_t candidate = 1;
205
40
    Py_ssize_t k = 0;
206
    // The period of the right half.
207
40
    Py_ssize_t period = 1;
208
209
400
    while (candidate + k < len_needle) {
210
        // each loop increases candidate + k + max_suffix
211
360
        STRINGLIB_CHAR a = needle[candidate + k];
212
360
        STRINGLIB_CHAR b = needle[max_suffix + k];
213
        // check if the suffix at candidate is better than max_suffix
214
360
        if (invert_alphabet ? (b < a) : (a < b)) {
215
            // Fell short of max_suffix.
216
            // The next k + 1 characters are non-increasing
217
            // from candidate, so they won't start a maximal suffix.
218
260
            candidate += k + 1;
219
260
            k = 0;
220
            // We've ruled out any period smaller than what's
221
            // been scanned since max_suffix.
222
260
            period = candidate - max_suffix;
223
260
        }
224
100
        else if (a == b) {
225
20
            if (k + 1 != period) {
226
                // Keep scanning the equal strings
227
0
                k++;
228
0
            }
229
20
            else {
230
                // Matched a whole period.
231
                // Start matching the next period.
232
20
                candidate += period;
233
20
                k = 0;
234
20
            }
235
20
        }
236
80
        else {
237
            // Did better than max_suffix, so replace it.
238
80
            max_suffix = candidate;
239
80
            candidate++;
240
80
            k = 0;
241
80
            period = 1;
242
80
        }
243
360
    }
244
40
    *return_period = period;
245
40
    return max_suffix;
246
40
}
Unexecuted instantiation: bytesobject.c:stringlib__lex_search
Unexecuted instantiation: unicodeobject.c:asciilib__lex_search
unicodeobject.c:ucs1lib__lex_search
Line
Count
Source
199
40
{
200
    /* Do a lexicographic search. Essentially this:
201
           >>> max(needle[i:] for i in range(len(needle)+1))
202
       Also find the period of the right half.   */
203
40
    Py_ssize_t max_suffix = 0;
204
40
    Py_ssize_t candidate = 1;
205
40
    Py_ssize_t k = 0;
206
    // The period of the right half.
207
40
    Py_ssize_t period = 1;
208
209
400
    while (candidate + k < len_needle) {
210
        // each loop increases candidate + k + max_suffix
211
360
        STRINGLIB_CHAR a = needle[candidate + k];
212
360
        STRINGLIB_CHAR b = needle[max_suffix + k];
213
        // check if the suffix at candidate is better than max_suffix
214
360
        if (invert_alphabet ? (b < a) : (a < b)) {
215
            // Fell short of max_suffix.
216
            // The next k + 1 characters are non-increasing
217
            // from candidate, so they won't start a maximal suffix.
218
260
            candidate += k + 1;
219
260
            k = 0;
220
            // We've ruled out any period smaller than what's
221
            // been scanned since max_suffix.
222
260
            period = candidate - max_suffix;
223
260
        }
224
100
        else if (a == b) {
225
20
            if (k + 1 != period) {
226
                // Keep scanning the equal strings
227
0
                k++;
228
0
            }
229
20
            else {
230
                // Matched a whole period.
231
                // Start matching the next period.
232
20
                candidate += period;
233
20
                k = 0;
234
20
            }
235
20
        }
236
80
        else {
237
            // Did better than max_suffix, so replace it.
238
80
            max_suffix = candidate;
239
80
            candidate++;
240
80
            k = 0;
241
80
            period = 1;
242
80
        }
243
360
    }
244
40
    *return_period = period;
245
40
    return max_suffix;
246
40
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__lex_search
Unexecuted instantiation: unicodeobject.c:ucs4lib__lex_search
Unexecuted instantiation: bytes_methods.c:stringlib__lex_search
Unexecuted instantiation: bytearrayobject.c:stringlib__lex_search
247
248
Py_LOCAL_INLINE(Py_ssize_t)
249
STRINGLIB(_factorize)(const STRINGLIB_CHAR *needle,
250
                      Py_ssize_t len_needle,
251
                      Py_ssize_t *return_period)
252
20
{
253
    /* Do a "critical factorization", making it so that:
254
       >>> needle = (left := needle[:cut]) + (right := needle[cut:])
255
       where the "local period" of the cut is maximal.
256
257
       The local period of the cut is the minimal length of a string w
258
       such that (left endswith w or w endswith left)
259
       and (right startswith w or w startswith right).
260
261
       The Critical Factorization Theorem says that this maximal local
262
       period is the global period of the string.
263
264
       Crochemore and Perrin (1991) show that this cut can be computed
265
       as the later of two cuts: one that gives a lexicographically
266
       maximal right half, and one that gives the same with the
267
       with respect to a reversed alphabet-ordering.
268
269
       This is what we want to happen:
270
           >>> x = "GCAGAGAG"
271
           >>> cut, period = factorize(x)
272
           >>> x[:cut], (right := x[cut:])
273
           ('GC', 'AGAGAG')
274
           >>> period  # right half period
275
           2
276
           >>> right[period:] == right[:-period]
277
           True
278
279
       This is how the local period lines up in the above example:
280
                GC | AGAGAG
281
           AGAGAGC = AGAGAGC
282
       The length of this minimal repetition is 7, which is indeed the
283
       period of the original string. */
284
285
20
    Py_ssize_t cut1, period1, cut2, period2, cut, period;
286
20
    cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0);
287
20
    cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1);
288
289
    // Take the later cut.
290
20
    if (cut1 > cut2) {
291
20
        period = period1;
292
20
        cut = cut1;
293
20
    }
294
0
    else {
295
0
        period = period2;
296
0
        cut = cut2;
297
0
    }
298
299
20
    LOG("split: "); LOG_STRING(needle, cut);
300
20
    LOG(" + "); LOG_STRING(needle + cut, len_needle - cut);
301
20
    LOG("\n");
302
303
20
    *return_period = period;
304
20
    return cut;
305
20
}
Unexecuted instantiation: bytesobject.c:stringlib__factorize
Unexecuted instantiation: unicodeobject.c:asciilib__factorize
unicodeobject.c:ucs1lib__factorize
Line
Count
Source
252
20
{
253
    /* Do a "critical factorization", making it so that:
254
       >>> needle = (left := needle[:cut]) + (right := needle[cut:])
255
       where the "local period" of the cut is maximal.
256
257
       The local period of the cut is the minimal length of a string w
258
       such that (left endswith w or w endswith left)
259
       and (right startswith w or w startswith right).
260
261
       The Critical Factorization Theorem says that this maximal local
262
       period is the global period of the string.
263
264
       Crochemore and Perrin (1991) show that this cut can be computed
265
       as the later of two cuts: one that gives a lexicographically
266
       maximal right half, and one that gives the same with the
267
       with respect to a reversed alphabet-ordering.
268
269
       This is what we want to happen:
270
           >>> x = "GCAGAGAG"
271
           >>> cut, period = factorize(x)
272
           >>> x[:cut], (right := x[cut:])
273
           ('GC', 'AGAGAG')
274
           >>> period  # right half period
275
           2
276
           >>> right[period:] == right[:-period]
277
           True
278
279
       This is how the local period lines up in the above example:
280
                GC | AGAGAG
281
           AGAGAGC = AGAGAGC
282
       The length of this minimal repetition is 7, which is indeed the
283
       period of the original string. */
284
285
20
    Py_ssize_t cut1, period1, cut2, period2, cut, period;
286
20
    cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0);
287
20
    cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1);
288
289
    // Take the later cut.
290
20
    if (cut1 > cut2) {
291
20
        period = period1;
292
20
        cut = cut1;
293
20
    }
294
0
    else {
295
0
        period = period2;
296
0
        cut = cut2;
297
0
    }
298
299
20
    LOG("split: "); LOG_STRING(needle, cut);
300
20
    LOG(" + "); LOG_STRING(needle + cut, len_needle - cut);
301
20
    LOG("\n");
302
303
20
    *return_period = period;
304
20
    return cut;
305
20
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__factorize
Unexecuted instantiation: unicodeobject.c:ucs4lib__factorize
Unexecuted instantiation: bytes_methods.c:stringlib__factorize
Unexecuted instantiation: bytearrayobject.c:stringlib__factorize
306
307
308
220
#define SHIFT_TYPE uint8_t
309
#define MAX_SHIFT UINT8_MAX
310
311
72.6k
#define TABLE_SIZE_BITS 6u
312
72.6k
#define TABLE_SIZE (1U << TABLE_SIZE_BITS)
313
71.3k
#define TABLE_MASK (TABLE_SIZE - 1U)
314
315
typedef struct STRINGLIB(_pre) {
316
    const STRINGLIB_CHAR *needle;
317
    Py_ssize_t len_needle;
318
    Py_ssize_t cut;
319
    Py_ssize_t period;
320
    Py_ssize_t gap;
321
    int is_periodic;
322
    SHIFT_TYPE table[TABLE_SIZE];
323
} STRINGLIB(prework);
324
325
326
static void
327
STRINGLIB(_preprocess)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
328
                       STRINGLIB(prework) *p)
329
20
{
330
20
    p->needle = needle;
331
20
    p->len_needle = len_needle;
332
20
    p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period));
333
20
    assert(p->period + p->cut <= len_needle);
334
20
    p->is_periodic = (0 == memcmp(needle,
335
20
                                  needle + p->period,
336
20
                                  p->cut * STRINGLIB_SIZEOF_CHAR));
337
20
    if (p->is_periodic) {
338
0
        assert(p->cut <= len_needle/2);
339
0
        assert(p->cut < p->period);
340
0
    }
341
20
    else {
342
        // A lower bound on the period
343
20
        p->period = Py_MAX(p->cut, len_needle - p->cut) + 1;
344
20
    }
345
    // The gap between the last character and the previous
346
    // occurrence of an equivalent character (modulo TABLE_SIZE)
347
20
    p->gap = len_needle;
348
20
    STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK;
349
140
    for (Py_ssize_t i = len_needle - 2; i >= 0; i--) {
350
140
        STRINGLIB_CHAR x = needle[i] & TABLE_MASK;
351
140
        if (x == last) {
352
20
            p->gap = len_needle - 1 - i;
353
20
            break;
354
20
        }
355
140
    }
356
    // Fill up a compressed Boyer-Moore "Bad Character" table
357
20
    Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT);
358
1.30k
    for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; i++) {
359
1.28k
        p->table[i] = Py_SAFE_DOWNCAST(not_found_shift,
360
1.28k
                                       Py_ssize_t, SHIFT_TYPE);
361
1.28k
    }
362
220
    for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; i++) {
363
200
        SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i,
364
200
                                            Py_ssize_t, SHIFT_TYPE);
365
200
        p->table[needle[i] & TABLE_MASK] = shift;
366
200
    }
367
20
}
Unexecuted instantiation: bytesobject.c:stringlib__preprocess
Unexecuted instantiation: unicodeobject.c:asciilib__preprocess
unicodeobject.c:ucs1lib__preprocess
Line
Count
Source
329
20
{
330
20
    p->needle = needle;
331
20
    p->len_needle = len_needle;
332
20
    p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period));
333
20
    assert(p->period + p->cut <= len_needle);
334
20
    p->is_periodic = (0 == memcmp(needle,
335
20
                                  needle + p->period,
336
20
                                  p->cut * STRINGLIB_SIZEOF_CHAR));
337
20
    if (p->is_periodic) {
338
0
        assert(p->cut <= len_needle/2);
339
0
        assert(p->cut < p->period);
340
0
    }
341
20
    else {
342
        // A lower bound on the period
343
20
        p->period = Py_MAX(p->cut, len_needle - p->cut) + 1;
344
20
    }
345
    // The gap between the last character and the previous
346
    // occurrence of an equivalent character (modulo TABLE_SIZE)
347
20
    p->gap = len_needle;
348
20
    STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK;
349
140
    for (Py_ssize_t i = len_needle - 2; i >= 0; i--) {
350
140
        STRINGLIB_CHAR x = needle[i] & TABLE_MASK;
351
140
        if (x == last) {
352
20
            p->gap = len_needle - 1 - i;
353
20
            break;
354
20
        }
355
140
    }
356
    // Fill up a compressed Boyer-Moore "Bad Character" table
357
20
    Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT);
358
1.30k
    for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; i++) {
359
1.28k
        p->table[i] = Py_SAFE_DOWNCAST(not_found_shift,
360
1.28k
                                       Py_ssize_t, SHIFT_TYPE);
361
1.28k
    }
362
220
    for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; i++) {
363
200
        SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i,
364
200
                                            Py_ssize_t, SHIFT_TYPE);
365
200
        p->table[needle[i] & TABLE_MASK] = shift;
366
200
    }
367
20
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__preprocess
Unexecuted instantiation: unicodeobject.c:ucs4lib__preprocess
Unexecuted instantiation: bytes_methods.c:stringlib__preprocess
Unexecuted instantiation: bytearrayobject.c:stringlib__preprocess
368
369
static Py_ssize_t
370
STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
371
                    STRINGLIB(prework) *p)
372
20
{
373
    // Crochemore and Perrin's (1991) Two-Way algorithm.
374
    // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
375
20
    const Py_ssize_t len_needle = p->len_needle;
376
20
    const Py_ssize_t cut = p->cut;
377
20
    Py_ssize_t period = p->period;
378
20
    const STRINGLIB_CHAR *const needle = p->needle;
379
20
    const STRINGLIB_CHAR *window_last = haystack + len_needle - 1;
380
20
    const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack;
381
20
    SHIFT_TYPE *table = p->table;
382
20
    const STRINGLIB_CHAR *window;
383
20
    LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack);
384
385
20
    Py_ssize_t gap = p->gap;
386
20
    Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap);
387
20
    if (p->is_periodic) {
388
0
        LOG("Needle is periodic.\n");
389
0
        Py_ssize_t memory = 0;
390
0
      periodicwindowloop:
391
0
        while (window_last < haystack_end) {
392
0
            assert(memory == 0);
393
0
            for (;;) {
394
0
                LOG_LINEUP();
395
0
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
396
0
                window_last += shift;
397
0
                if (shift == 0) {
398
0
                    break;
399
0
                }
400
0
                if (window_last >= haystack_end) {
401
0
                    return -1;
402
0
                }
403
0
                LOG("Horspool skip\n");
404
0
            }
405
0
          no_shift:
406
0
            window = window_last - len_needle + 1;
407
0
            assert((window[len_needle - 1] & TABLE_MASK) ==
408
0
                   (needle[len_needle - 1] & TABLE_MASK));
409
0
            Py_ssize_t i = Py_MAX(cut, memory);
410
0
            for (; i < len_needle; i++) {
411
0
                if (needle[i] != window[i]) {
412
0
                    if (i < gap_jump_end) {
413
0
                        LOG("Early right half mismatch: jump by gap.\n");
414
0
                        assert(gap >= i - cut + 1);
415
0
                        window_last += gap;
416
0
                    }
417
0
                    else {
418
0
                        LOG("Late right half mismatch: jump by n (>gap)\n");
419
0
                        assert(i - cut + 1 > gap);
420
0
                        window_last += i - cut + 1;
421
0
                    }
422
0
                    memory = 0;
423
0
                    goto periodicwindowloop;
424
0
                }
425
0
            }
426
0
            for (i = memory; i < cut; i++) {
427
0
                if (needle[i] != window[i]) {
428
0
                    LOG("Left half does not match.\n");
429
0
                    window_last += period;
430
0
                    memory = len_needle - period;
431
0
                    if (window_last >= haystack_end) {
432
0
                        return -1;
433
0
                    }
434
0
                    Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
435
0
                    if (shift) {
436
                        // A mismatch has been identified to the right
437
                        // of where i will next start, so we can jump
438
                        // at least as far as if the mismatch occurred
439
                        // on the first comparison.
440
0
                        Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1;
441
0
                        LOG("Skip with Memory.\n");
442
0
                        memory = 0;
443
0
                        window_last += Py_MAX(shift, mem_jump);
444
0
                        goto periodicwindowloop;
445
0
                    }
446
0
                    goto no_shift;
447
0
                }
448
0
            }
449
0
            LOG("Found a match!\n");
450
0
            return window - haystack;
451
0
        }
452
0
    }
453
20
    else {
454
20
        period = Py_MAX(gap, period);
455
20
        LOG("Needle is not periodic.\n");
456
13.9k
      windowloop:
457
13.9k
        while (window_last < haystack_end) {
458
71.0k
            for (;;) {
459
71.0k
                LOG_LINEUP();
460
71.0k
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
461
71.0k
                window_last += shift;
462
71.0k
                if (shift == 0) {
463
13.9k
                    break;
464
13.9k
                }
465
57.0k
                if (window_last >= haystack_end) {
466
16
                    return -1;
467
16
                }
468
57.0k
                LOG("Horspool skip\n");
469
57.0k
            }
470
13.9k
            window = window_last - len_needle + 1;
471
13.9k
            assert((window[len_needle - 1] & TABLE_MASK) ==
472
13.9k
                   (needle[len_needle - 1] & TABLE_MASK));
473
13.9k
            Py_ssize_t i = cut;
474
14.1k
            for (; i < len_needle; i++) {
475
14.1k
                if (needle[i] != window[i]) {
476
13.8k
                    if (i < gap_jump_end) {
477
13.8k
                        LOG("Early right half mismatch: jump by gap.\n");
478
13.8k
                        assert(gap >= i - cut + 1);
479
13.8k
                        window_last += gap;
480
13.8k
                    }
481
0
                    else {
482
0
                        LOG("Late right half mismatch: jump by n (>gap)\n");
483
0
                        assert(i - cut + 1 > gap);
484
0
                        window_last += i - cut + 1;
485
0
                    }
486
13.8k
                    goto windowloop;
487
13.8k
                }
488
14.1k
            }
489
186
            for (Py_ssize_t i = 0; i < cut; i++) {
490
184
                if (needle[i] != window[i]) {
491
88
                    LOG("Left half does not match.\n");
492
88
                    window_last += period;
493
88
                    goto windowloop;
494
88
                }
495
184
            }
496
2
            LOG("Found a match!\n");
497
2
            return window - haystack;
498
90
        }
499
13.9k
    }
500
2
    LOG("Not found. Returning -1.\n");
501
2
    return -1;
502
20
}
Unexecuted instantiation: bytesobject.c:stringlib__two_way
Unexecuted instantiation: unicodeobject.c:asciilib__two_way
unicodeobject.c:ucs1lib__two_way
Line
Count
Source
372
20
{
373
    // Crochemore and Perrin's (1991) Two-Way algorithm.
374
    // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
375
20
    const Py_ssize_t len_needle = p->len_needle;
376
20
    const Py_ssize_t cut = p->cut;
377
20
    Py_ssize_t period = p->period;
378
20
    const STRINGLIB_CHAR *const needle = p->needle;
379
20
    const STRINGLIB_CHAR *window_last = haystack + len_needle - 1;
380
20
    const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack;
381
20
    SHIFT_TYPE *table = p->table;
382
20
    const STRINGLIB_CHAR *window;
383
20
    LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack);
384
385
20
    Py_ssize_t gap = p->gap;
386
20
    Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap);
387
20
    if (p->is_periodic) {
388
0
        LOG("Needle is periodic.\n");
389
0
        Py_ssize_t memory = 0;
390
0
      periodicwindowloop:
391
0
        while (window_last < haystack_end) {
392
0
            assert(memory == 0);
393
0
            for (;;) {
394
0
                LOG_LINEUP();
395
0
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
396
0
                window_last += shift;
397
0
                if (shift == 0) {
398
0
                    break;
399
0
                }
400
0
                if (window_last >= haystack_end) {
401
0
                    return -1;
402
0
                }
403
0
                LOG("Horspool skip\n");
404
0
            }
405
0
          no_shift:
406
0
            window = window_last - len_needle + 1;
407
0
            assert((window[len_needle - 1] & TABLE_MASK) ==
408
0
                   (needle[len_needle - 1] & TABLE_MASK));
409
0
            Py_ssize_t i = Py_MAX(cut, memory);
410
0
            for (; i < len_needle; i++) {
411
0
                if (needle[i] != window[i]) {
412
0
                    if (i < gap_jump_end) {
413
0
                        LOG("Early right half mismatch: jump by gap.\n");
414
0
                        assert(gap >= i - cut + 1);
415
0
                        window_last += gap;
416
0
                    }
417
0
                    else {
418
0
                        LOG("Late right half mismatch: jump by n (>gap)\n");
419
0
                        assert(i - cut + 1 > gap);
420
0
                        window_last += i - cut + 1;
421
0
                    }
422
0
                    memory = 0;
423
0
                    goto periodicwindowloop;
424
0
                }
425
0
            }
426
0
            for (i = memory; i < cut; i++) {
427
0
                if (needle[i] != window[i]) {
428
0
                    LOG("Left half does not match.\n");
429
0
                    window_last += period;
430
0
                    memory = len_needle - period;
431
0
                    if (window_last >= haystack_end) {
432
0
                        return -1;
433
0
                    }
434
0
                    Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
435
0
                    if (shift) {
436
                        // A mismatch has been identified to the right
437
                        // of where i will next start, so we can jump
438
                        // at least as far as if the mismatch occurred
439
                        // on the first comparison.
440
0
                        Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1;
441
0
                        LOG("Skip with Memory.\n");
442
0
                        memory = 0;
443
0
                        window_last += Py_MAX(shift, mem_jump);
444
0
                        goto periodicwindowloop;
445
0
                    }
446
0
                    goto no_shift;
447
0
                }
448
0
            }
449
0
            LOG("Found a match!\n");
450
0
            return window - haystack;
451
0
        }
452
0
    }
453
20
    else {
454
20
        period = Py_MAX(gap, period);
455
20
        LOG("Needle is not periodic.\n");
456
13.9k
      windowloop:
457
13.9k
        while (window_last < haystack_end) {
458
71.0k
            for (;;) {
459
71.0k
                LOG_LINEUP();
460
71.0k
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
461
71.0k
                window_last += shift;
462
71.0k
                if (shift == 0) {
463
13.9k
                    break;
464
13.9k
                }
465
57.0k
                if (window_last >= haystack_end) {
466
16
                    return -1;
467
16
                }
468
57.0k
                LOG("Horspool skip\n");
469
57.0k
            }
470
13.9k
            window = window_last - len_needle + 1;
471
13.9k
            assert((window[len_needle - 1] & TABLE_MASK) ==
472
13.9k
                   (needle[len_needle - 1] & TABLE_MASK));
473
13.9k
            Py_ssize_t i = cut;
474
14.1k
            for (; i < len_needle; i++) {
475
14.1k
                if (needle[i] != window[i]) {
476
13.8k
                    if (i < gap_jump_end) {
477
13.8k
                        LOG("Early right half mismatch: jump by gap.\n");
478
13.8k
                        assert(gap >= i - cut + 1);
479
13.8k
                        window_last += gap;
480
13.8k
                    }
481
0
                    else {
482
0
                        LOG("Late right half mismatch: jump by n (>gap)\n");
483
0
                        assert(i - cut + 1 > gap);
484
0
                        window_last += i - cut + 1;
485
0
                    }
486
13.8k
                    goto windowloop;
487
13.8k
                }
488
14.1k
            }
489
186
            for (Py_ssize_t i = 0; i < cut; i++) {
490
184
                if (needle[i] != window[i]) {
491
88
                    LOG("Left half does not match.\n");
492
88
                    window_last += period;
493
88
                    goto windowloop;
494
88
                }
495
184
            }
496
2
            LOG("Found a match!\n");
497
2
            return window - haystack;
498
90
        }
499
13.9k
    }
500
2
    LOG("Not found. Returning -1.\n");
501
2
    return -1;
502
20
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__two_way
Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way
Unexecuted instantiation: bytes_methods.c:stringlib__two_way
Unexecuted instantiation: bytearrayobject.c:stringlib__two_way
503
504
505
static Py_ssize_t
506
STRINGLIB(_two_way_find)(const STRINGLIB_CHAR *haystack,
507
                         Py_ssize_t len_haystack,
508
                         const STRINGLIB_CHAR *needle,
509
                         Py_ssize_t len_needle)
510
20
{
511
20
    LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack);
512
20
    STRINGLIB(prework) p;
513
20
    STRINGLIB(_preprocess)(needle, len_needle, &p);
514
20
    return STRINGLIB(_two_way)(haystack, len_haystack, &p);
515
20
}
Unexecuted instantiation: bytesobject.c:stringlib__two_way_find
Unexecuted instantiation: unicodeobject.c:asciilib__two_way_find
unicodeobject.c:ucs1lib__two_way_find
Line
Count
Source
510
20
{
511
20
    LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack);
512
20
    STRINGLIB(prework) p;
513
20
    STRINGLIB(_preprocess)(needle, len_needle, &p);
514
20
    return STRINGLIB(_two_way)(haystack, len_haystack, &p);
515
20
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__two_way_find
Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way_find
Unexecuted instantiation: bytes_methods.c:stringlib__two_way_find
Unexecuted instantiation: bytearrayobject.c:stringlib__two_way_find
516
517
518
static Py_ssize_t
519
STRINGLIB(_two_way_count)(const STRINGLIB_CHAR *haystack,
520
                          Py_ssize_t len_haystack,
521
                          const STRINGLIB_CHAR *needle,
522
                          Py_ssize_t len_needle,
523
                          Py_ssize_t maxcount)
524
0
{
525
0
    LOG("###### Counting \"%s\" in \"%s\".\n", needle, haystack);
526
0
    STRINGLIB(prework) p;
527
0
    STRINGLIB(_preprocess)(needle, len_needle, &p);
528
0
    Py_ssize_t index = 0, count = 0;
529
0
    while (1) {
530
0
        Py_ssize_t result;
531
0
        result = STRINGLIB(_two_way)(haystack + index,
532
0
                                     len_haystack - index, &p);
533
0
        if (result == -1) {
534
0
            return count;
535
0
        }
536
0
        count++;
537
0
        if (count == maxcount) {
538
0
            return maxcount;
539
0
        }
540
0
        index += result + len_needle;
541
0
    }
542
0
    return count;
543
0
}
Unexecuted instantiation: bytesobject.c:stringlib__two_way_count
Unexecuted instantiation: unicodeobject.c:asciilib__two_way_count
Unexecuted instantiation: unicodeobject.c:ucs1lib__two_way_count
Unexecuted instantiation: unicodeobject.c:ucs2lib__two_way_count
Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way_count
Unexecuted instantiation: bytes_methods.c:stringlib__two_way_count
Unexecuted instantiation: bytearrayobject.c:stringlib__two_way_count
544
545
#undef SHIFT_TYPE
546
#undef NOT_FOUND
547
#undef SHIFT_OVERFLOW
548
#undef TABLE_SIZE_BITS
549
#undef TABLE_SIZE
550
#undef TABLE_MASK
551
552
#undef LOG
553
#undef LOG_STRING
554
#undef LOG_LINEUP
555
556
static inline Py_ssize_t
557
STRINGLIB(default_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
558
                        const STRINGLIB_CHAR* p, Py_ssize_t m,
559
                        Py_ssize_t maxcount, int mode)
560
37.8M
{
561
37.8M
    const Py_ssize_t w = n - m;
562
37.8M
    Py_ssize_t mlast = m - 1, count = 0;
563
37.8M
    Py_ssize_t gap = mlast;
564
37.8M
    const STRINGLIB_CHAR last = p[mlast];
565
37.8M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
37.8M
    unsigned long mask = 0;
568
214M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
176M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
176M
        if (p[i] == last) {
571
1.23M
            gap = mlast - i - 1;
572
1.23M
        }
573
176M
    }
574
37.8M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
1.00G
    for (Py_ssize_t i = 0; i <= w; i++) {
577
979M
        if (ss[i] == last) {
578
            /* candidate match */
579
42.9M
            Py_ssize_t j;
580
66.7M
            for (j = 0; j < mlast; j++) {
581
44.1M
                if (s[i+j] != p[j]) {
582
20.3M
                    break;
583
20.3M
                }
584
44.1M
            }
585
42.9M
            if (j == mlast) {
586
                /* got a match! */
587
22.6M
                if (mode != FAST_COUNT) {
588
11.8M
                    return i;
589
11.8M
                }
590
10.7M
                count++;
591
10.7M
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
10.7M
                i = i + mlast;
595
10.7M
                continue;
596
10.7M
            }
597
            /* miss: check if next character is part of pattern */
598
20.3M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
4.90M
                i = i + m;
600
4.90M
            }
601
15.4M
            else {
602
15.4M
                i = i + gap;
603
15.4M
            }
604
20.3M
        }
605
936M
        else {
606
            /* skip: check if next character is part of pattern */
607
936M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
811M
                i = i + m;
609
811M
            }
610
936M
        }
611
979M
    }
612
26.0M
    return mode == FAST_COUNT ? count : -1;
613
37.8M
}
Unexecuted instantiation: bytesobject.c:stringlib_default_find
unicodeobject.c:asciilib_default_find
Line
Count
Source
560
2.29M
{
561
2.29M
    const Py_ssize_t w = n - m;
562
2.29M
    Py_ssize_t mlast = m - 1, count = 0;
563
2.29M
    Py_ssize_t gap = mlast;
564
2.29M
    const STRINGLIB_CHAR last = p[mlast];
565
2.29M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
2.29M
    unsigned long mask = 0;
568
4.99M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
2.70M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
2.70M
        if (p[i] == last) {
571
38.3k
            gap = mlast - i - 1;
572
38.3k
        }
573
2.70M
    }
574
2.29M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
43.2M
    for (Py_ssize_t i = 0; i <= w; i++) {
577
43.1M
        if (ss[i] == last) {
578
            /* candidate match */
579
4.20M
            Py_ssize_t j;
580
6.84M
            for (j = 0; j < mlast; j++) {
581
4.59M
                if (s[i+j] != p[j]) {
582
1.95M
                    break;
583
1.95M
                }
584
4.59M
            }
585
4.20M
            if (j == mlast) {
586
                /* got a match! */
587
2.25M
                if (mode != FAST_COUNT) {
588
2.25M
                    return i;
589
2.25M
                }
590
0
                count++;
591
0
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
0
                i = i + mlast;
595
0
                continue;
596
0
            }
597
            /* miss: check if next character is part of pattern */
598
1.95M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
99.9k
                i = i + m;
600
99.9k
            }
601
1.85M
            else {
602
1.85M
                i = i + gap;
603
1.85M
            }
604
1.95M
        }
605
38.9M
        else {
606
            /* skip: check if next character is part of pattern */
607
38.9M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
35.2M
                i = i + m;
609
35.2M
            }
610
38.9M
        }
611
43.1M
    }
612
41.7k
    return mode == FAST_COUNT ? count : -1;
613
2.29M
}
unicodeobject.c:ucs1lib_default_find
Line
Count
Source
560
27.9M
{
561
27.9M
    const Py_ssize_t w = n - m;
562
27.9M
    Py_ssize_t mlast = m - 1, count = 0;
563
27.9M
    Py_ssize_t gap = mlast;
564
27.9M
    const STRINGLIB_CHAR last = p[mlast];
565
27.9M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
27.9M
    unsigned long mask = 0;
568
194M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
166M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
166M
        if (p[i] == last) {
571
1.13M
            gap = mlast - i - 1;
572
1.13M
        }
573
166M
    }
574
27.9M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
448M
    for (Py_ssize_t i = 0; i <= w; i++) {
577
422M
        if (ss[i] == last) {
578
            /* candidate match */
579
14.8M
            Py_ssize_t j;
580
21.1M
            for (j = 0; j < mlast; j++) {
581
15.4M
                if (s[i+j] != p[j]) {
582
9.17M
                    break;
583
9.17M
                }
584
15.4M
            }
585
14.8M
            if (j == mlast) {
586
                /* got a match! */
587
5.69M
                if (mode != FAST_COUNT) {
588
2.08M
                    return i;
589
2.08M
                }
590
3.61M
                count++;
591
3.61M
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
3.61M
                i = i + mlast;
595
3.61M
                continue;
596
3.61M
            }
597
            /* miss: check if next character is part of pattern */
598
9.17M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
2.04M
                i = i + m;
600
2.04M
            }
601
7.12M
            else {
602
7.12M
                i = i + gap;
603
7.12M
            }
604
9.17M
        }
605
407M
        else {
606
            /* skip: check if next character is part of pattern */
607
407M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
299M
                i = i + m;
609
299M
            }
610
407M
        }
611
422M
    }
612
25.8M
    return mode == FAST_COUNT ? count : -1;
613
27.9M
}
unicodeobject.c:ucs2lib_default_find
Line
Count
Source
560
3.32M
{
561
3.32M
    const Py_ssize_t w = n - m;
562
3.32M
    Py_ssize_t mlast = m - 1, count = 0;
563
3.32M
    Py_ssize_t gap = mlast;
564
3.32M
    const STRINGLIB_CHAR last = p[mlast];
565
3.32M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
3.32M
    unsigned long mask = 0;
568
6.74M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
3.41M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
3.41M
        if (p[i] == last) {
571
36.9k
            gap = mlast - i - 1;
572
36.9k
        }
573
3.41M
    }
574
3.32M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
311M
    for (Py_ssize_t i = 0; i <= w; i++) {
577
311M
        if (ss[i] == last) {
578
            /* candidate match */
579
10.4M
            Py_ssize_t j;
580
16.8M
            for (j = 0; j < mlast; j++) {
581
10.5M
                if (s[i+j] != p[j]) {
582
4.15M
                    break;
583
4.15M
                }
584
10.5M
            }
585
10.4M
            if (j == mlast) {
586
                /* got a match! */
587
6.31M
                if (mode != FAST_COUNT) {
588
3.22M
                    return i;
589
3.22M
                }
590
3.08M
                count++;
591
3.08M
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
3.08M
                i = i + mlast;
595
3.08M
                continue;
596
3.08M
            }
597
            /* miss: check if next character is part of pattern */
598
4.15M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
1.12M
                i = i + m;
600
1.12M
            }
601
3.03M
            else {
602
3.03M
                i = i + gap;
603
3.03M
            }
604
4.15M
        }
605
301M
        else {
606
            /* skip: check if next character is part of pattern */
607
301M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
294M
                i = i + m;
609
294M
            }
610
301M
        }
611
311M
    }
612
99.7k
    return mode == FAST_COUNT ? count : -1;
613
3.32M
}
unicodeobject.c:ucs4lib_default_find
Line
Count
Source
560
4.27M
{
561
4.27M
    const Py_ssize_t w = n - m;
562
4.27M
    Py_ssize_t mlast = m - 1, count = 0;
563
4.27M
    Py_ssize_t gap = mlast;
564
4.27M
    const STRINGLIB_CHAR last = p[mlast];
565
4.27M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
4.27M
    unsigned long mask = 0;
568
8.67M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
4.39M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
4.39M
        if (p[i] == last) {
571
19.0k
            gap = mlast - i - 1;
572
19.0k
        }
573
4.39M
    }
574
4.27M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
200M
    for (Py_ssize_t i = 0; i <= w; i++) {
577
200M
        if (ss[i] == last) {
578
            /* candidate match */
579
13.3M
            Py_ssize_t j;
580
21.8M
            for (j = 0; j < mlast; j++) {
581
13.5M
                if (s[i+j] != p[j]) {
582
5.05M
                    break;
583
5.05M
                }
584
13.5M
            }
585
13.3M
            if (j == mlast) {
586
                /* got a match! */
587
8.34M
                if (mode != FAST_COUNT) {
588
4.24M
                    return i;
589
4.24M
                }
590
4.10M
                count++;
591
4.10M
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
4.10M
                i = i + mlast;
595
4.10M
                continue;
596
4.10M
            }
597
            /* miss: check if next character is part of pattern */
598
5.05M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
1.61M
                i = i + m;
600
1.61M
            }
601
3.43M
            else {
602
3.43M
                i = i + gap;
603
3.43M
            }
604
5.05M
        }
605
186M
        else {
606
            /* skip: check if next character is part of pattern */
607
186M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
180M
                i = i + m;
609
180M
            }
610
186M
        }
611
200M
    }
612
30.8k
    return mode == FAST_COUNT ? count : -1;
613
4.27M
}
bytes_methods.c:stringlib_default_find
Line
Count
Source
560
2.95k
{
561
2.95k
    const Py_ssize_t w = n - m;
562
2.95k
    Py_ssize_t mlast = m - 1, count = 0;
563
2.95k
    Py_ssize_t gap = mlast;
564
2.95k
    const STRINGLIB_CHAR last = p[mlast];
565
2.95k
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
2.95k
    unsigned long mask = 0;
568
11.8k
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
8.85k
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
8.85k
        if (p[i] == last) {
571
2.95k
            gap = mlast - i - 1;
572
2.95k
        }
573
8.85k
    }
574
2.95k
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
1.48M
    for (Py_ssize_t i = 0; i <= w; i++) {
577
1.48M
        if (ss[i] == last) {
578
            /* candidate match */
579
35.2k
            Py_ssize_t j;
580
44.2k
            for (j = 0; j < mlast; j++) {
581
41.5k
                if (s[i+j] != p[j]) {
582
32.5k
                    break;
583
32.5k
                }
584
41.5k
            }
585
35.2k
            if (j == mlast) {
586
                /* got a match! */
587
2.72k
                if (mode != FAST_COUNT) {
588
2.72k
                    return i;
589
2.72k
                }
590
0
                count++;
591
0
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
0
                i = i + mlast;
595
0
                continue;
596
0
            }
597
            /* miss: check if next character is part of pattern */
598
32.5k
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
27.2k
                i = i + m;
600
27.2k
            }
601
5.29k
            else {
602
5.29k
                i = i + gap;
603
5.29k
            }
604
32.5k
        }
605
1.44M
        else {
606
            /* skip: check if next character is part of pattern */
607
1.44M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
1.11M
                i = i + m;
609
1.11M
            }
610
1.44M
        }
611
1.48M
    }
612
226
    return mode == FAST_COUNT ? count : -1;
613
2.95k
}
Unexecuted instantiation: bytearrayobject.c:stringlib_default_find
614
615
616
static Py_ssize_t
617
STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
618
                         const STRINGLIB_CHAR* p, Py_ssize_t m,
619
                         Py_ssize_t maxcount, int mode)
620
0
{
621
0
    const Py_ssize_t w = n - m;
622
0
    Py_ssize_t mlast = m - 1, count = 0;
623
0
    Py_ssize_t gap = mlast;
624
0
    Py_ssize_t hits = 0, res;
625
0
    const STRINGLIB_CHAR last = p[mlast];
626
0
    const STRINGLIB_CHAR *const ss = &s[mlast];
627
628
0
    unsigned long mask = 0;
629
0
    for (Py_ssize_t i = 0; i < mlast; i++) {
630
0
        STRINGLIB_BLOOM_ADD(mask, p[i]);
631
0
        if (p[i] == last) {
632
0
            gap = mlast - i - 1;
633
0
        }
634
0
    }
635
0
    STRINGLIB_BLOOM_ADD(mask, last);
636
637
0
    for (Py_ssize_t i = 0; i <= w; i++) {
638
0
        if (ss[i] == last) {
639
            /* candidate match */
640
0
            Py_ssize_t j;
641
0
            for (j = 0; j < mlast; j++) {
642
0
                if (s[i+j] != p[j]) {
643
0
                    break;
644
0
                }
645
0
            }
646
0
            if (j == mlast) {
647
                /* got a match! */
648
0
                if (mode != FAST_COUNT) {
649
0
                    return i;
650
0
                }
651
0
                count++;
652
0
                if (count == maxcount) {
653
0
                    return maxcount;
654
0
                }
655
0
                i = i + mlast;
656
0
                continue;
657
0
            }
658
0
            hits += j + 1;
659
0
            if (hits > m / 4 && w - i > 2000) {
660
0
                if (mode == FAST_SEARCH) {
661
0
                    res = STRINGLIB(_two_way_find)(s + i, n - i, p, m);
662
0
                    return res == -1 ? -1 : res + i;
663
0
                }
664
0
                else {
665
0
                    res = STRINGLIB(_two_way_count)(s + i, n - i, p, m,
666
0
                                                    maxcount - count);
667
0
                    return res + count;
668
0
                }
669
0
            }
670
            /* miss: check if next character is part of pattern */
671
0
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
672
0
                i = i + m;
673
0
            }
674
0
            else {
675
0
                i = i + gap;
676
0
            }
677
0
        }
678
0
        else {
679
            /* skip: check if next character is part of pattern */
680
0
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
681
0
                i = i + m;
682
0
            }
683
0
        }
684
0
    }
685
0
    return mode == FAST_COUNT ? count : -1;
686
0
}
Unexecuted instantiation: bytesobject.c:stringlib_adaptive_find
Unexecuted instantiation: unicodeobject.c:asciilib_adaptive_find
Unexecuted instantiation: unicodeobject.c:ucs1lib_adaptive_find
Unexecuted instantiation: unicodeobject.c:ucs2lib_adaptive_find
Unexecuted instantiation: unicodeobject.c:ucs4lib_adaptive_find
Unexecuted instantiation: bytes_methods.c:stringlib_adaptive_find
Unexecuted instantiation: bytearrayobject.c:stringlib_adaptive_find
687
688
689
static Py_ssize_t
690
STRINGLIB(default_rfind)(const STRINGLIB_CHAR* s, Py_ssize_t n,
691
                         const STRINGLIB_CHAR* p, Py_ssize_t m,
692
                         Py_ssize_t maxcount, int mode)
693
7.90k
{
694
    /* create compressed boyer-moore delta 1 table */
695
7.90k
    unsigned long mask = 0;
696
7.90k
    Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m;
697
698
    /* process pattern[0] outside the loop */
699
7.90k
    STRINGLIB_BLOOM_ADD(mask, p[0]);
700
    /* process pattern[:0:-1] */
701
31.6k
    for (i = mlast; i > 0; i--) {
702
23.7k
        STRINGLIB_BLOOM_ADD(mask, p[i]);
703
23.7k
        if (p[i] == p[0]) {
704
0
            skip = i - 1;
705
0
        }
706
23.7k
    }
707
708
1.69M
    for (i = w; i >= 0; i--) {
709
1.69M
        if (s[i] == p[0]) {
710
            /* candidate match */
711
80.7k
            for (j = mlast; j > 0; j--) {
712
72.9k
                if (s[i+j] != p[j]) {
713
48.7k
                    break;
714
48.7k
                }
715
72.9k
            }
716
56.5k
            if (j == 0) {
717
                /* got a match! */
718
7.80k
                return i;
719
7.80k
            }
720
            /* miss: check if previous character is part of pattern */
721
48.7k
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
722
46.7k
                i = i - m;
723
46.7k
            }
724
2.00k
            else {
725
2.00k
                i = i - skip;
726
2.00k
            }
727
48.7k
        }
728
1.64M
        else {
729
            /* skip: check if previous character is part of pattern */
730
1.64M
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
731
1.30M
                i = i - m;
732
1.30M
            }
733
1.64M
        }
734
1.69M
    }
735
105
    return -1;
736
7.90k
}
Unexecuted instantiation: bytesobject.c:stringlib_default_rfind
Unexecuted instantiation: unicodeobject.c:asciilib_default_rfind
Unexecuted instantiation: unicodeobject.c:ucs1lib_default_rfind
Unexecuted instantiation: unicodeobject.c:ucs2lib_default_rfind
Unexecuted instantiation: unicodeobject.c:ucs4lib_default_rfind
bytes_methods.c:stringlib_default_rfind
Line
Count
Source
693
7.90k
{
694
    /* create compressed boyer-moore delta 1 table */
695
7.90k
    unsigned long mask = 0;
696
7.90k
    Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m;
697
698
    /* process pattern[0] outside the loop */
699
7.90k
    STRINGLIB_BLOOM_ADD(mask, p[0]);
700
    /* process pattern[:0:-1] */
701
31.6k
    for (i = mlast; i > 0; i--) {
702
23.7k
        STRINGLIB_BLOOM_ADD(mask, p[i]);
703
23.7k
        if (p[i] == p[0]) {
704
0
            skip = i - 1;
705
0
        }
706
23.7k
    }
707
708
1.69M
    for (i = w; i >= 0; i--) {
709
1.69M
        if (s[i] == p[0]) {
710
            /* candidate match */
711
80.7k
            for (j = mlast; j > 0; j--) {
712
72.9k
                if (s[i+j] != p[j]) {
713
48.7k
                    break;
714
48.7k
                }
715
72.9k
            }
716
56.5k
            if (j == 0) {
717
                /* got a match! */
718
7.80k
                return i;
719
7.80k
            }
720
            /* miss: check if previous character is part of pattern */
721
48.7k
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
722
46.7k
                i = i - m;
723
46.7k
            }
724
2.00k
            else {
725
2.00k
                i = i - skip;
726
2.00k
            }
727
48.7k
        }
728
1.64M
        else {
729
            /* skip: check if previous character is part of pattern */
730
1.64M
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
731
1.30M
                i = i - m;
732
1.30M
            }
733
1.64M
        }
734
1.69M
    }
735
105
    return -1;
736
7.90k
}
Unexecuted instantiation: bytearrayobject.c:stringlib_default_rfind
737
738
739
static inline Py_ssize_t
740
STRINGLIB(count_char)(const STRINGLIB_CHAR *s, Py_ssize_t n,
741
                      const STRINGLIB_CHAR p0, Py_ssize_t maxcount)
742
0
{
743
0
    Py_ssize_t i, count = 0;
744
0
    for (i = 0; i < n; i++) {
745
0
        if (s[i] == p0) {
746
0
            count++;
747
0
            if (count == maxcount) {
748
0
                return maxcount;
749
0
            }
750
0
        }
751
0
    }
752
0
    return count;
753
0
}
Unexecuted instantiation: bytesobject.c:stringlib_count_char
Unexecuted instantiation: unicodeobject.c:asciilib_count_char
Unexecuted instantiation: unicodeobject.c:ucs1lib_count_char
Unexecuted instantiation: unicodeobject.c:ucs2lib_count_char
Unexecuted instantiation: unicodeobject.c:ucs4lib_count_char
Unexecuted instantiation: bytes_methods.c:stringlib_count_char
Unexecuted instantiation: bytearrayobject.c:stringlib_count_char
754
755
756
static inline Py_ssize_t
757
STRINGLIB(count_char_no_maxcount)(const STRINGLIB_CHAR *s, Py_ssize_t n,
758
                                  const STRINGLIB_CHAR p0)
759
/* A specialized function of count_char that does not cut off at a maximum.
760
   As a result, the compiler is able to vectorize the loop. */
761
35.9M
{
762
35.9M
    Py_ssize_t count = 0;
763
4.68G
    for (Py_ssize_t i = 0; i < n; i++) {
764
4.64G
        if (s[i] == p0) {
765
883M
            count++;
766
883M
        }
767
4.64G
    }
768
35.9M
    return count;
769
35.9M
}
Unexecuted instantiation: bytesobject.c:stringlib_count_char_no_maxcount
Unexecuted instantiation: unicodeobject.c:asciilib_count_char_no_maxcount
unicodeobject.c:ucs1lib_count_char_no_maxcount
Line
Count
Source
761
23.4M
{
762
23.4M
    Py_ssize_t count = 0;
763
649M
    for (Py_ssize_t i = 0; i < n; i++) {
764
626M
        if (s[i] == p0) {
765
35.5M
            count++;
766
35.5M
        }
767
626M
    }
768
23.4M
    return count;
769
23.4M
}
unicodeobject.c:ucs2lib_count_char_no_maxcount
Line
Count
Source
761
4.36M
{
762
4.36M
    Py_ssize_t count = 0;
763
668M
    for (Py_ssize_t i = 0; i < n; i++) {
764
664M
        if (s[i] == p0) {
765
19.7M
            count++;
766
19.7M
        }
767
664M
    }
768
4.36M
    return count;
769
4.36M
}
unicodeobject.c:ucs4lib_count_char_no_maxcount
Line
Count
Source
761
2.42M
{
762
2.42M
    Py_ssize_t count = 0;
763
443M
    for (Py_ssize_t i = 0; i < n; i++) {
764
440M
        if (s[i] == p0) {
765
13.0M
            count++;
766
13.0M
        }
767
440M
    }
768
2.42M
    return count;
769
2.42M
}
bytes_methods.c:stringlib_count_char_no_maxcount
Line
Count
Source
761
5.69M
{
762
5.69M
    Py_ssize_t count = 0;
763
2.92G
    for (Py_ssize_t i = 0; i < n; i++) {
764
2.91G
        if (s[i] == p0) {
765
815M
            count++;
766
815M
        }
767
2.91G
    }
768
5.69M
    return count;
769
5.69M
}
Unexecuted instantiation: bytearrayobject.c:stringlib_count_char_no_maxcount
770
771
772
Py_LOCAL_INLINE(Py_ssize_t)
773
FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
774
           const STRINGLIB_CHAR* p, Py_ssize_t m,
775
           Py_ssize_t maxcount, int mode)
776
94.9M
{
777
94.9M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
522
        return -1;
779
522
    }
780
781
    /* look for special cases */
782
94.9M
    if (m <= 1) {
783
57.0M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
57.0M
        if (mode == FAST_SEARCH)
788
21.0M
            return STRINGLIB(find_char)(s, n, p[0]);
789
35.9M
        else if (mode == FAST_RSEARCH)
790
56.6k
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
35.9M
        else {
792
35.9M
            if (maxcount == PY_SSIZE_T_MAX) {
793
35.9M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
35.9M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
35.9M
        }
797
57.0M
    }
798
799
37.8M
    if (mode != FAST_RSEARCH) {
800
37.8M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
37.8M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
37.8M
        }
803
20
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
20
            if (mode == FAST_SEARCH) {
810
20
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
20
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
20
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
37.8M
    }
825
7.90k
    else {
826
        /* FAST_RSEARCH */
827
7.90k
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
7.90k
    }
829
37.8M
}
bytesobject.c:fastsearch
Line
Count
Source
776
361k
{
777
361k
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
0
        return -1;
779
0
    }
780
781
    /* look for special cases */
782
361k
    if (m <= 1) {
783
361k
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
361k
        if (mode == FAST_SEARCH)
788
361k
            return STRINGLIB(find_char)(s, n, p[0]);
789
0
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
0
        else {
792
0
            if (maxcount == PY_SSIZE_T_MAX) {
793
0
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
0
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
0
        }
797
361k
    }
798
799
0
    if (mode != FAST_RSEARCH) {
800
0
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
0
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
0
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
0
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
0
}
unicodeobject.c:asciilib_fastsearch
Line
Count
Source
776
7.69M
{
777
7.69M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
0
        return -1;
779
0
    }
780
781
    /* look for special cases */
782
7.69M
    if (m <= 1) {
783
5.40M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
5.40M
        if (mode == FAST_SEARCH)
788
5.34M
            return STRINGLIB(find_char)(s, n, p[0]);
789
56.6k
        else if (mode == FAST_RSEARCH)
790
56.6k
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
0
        else {
792
0
            if (maxcount == PY_SSIZE_T_MAX) {
793
0
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
0
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
0
        }
797
5.40M
    }
798
799
2.29M
    if (mode != FAST_RSEARCH) {
800
2.29M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
2.29M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
2.29M
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
2.29M
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
2.29M
}
unicodeobject.c:ucs1lib_fastsearch
Line
Count
Source
776
59.4M
{
777
59.4M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
0
        return -1;
779
0
    }
780
781
    /* look for special cases */
782
59.4M
    if (m <= 1) {
783
31.4M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
31.4M
        if (mode == FAST_SEARCH)
788
8.01M
            return STRINGLIB(find_char)(s, n, p[0]);
789
23.4M
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
23.4M
        else {
792
23.4M
            if (maxcount == PY_SSIZE_T_MAX) {
793
23.4M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
23.4M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
23.4M
        }
797
31.4M
    }
798
799
27.9M
    if (mode != FAST_RSEARCH) {
800
27.9M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
27.9M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
27.9M
        }
803
20
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
20
            if (mode == FAST_SEARCH) {
810
20
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
20
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
20
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
27.9M
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
27.9M
}
unicodeobject.c:ucs2lib_fastsearch
Line
Count
Source
776
11.6M
{
777
11.6M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
508
        return -1;
779
508
    }
780
781
    /* look for special cases */
782
11.6M
    if (m <= 1) {
783
8.30M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
8.30M
        if (mode == FAST_SEARCH)
788
3.94M
            return STRINGLIB(find_char)(s, n, p[0]);
789
4.36M
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
4.36M
        else {
792
4.36M
            if (maxcount == PY_SSIZE_T_MAX) {
793
4.36M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
4.36M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
4.36M
        }
797
8.30M
    }
798
799
3.32M
    if (mode != FAST_RSEARCH) {
800
3.32M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
3.32M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
3.32M
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
3.32M
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
3.32M
}
unicodeobject.c:ucs4lib_fastsearch
Line
Count
Source
776
10.1M
{
777
10.1M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
0
        return -1;
779
0
    }
780
781
    /* look for special cases */
782
10.1M
    if (m <= 1) {
783
5.83M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
5.83M
        if (mode == FAST_SEARCH)
788
3.41M
            return STRINGLIB(find_char)(s, n, p[0]);
789
2.42M
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
2.42M
        else {
792
2.42M
            if (maxcount == PY_SSIZE_T_MAX) {
793
2.42M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
2.42M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
2.42M
        }
797
5.83M
    }
798
799
4.27M
    if (mode != FAST_RSEARCH) {
800
4.27M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
4.27M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
4.27M
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
4.27M
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
4.27M
}
bytes_methods.c:fastsearch
Line
Count
Source
776
5.71M
{
777
5.71M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
14
        return -1;
779
14
    }
780
781
    /* look for special cases */
782
5.71M
    if (m <= 1) {
783
5.69M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
5.69M
        if (mode == FAST_SEARCH)
788
0
            return STRINGLIB(find_char)(s, n, p[0]);
789
5.69M
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
5.69M
        else {
792
5.69M
            if (maxcount == PY_SSIZE_T_MAX) {
793
5.69M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
5.69M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
5.69M
        }
797
5.69M
    }
798
799
10.8k
    if (mode != FAST_RSEARCH) {
800
2.95k
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
2.95k
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
2.95k
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
2.95k
    }
825
7.90k
    else {
826
        /* FAST_RSEARCH */
827
7.90k
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
7.90k
    }
829
10.8k
}
Unexecuted instantiation: bytearrayobject.c:fastsearch
830