Coverage Report

Created: 2026-01-17 06:16

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Objects/stringlib/fastsearch.h
Line
Count
Source
1
/* stringlib: fastsearch implementation */
2
3
#define STRINGLIB_FASTSEARCH_H
4
5
/* fast search/count implementation, based on a mix between boyer-
6
   moore and horspool, with a few more bells and whistles on the top.
7
   for some more background, see:
8
   https://web.archive.org/web/20201107074620/http://effbot.org/zone/stringlib.htm */
9
10
/* note: fastsearch may access s[n], which isn't a problem when using
11
   Python's ordinary string types, but may cause problems if you're
12
   using this code in other contexts.  also, the count mode returns -1
13
   if there cannot possibly be a match in the target string, and 0 if
14
   it has actually checked for matches, but didn't find any.  callers
15
   beware! */
16
17
/* If the strings are long enough, use Crochemore and Perrin's Two-Way
18
   algorithm, which has worst-case O(n) runtime and best-case O(n/k).
19
   Also compute a table of shifts to achieve O(n/k) in more cases,
20
   and often (data dependent) deduce larger shifts than pure C&P can
21
   deduce. See stringlib_find_two_way_notes.txt in this folder for a
22
   detailed explanation. */
23
24
597M
#define FAST_COUNT 0
25
366M
#define FAST_SEARCH 1
26
104M
#define FAST_RSEARCH 2
27
28
#if LONG_BIT >= 128
29
#define STRINGLIB_BLOOM_WIDTH 128
30
#elif LONG_BIT >= 64
31
3.49G
#define STRINGLIB_BLOOM_WIDTH 64
32
#elif LONG_BIT >= 32
33
#define STRINGLIB_BLOOM_WIDTH 32
34
#else
35
#error "LONG_BIT is smaller than 32"
36
#endif
37
38
#define STRINGLIB_BLOOM_ADD(mask, ch) \
39
164M
    ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
40
#define STRINGLIB_BLOOM(mask, ch)     \
41
3.32G
    ((mask &  (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
42
43
#ifdef STRINGLIB_FAST_MEMCHR
44
320M
#  define MEMCHR_CUT_OFF 15
45
#else
46
66.2M
#  define MEMCHR_CUT_OFF 40
47
#endif
48
49
Py_LOCAL_INLINE(Py_ssize_t)
50
STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
51
386M
{
52
386M
    const STRINGLIB_CHAR *p, *e;
53
54
386M
    p = s;
55
386M
    e = s + n;
56
386M
    if (n > MEMCHR_CUT_OFF) {
57
#ifdef STRINGLIB_FAST_MEMCHR
58
92.4M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
92.4M
        if (p != NULL)
60
90.0M
            return (p - s);
61
2.36M
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
57.3M
        if (needle != 0) {
71
56.8M
            do {
72
56.8M
                void *candidate = memchr(p, needle,
73
56.8M
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
56.8M
                if (candidate == NULL)
75
595k
                    return -1;
76
56.2M
                s1 = p;
77
56.2M
                p = (const STRINGLIB_CHAR *)
78
56.2M
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
56.2M
                if (*p == ch)
80
56.1M
                    return (p - s);
81
                /* False positive */
82
112k
                p++;
83
112k
                if (p - s1 > MEMCHR_CUT_OFF)
84
53.7k
                    continue;
85
58.6k
                if (e - p <= MEMCHR_CUT_OFF)
86
4.23k
                    break;
87
54.4k
                e1 = p + MEMCHR_CUT_OFF;
88
1.63M
                while (p != e1) {
89
1.59M
                    if (*p == ch)
90
22.7k
                        return (p - s);
91
1.57M
                    p++;
92
1.57M
                }
93
54.4k
            }
94
56.7M
            while (e - p > MEMCHR_CUT_OFF);
95
56.7M
        }
96
#endif
97
149M
    }
98
1.34G
    while (p < e) {
99
1.14G
        if (*p == ch)
100
33.1M
            return (p - s);
101
1.11G
        p++;
102
1.11G
    }
103
203M
    return -1;
104
236M
}
bytesobject.c:stringlib_find_char
Line
Count
Source
51
591k
{
52
591k
    const STRINGLIB_CHAR *p, *e;
53
54
591k
    p = s;
55
591k
    e = s + n;
56
591k
    if (n > MEMCHR_CUT_OFF) {
57
591k
#ifdef STRINGLIB_FAST_MEMCHR
58
591k
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
591k
        if (p != NULL)
60
586k
            return (p - s);
61
5.04k
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                void *candidate = memchr(p, needle,
73
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
591k
    }
98
30
    while (p < e) {
99
30
        if (*p == ch)
100
5
            return (p - s);
101
25
        p++;
102
25
    }
103
0
    return -1;
104
5
}
unicodeobject.c:ucs1lib_find_char
Line
Count
Source
51
231M
{
52
231M
    const STRINGLIB_CHAR *p, *e;
53
54
231M
    p = s;
55
231M
    e = s + n;
56
231M
    if (n > MEMCHR_CUT_OFF) {
57
21.5M
#ifdef STRINGLIB_FAST_MEMCHR
58
21.5M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
21.5M
        if (p != NULL)
60
19.9M
            return (p - s);
61
1.56M
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                void *candidate = memchr(p, needle,
73
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
21.5M
    }
98
1.08G
    while (p < e) {
99
889M
        if (*p == ch)
100
13.8M
            return (p - s);
101
875M
        p++;
102
875M
    }
103
196M
    return -1;
104
209M
}
unicodeobject.c:ucs2lib_find_char
Line
Count
Source
51
65.9M
{
52
65.9M
    const STRINGLIB_CHAR *p, *e;
53
54
65.9M
    p = s;
55
65.9M
    e = s + n;
56
65.9M
    if (n > MEMCHR_CUT_OFF) {
57
#ifdef STRINGLIB_FAST_MEMCHR
58
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
        if (p != NULL)
60
            return (p - s);
61
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
57.3M
        const STRINGLIB_CHAR *s1, *e1;
66
57.3M
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
57.3M
        if (needle != 0) {
71
56.8M
            do {
72
56.8M
                void *candidate = memchr(p, needle,
73
56.8M
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
56.8M
                if (candidate == NULL)
75
595k
                    return -1;
76
56.2M
                s1 = p;
77
56.2M
                p = (const STRINGLIB_CHAR *)
78
56.2M
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
56.2M
                if (*p == ch)
80
56.1M
                    return (p - s);
81
                /* False positive */
82
112k
                p++;
83
112k
                if (p - s1 > MEMCHR_CUT_OFF)
84
53.7k
                    continue;
85
58.6k
                if (e - p <= MEMCHR_CUT_OFF)
86
4.23k
                    break;
87
54.4k
                e1 = p + MEMCHR_CUT_OFF;
88
1.63M
                while (p != e1) {
89
1.59M
                    if (*p == ch)
90
22.7k
                        return (p - s);
91
1.57M
                    p++;
92
1.57M
                }
93
54.4k
            }
94
56.7M
            while (e - p > MEMCHR_CUT_OFF);
95
56.7M
        }
96
57.3M
#endif
97
57.3M
    }
98
147M
    while (p < e) {
99
144M
        if (*p == ch)
100
5.98M
            return (p - s);
101
138M
        p++;
102
138M
    }
103
3.18M
    return -1;
104
9.16M
}
unicodeobject.c:ucs4lib_find_char
Line
Count
Source
51
45.1M
{
52
45.1M
    const STRINGLIB_CHAR *p, *e;
53
54
45.1M
    p = s;
55
45.1M
    e = s + n;
56
45.1M
    if (n > MEMCHR_CUT_OFF) {
57
45.0M
#ifdef STRINGLIB_FAST_MEMCHR
58
45.0M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
45.0M
        if (p != NULL)
60
45.0M
            return (p - s);
61
32.3k
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                void *candidate = memchr(p, needle,
73
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
45.0M
    }
98
339k
    while (p < e) {
99
302k
        if (*p == ch)
100
48.0k
            return (p - s);
101
254k
        p++;
102
254k
    }
103
37.8k
    return -1;
104
85.9k
}
unicodeobject.c:asciilib_find_char
Line
Count
Source
51
24.0M
{
52
24.0M
    const STRINGLIB_CHAR *p, *e;
53
54
24.0M
    p = s;
55
24.0M
    e = s + n;
56
24.0M
    if (n > MEMCHR_CUT_OFF) {
57
19.4M
#ifdef STRINGLIB_FAST_MEMCHR
58
19.4M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
19.4M
        if (p != NULL)
60
18.7M
            return (p - s);
61
678k
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                void *candidate = memchr(p, needle,
73
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
19.4M
    }
98
12.0M
    while (p < e) {
99
12.0M
        if (*p == ch)
100
4.53M
            return (p - s);
101
7.50M
        p++;
102
7.50M
    }
103
61.7k
    return -1;
104
4.59M
}
bytes_methods.c:stringlib_find_char
Line
Count
Source
51
18.9M
{
52
18.9M
    const STRINGLIB_CHAR *p, *e;
53
54
18.9M
    p = s;
55
18.9M
    e = s + n;
56
18.9M
    if (n > MEMCHR_CUT_OFF) {
57
5.78M
#ifdef STRINGLIB_FAST_MEMCHR
58
5.78M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
5.78M
        if (p != NULL)
60
5.69M
            return (p - s);
61
88.6k
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                void *candidate = memchr(p, needle,
73
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
5.78M
    }
98
103M
    while (p < e) {
99
98.9M
        if (*p == ch)
100
8.68M
            return (p - s);
101
90.2M
        p++;
102
90.2M
    }
103
4.45M
    return -1;
104
13.1M
}
Unexecuted instantiation: bytearrayobject.c:stringlib_find_char
105
106
#undef MEMCHR_CUT_OFF
107
108
#if STRINGLIB_SIZEOF_CHAR == 1
109
138k
#  define MEMRCHR_CUT_OFF 15
110
#else
111
393k
#  define MEMRCHR_CUT_OFF 40
112
#endif
113
114
115
Py_LOCAL_INLINE(Py_ssize_t)
116
STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
117
506k
{
118
506k
    const STRINGLIB_CHAR *p;
119
506k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
506k
    if (n > MEMRCHR_CUT_OFF) {
126
#if STRINGLIB_SIZEOF_CHAR == 1
127
        p = memrchr(s, ch, n);
128
79.2k
        if (p != NULL)
129
75.5k
            return (p - s);
130
3.61k
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
293k
        if (needle != 0) {
141
299k
            do {
142
299k
                void *candidate = memrchr(s, needle,
143
299k
                                          n * sizeof(STRINGLIB_CHAR));
144
299k
                if (candidate == NULL)
145
1.22k
                    return -1;
146
298k
                n1 = n;
147
298k
                p = (const STRINGLIB_CHAR *)
148
298k
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
298k
                n = p - s;
150
298k
                if (*p == ch)
151
289k
                    return n;
152
                /* False positive */
153
9.40k
                if (n1 - n > MEMRCHR_CUT_OFF)
154
4.23k
                    continue;
155
5.16k
                if (n <= MEMRCHR_CUT_OFF)
156
1.19k
                    break;
157
3.97k
                s1 = p - MEMRCHR_CUT_OFF;
158
143k
                while (p > s1) {
159
140k
                    p--;
160
140k
                    if (*p == ch)
161
790
                        return (p - s);
162
140k
                }
163
3.18k
                n = p - s;
164
3.18k
            }
165
293k
            while (n > MEMRCHR_CUT_OFF);
166
293k
        }
167
#endif
168
372k
    }
169
135k
#endif  /* HAVE_MEMRCHR */
170
135k
    p = s + n;
171
672k
    while (p > s) {
172
626k
        p--;
173
626k
        if (*p == ch)
174
89.5k
            return (p - s);
175
626k
    }
176
45.8k
    return -1;
177
135k
}
Unexecuted instantiation: bytesobject.c:stringlib_rfind_char
unicodeobject.c:ucs1lib_rfind_char
Line
Count
Source
117
69.2k
{
118
69.2k
    const STRINGLIB_CHAR *p;
119
69.2k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
69.2k
    if (n > MEMRCHR_CUT_OFF) {
126
65.6k
#if STRINGLIB_SIZEOF_CHAR == 1
127
65.6k
        p = memrchr(s, ch, n);
128
65.6k
        if (p != NULL)
129
64.5k
            return (p - s);
130
1.11k
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
        if (needle != 0) {
141
            do {
142
                void *candidate = memrchr(s, needle,
143
                                          n * sizeof(STRINGLIB_CHAR));
144
                if (candidate == NULL)
145
                    return -1;
146
                n1 = n;
147
                p = (const STRINGLIB_CHAR *)
148
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
                n = p - s;
150
                if (*p == ch)
151
                    return n;
152
                /* False positive */
153
                if (n1 - n > MEMRCHR_CUT_OFF)
154
                    continue;
155
                if (n <= MEMRCHR_CUT_OFF)
156
                    break;
157
                s1 = p - MEMRCHR_CUT_OFF;
158
                while (p > s1) {
159
                    p--;
160
                    if (*p == ch)
161
                        return (p - s);
162
                }
163
                n = p - s;
164
            }
165
            while (n > MEMRCHR_CUT_OFF);
166
        }
167
#endif
168
65.6k
    }
169
3.58k
#endif  /* HAVE_MEMRCHR */
170
3.58k
    p = s + n;
171
11.6k
    while (p > s) {
172
10.5k
        p--;
173
10.5k
        if (*p == ch)
174
2.57k
            return (p - s);
175
10.5k
    }
176
1.00k
    return -1;
177
3.58k
}
unicodeobject.c:ucs2lib_rfind_char
Line
Count
Source
117
263k
{
118
263k
    const STRINGLIB_CHAR *p;
119
263k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
263k
    if (n > MEMRCHR_CUT_OFF) {
126
#if STRINGLIB_SIZEOF_CHAR == 1
127
        p = memrchr(s, ch, n);
128
        if (p != NULL)
129
            return (p - s);
130
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
219k
        const STRINGLIB_CHAR *s1;
135
219k
        Py_ssize_t n1;
136
219k
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
219k
        if (needle != 0) {
141
222k
            do {
142
222k
                void *candidate = memrchr(s, needle,
143
222k
                                          n * sizeof(STRINGLIB_CHAR));
144
222k
                if (candidate == NULL)
145
672
                    return -1;
146
221k
                n1 = n;
147
221k
                p = (const STRINGLIB_CHAR *)
148
221k
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
221k
                n = p - s;
150
221k
                if (*p == ch)
151
217k
                    return n;
152
                /* False positive */
153
4.16k
                if (n1 - n > MEMRCHR_CUT_OFF)
154
1.48k
                    continue;
155
2.67k
                if (n <= MEMRCHR_CUT_OFF)
156
794
                    break;
157
1.88k
                s1 = p - MEMRCHR_CUT_OFF;
158
66.5k
                while (p > s1) {
159
65.1k
                    p--;
160
65.1k
                    if (*p == ch)
161
428
                        return (p - s);
162
65.1k
                }
163
1.45k
                n = p - s;
164
1.45k
            }
165
219k
            while (n > MEMRCHR_CUT_OFF);
166
219k
        }
167
219k
#endif
168
219k
    }
169
45.4k
#endif  /* HAVE_MEMRCHR */
170
45.4k
    p = s + n;
171
135k
    while (p > s) {
172
133k
        p--;
173
133k
        if (*p == ch)
174
43.4k
            return (p - s);
175
133k
    }
176
2.06k
    return -1;
177
45.4k
}
unicodeobject.c:ucs4lib_rfind_char
Line
Count
Source
117
103k
{
118
103k
    const STRINGLIB_CHAR *p;
119
103k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
103k
    if (n > MEMRCHR_CUT_OFF) {
126
#if STRINGLIB_SIZEOF_CHAR == 1
127
        p = memrchr(s, ch, n);
128
        if (p != NULL)
129
            return (p - s);
130
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
73.8k
        const STRINGLIB_CHAR *s1;
135
73.8k
        Py_ssize_t n1;
136
73.8k
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
73.8k
        if (needle != 0) {
141
77.9k
            do {
142
77.9k
                void *candidate = memrchr(s, needle,
143
77.9k
                                          n * sizeof(STRINGLIB_CHAR));
144
77.9k
                if (candidate == NULL)
145
551
                    return -1;
146
77.3k
                n1 = n;
147
77.3k
                p = (const STRINGLIB_CHAR *)
148
77.3k
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
77.3k
                n = p - s;
150
77.3k
                if (*p == ch)
151
72.1k
                    return n;
152
                /* False positive */
153
5.24k
                if (n1 - n > MEMRCHR_CUT_OFF)
154
2.75k
                    continue;
155
2.49k
                if (n <= MEMRCHR_CUT_OFF)
156
404
                    break;
157
2.08k
                s1 = p - MEMRCHR_CUT_OFF;
158
76.7k
                while (p > s1) {
159
75.0k
                    p--;
160
75.0k
                    if (*p == ch)
161
362
                        return (p - s);
162
75.0k
                }
163
1.72k
                n = p - s;
164
1.72k
            }
165
73.8k
            while (n > MEMRCHR_CUT_OFF);
166
73.8k
        }
167
73.8k
#endif
168
73.8k
    }
169
30.1k
#endif  /* HAVE_MEMRCHR */
170
30.1k
    p = s + n;
171
225k
    while (p > s) {
172
223k
        p--;
173
223k
        if (*p == ch)
174
28.4k
            return (p - s);
175
223k
    }
176
1.74k
    return -1;
177
30.1k
}
unicodeobject.c:asciilib_rfind_char
Line
Count
Source
117
49.8k
{
118
49.8k
    const STRINGLIB_CHAR *p;
119
49.8k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
49.8k
    if (n > MEMRCHR_CUT_OFF) {
126
10.3k
#if STRINGLIB_SIZEOF_CHAR == 1
127
10.3k
        p = memrchr(s, ch, n);
128
10.3k
        if (p != NULL)
129
10.1k
            return (p - s);
130
146
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
        if (needle != 0) {
141
            do {
142
                void *candidate = memrchr(s, needle,
143
                                          n * sizeof(STRINGLIB_CHAR));
144
                if (candidate == NULL)
145
                    return -1;
146
                n1 = n;
147
                p = (const STRINGLIB_CHAR *)
148
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
                n = p - s;
150
                if (*p == ch)
151
                    return n;
152
                /* False positive */
153
                if (n1 - n > MEMRCHR_CUT_OFF)
154
                    continue;
155
                if (n <= MEMRCHR_CUT_OFF)
156
                    break;
157
                s1 = p - MEMRCHR_CUT_OFF;
158
                while (p > s1) {
159
                    p--;
160
                    if (*p == ch)
161
                        return (p - s);
162
                }
163
                n = p - s;
164
            }
165
            while (n > MEMRCHR_CUT_OFF);
166
        }
167
#endif
168
10.3k
    }
169
39.5k
#endif  /* HAVE_MEMRCHR */
170
39.5k
    p = s + n;
171
216k
    while (p > s) {
172
186k
        p--;
173
186k
        if (*p == ch)
174
9.76k
            return (p - s);
175
186k
    }
176
29.8k
    return -1;
177
39.5k
}
bytes_methods.c:stringlib_rfind_char
Line
Count
Source
117
19.7k
{
118
19.7k
    const STRINGLIB_CHAR *p;
119
19.7k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
19.7k
    if (n > MEMRCHR_CUT_OFF) {
126
3.18k
#if STRINGLIB_SIZEOF_CHAR == 1
127
3.18k
        p = memrchr(s, ch, n);
128
3.18k
        if (p != NULL)
129
825
            return (p - s);
130
2.35k
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
        if (needle != 0) {
141
            do {
142
                void *candidate = memrchr(s, needle,
143
                                          n * sizeof(STRINGLIB_CHAR));
144
                if (candidate == NULL)
145
                    return -1;
146
                n1 = n;
147
                p = (const STRINGLIB_CHAR *)
148
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
                n = p - s;
150
                if (*p == ch)
151
                    return n;
152
                /* False positive */
153
                if (n1 - n > MEMRCHR_CUT_OFF)
154
                    continue;
155
                if (n <= MEMRCHR_CUT_OFF)
156
                    break;
157
                s1 = p - MEMRCHR_CUT_OFF;
158
                while (p > s1) {
159
                    p--;
160
                    if (*p == ch)
161
                        return (p - s);
162
                }
163
                n = p - s;
164
            }
165
            while (n > MEMRCHR_CUT_OFF);
166
        }
167
#endif
168
3.18k
    }
169
16.6k
#endif  /* HAVE_MEMRCHR */
170
16.6k
    p = s + n;
171
83.3k
    while (p > s) {
172
72.1k
        p--;
173
72.1k
        if (*p == ch)
174
5.38k
            return (p - s);
175
72.1k
    }
176
11.2k
    return -1;
177
16.6k
}
Unexecuted instantiation: bytearrayobject.c:stringlib_rfind_char
178
179
#undef MEMRCHR_CUT_OFF
180
181
/* Change to a 1 to see logging comments walk through the algorithm. */
182
#if 0 && STRINGLIB_SIZEOF_CHAR == 1
183
# define LOG(...) printf(__VA_ARGS__)
184
# define LOG_STRING(s, n) printf("\"%.*s\"", (int)(n), s)
185
# define LOG_LINEUP() do {                                         \
186
    LOG("> "); LOG_STRING(haystack, len_haystack); LOG("\n> ");    \
187
    LOG("%*s",(int)(window_last - haystack + 1 - len_needle), ""); \
188
    LOG_STRING(needle, len_needle); LOG("\n");                     \
189
} while(0)
190
#else
191
# define LOG(...)
192
# define LOG_STRING(s, n)
193
# define LOG_LINEUP()
194
#endif
195
196
Py_LOCAL_INLINE(Py_ssize_t)
197
STRINGLIB(_lex_search)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
198
                       Py_ssize_t *return_period, int invert_alphabet)
199
44
{
200
    /* Do a lexicographic search. Essentially this:
201
           >>> max(needle[i:] for i in range(len(needle)+1))
202
       Also find the period of the right half.   */
203
44
    Py_ssize_t max_suffix = 0;
204
44
    Py_ssize_t candidate = 1;
205
44
    Py_ssize_t k = 0;
206
    // The period of the right half.
207
44
    Py_ssize_t period = 1;
208
209
440
    while (candidate + k < len_needle) {
210
        // each loop increases candidate + k + max_suffix
211
396
        STRINGLIB_CHAR a = needle[candidate + k];
212
396
        STRINGLIB_CHAR b = needle[max_suffix + k];
213
        // check if the suffix at candidate is better than max_suffix
214
396
        if (invert_alphabet ? (b < a) : (a < b)) {
215
            // Fell short of max_suffix.
216
            // The next k + 1 characters are non-increasing
217
            // from candidate, so they won't start a maximal suffix.
218
286
            candidate += k + 1;
219
286
            k = 0;
220
            // We've ruled out any period smaller than what's
221
            // been scanned since max_suffix.
222
286
            period = candidate - max_suffix;
223
286
        }
224
110
        else if (a == b) {
225
22
            if (k + 1 != period) {
226
                // Keep scanning the equal strings
227
0
                k++;
228
0
            }
229
22
            else {
230
                // Matched a whole period.
231
                // Start matching the next period.
232
22
                candidate += period;
233
22
                k = 0;
234
22
            }
235
22
        }
236
88
        else {
237
            // Did better than max_suffix, so replace it.
238
88
            max_suffix = candidate;
239
88
            candidate++;
240
88
            k = 0;
241
88
            period = 1;
242
88
        }
243
396
    }
244
44
    *return_period = period;
245
44
    return max_suffix;
246
44
}
Unexecuted instantiation: bytesobject.c:stringlib__lex_search
Unexecuted instantiation: unicodeobject.c:asciilib__lex_search
unicodeobject.c:ucs1lib__lex_search
Line
Count
Source
199
44
{
200
    /* Do a lexicographic search. Essentially this:
201
           >>> max(needle[i:] for i in range(len(needle)+1))
202
       Also find the period of the right half.   */
203
44
    Py_ssize_t max_suffix = 0;
204
44
    Py_ssize_t candidate = 1;
205
44
    Py_ssize_t k = 0;
206
    // The period of the right half.
207
44
    Py_ssize_t period = 1;
208
209
440
    while (candidate + k < len_needle) {
210
        // each loop increases candidate + k + max_suffix
211
396
        STRINGLIB_CHAR a = needle[candidate + k];
212
396
        STRINGLIB_CHAR b = needle[max_suffix + k];
213
        // check if the suffix at candidate is better than max_suffix
214
396
        if (invert_alphabet ? (b < a) : (a < b)) {
215
            // Fell short of max_suffix.
216
            // The next k + 1 characters are non-increasing
217
            // from candidate, so they won't start a maximal suffix.
218
286
            candidate += k + 1;
219
286
            k = 0;
220
            // We've ruled out any period smaller than what's
221
            // been scanned since max_suffix.
222
286
            period = candidate - max_suffix;
223
286
        }
224
110
        else if (a == b) {
225
22
            if (k + 1 != period) {
226
                // Keep scanning the equal strings
227
0
                k++;
228
0
            }
229
22
            else {
230
                // Matched a whole period.
231
                // Start matching the next period.
232
22
                candidate += period;
233
22
                k = 0;
234
22
            }
235
22
        }
236
88
        else {
237
            // Did better than max_suffix, so replace it.
238
88
            max_suffix = candidate;
239
88
            candidate++;
240
88
            k = 0;
241
88
            period = 1;
242
88
        }
243
396
    }
244
44
    *return_period = period;
245
44
    return max_suffix;
246
44
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__lex_search
Unexecuted instantiation: unicodeobject.c:ucs4lib__lex_search
Unexecuted instantiation: bytes_methods.c:stringlib__lex_search
Unexecuted instantiation: bytearrayobject.c:stringlib__lex_search
247
248
Py_LOCAL_INLINE(Py_ssize_t)
249
STRINGLIB(_factorize)(const STRINGLIB_CHAR *needle,
250
                      Py_ssize_t len_needle,
251
                      Py_ssize_t *return_period)
252
22
{
253
    /* Do a "critical factorization", making it so that:
254
       >>> needle = (left := needle[:cut]) + (right := needle[cut:])
255
       where the "local period" of the cut is maximal.
256
257
       The local period of the cut is the minimal length of a string w
258
       such that (left endswith w or w endswith left)
259
       and (right startswith w or w startswith right).
260
261
       The Critical Factorization Theorem says that this maximal local
262
       period is the global period of the string.
263
264
       Crochemore and Perrin (1991) show that this cut can be computed
265
       as the later of two cuts: one that gives a lexicographically
266
       maximal right half, and one that gives the same with the
267
       with respect to a reversed alphabet-ordering.
268
269
       This is what we want to happen:
270
           >>> x = "GCAGAGAG"
271
           >>> cut, period = factorize(x)
272
           >>> x[:cut], (right := x[cut:])
273
           ('GC', 'AGAGAG')
274
           >>> period  # right half period
275
           2
276
           >>> right[period:] == right[:-period]
277
           True
278
279
       This is how the local period lines up in the above example:
280
                GC | AGAGAG
281
           AGAGAGC = AGAGAGC
282
       The length of this minimal repetition is 7, which is indeed the
283
       period of the original string. */
284
285
22
    Py_ssize_t cut1, period1, cut2, period2, cut, period;
286
22
    cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0);
287
22
    cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1);
288
289
    // Take the later cut.
290
22
    if (cut1 > cut2) {
291
22
        period = period1;
292
22
        cut = cut1;
293
22
    }
294
0
    else {
295
0
        period = period2;
296
0
        cut = cut2;
297
0
    }
298
299
22
    LOG("split: "); LOG_STRING(needle, cut);
300
22
    LOG(" + "); LOG_STRING(needle + cut, len_needle - cut);
301
22
    LOG("\n");
302
303
22
    *return_period = period;
304
22
    return cut;
305
22
}
Unexecuted instantiation: bytesobject.c:stringlib__factorize
Unexecuted instantiation: unicodeobject.c:asciilib__factorize
unicodeobject.c:ucs1lib__factorize
Line
Count
Source
252
22
{
253
    /* Do a "critical factorization", making it so that:
254
       >>> needle = (left := needle[:cut]) + (right := needle[cut:])
255
       where the "local period" of the cut is maximal.
256
257
       The local period of the cut is the minimal length of a string w
258
       such that (left endswith w or w endswith left)
259
       and (right startswith w or w startswith right).
260
261
       The Critical Factorization Theorem says that this maximal local
262
       period is the global period of the string.
263
264
       Crochemore and Perrin (1991) show that this cut can be computed
265
       as the later of two cuts: one that gives a lexicographically
266
       maximal right half, and one that gives the same with the
267
       with respect to a reversed alphabet-ordering.
268
269
       This is what we want to happen:
270
           >>> x = "GCAGAGAG"
271
           >>> cut, period = factorize(x)
272
           >>> x[:cut], (right := x[cut:])
273
           ('GC', 'AGAGAG')
274
           >>> period  # right half period
275
           2
276
           >>> right[period:] == right[:-period]
277
           True
278
279
       This is how the local period lines up in the above example:
280
                GC | AGAGAG
281
           AGAGAGC = AGAGAGC
282
       The length of this minimal repetition is 7, which is indeed the
283
       period of the original string. */
284
285
22
    Py_ssize_t cut1, period1, cut2, period2, cut, period;
286
22
    cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0);
287
22
    cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1);
288
289
    // Take the later cut.
290
22
    if (cut1 > cut2) {
291
22
        period = period1;
292
22
        cut = cut1;
293
22
    }
294
0
    else {
295
0
        period = period2;
296
0
        cut = cut2;
297
0
    }
298
299
22
    LOG("split: "); LOG_STRING(needle, cut);
300
22
    LOG(" + "); LOG_STRING(needle + cut, len_needle - cut);
301
22
    LOG("\n");
302
303
22
    *return_period = period;
304
22
    return cut;
305
22
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__factorize
Unexecuted instantiation: unicodeobject.c:ucs4lib__factorize
Unexecuted instantiation: bytes_methods.c:stringlib__factorize
Unexecuted instantiation: bytearrayobject.c:stringlib__factorize
306
307
308
242
#define SHIFT_TYPE uint8_t
309
#define MAX_SHIFT UINT8_MAX
310
311
79.7k
#define TABLE_SIZE_BITS 6u
312
79.7k
#define TABLE_SIZE (1U << TABLE_SIZE_BITS)
313
78.2k
#define TABLE_MASK (TABLE_SIZE - 1U)
314
315
typedef struct STRINGLIB(_pre) {
316
    const STRINGLIB_CHAR *needle;
317
    Py_ssize_t len_needle;
318
    Py_ssize_t cut;
319
    Py_ssize_t period;
320
    Py_ssize_t gap;
321
    int is_periodic;
322
    SHIFT_TYPE table[TABLE_SIZE];
323
} STRINGLIB(prework);
324
325
326
static void
327
STRINGLIB(_preprocess)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
328
                       STRINGLIB(prework) *p)
329
22
{
330
22
    p->needle = needle;
331
22
    p->len_needle = len_needle;
332
22
    p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period));
333
22
    assert(p->period + p->cut <= len_needle);
334
22
    p->is_periodic = (0 == memcmp(needle,
335
22
                                  needle + p->period,
336
22
                                  p->cut * STRINGLIB_SIZEOF_CHAR));
337
22
    if (p->is_periodic) {
338
0
        assert(p->cut <= len_needle/2);
339
0
        assert(p->cut < p->period);
340
0
    }
341
22
    else {
342
        // A lower bound on the period
343
22
        p->period = Py_MAX(p->cut, len_needle - p->cut) + 1;
344
22
    }
345
    // The gap between the last character and the previous
346
    // occurrence of an equivalent character (modulo TABLE_SIZE)
347
22
    p->gap = len_needle;
348
22
    STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK;
349
154
    for (Py_ssize_t i = len_needle - 2; i >= 0; i--) {
350
154
        STRINGLIB_CHAR x = needle[i] & TABLE_MASK;
351
154
        if (x == last) {
352
22
            p->gap = len_needle - 1 - i;
353
22
            break;
354
22
        }
355
154
    }
356
    // Fill up a compressed Boyer-Moore "Bad Character" table
357
22
    Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT);
358
1.43k
    for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; i++) {
359
1.40k
        p->table[i] = Py_SAFE_DOWNCAST(not_found_shift,
360
1.40k
                                       Py_ssize_t, SHIFT_TYPE);
361
1.40k
    }
362
242
    for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; i++) {
363
220
        SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i,
364
220
                                            Py_ssize_t, SHIFT_TYPE);
365
220
        p->table[needle[i] & TABLE_MASK] = shift;
366
220
    }
367
22
}
Unexecuted instantiation: bytesobject.c:stringlib__preprocess
Unexecuted instantiation: unicodeobject.c:asciilib__preprocess
unicodeobject.c:ucs1lib__preprocess
Line
Count
Source
329
22
{
330
22
    p->needle = needle;
331
22
    p->len_needle = len_needle;
332
22
    p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period));
333
22
    assert(p->period + p->cut <= len_needle);
334
22
    p->is_periodic = (0 == memcmp(needle,
335
22
                                  needle + p->period,
336
22
                                  p->cut * STRINGLIB_SIZEOF_CHAR));
337
22
    if (p->is_periodic) {
338
0
        assert(p->cut <= len_needle/2);
339
0
        assert(p->cut < p->period);
340
0
    }
341
22
    else {
342
        // A lower bound on the period
343
22
        p->period = Py_MAX(p->cut, len_needle - p->cut) + 1;
344
22
    }
345
    // The gap between the last character and the previous
346
    // occurrence of an equivalent character (modulo TABLE_SIZE)
347
22
    p->gap = len_needle;
348
22
    STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK;
349
154
    for (Py_ssize_t i = len_needle - 2; i >= 0; i--) {
350
154
        STRINGLIB_CHAR x = needle[i] & TABLE_MASK;
351
154
        if (x == last) {
352
22
            p->gap = len_needle - 1 - i;
353
22
            break;
354
22
        }
355
154
    }
356
    // Fill up a compressed Boyer-Moore "Bad Character" table
357
22
    Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT);
358
1.43k
    for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; i++) {
359
1.40k
        p->table[i] = Py_SAFE_DOWNCAST(not_found_shift,
360
1.40k
                                       Py_ssize_t, SHIFT_TYPE);
361
1.40k
    }
362
242
    for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; i++) {
363
220
        SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i,
364
220
                                            Py_ssize_t, SHIFT_TYPE);
365
220
        p->table[needle[i] & TABLE_MASK] = shift;
366
220
    }
367
22
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__preprocess
Unexecuted instantiation: unicodeobject.c:ucs4lib__preprocess
Unexecuted instantiation: bytes_methods.c:stringlib__preprocess
Unexecuted instantiation: bytearrayobject.c:stringlib__preprocess
368
369
static Py_ssize_t
370
STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
371
                    STRINGLIB(prework) *p)
372
22
{
373
    // Crochemore and Perrin's (1991) Two-Way algorithm.
374
    // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
375
22
    const Py_ssize_t len_needle = p->len_needle;
376
22
    const Py_ssize_t cut = p->cut;
377
22
    Py_ssize_t period = p->period;
378
22
    const STRINGLIB_CHAR *const needle = p->needle;
379
22
    const STRINGLIB_CHAR *window_last = haystack + len_needle - 1;
380
22
    const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack;
381
22
    SHIFT_TYPE *table = p->table;
382
22
    const STRINGLIB_CHAR *window;
383
22
    LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack);
384
385
22
    Py_ssize_t gap = p->gap;
386
22
    Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap);
387
22
    if (p->is_periodic) {
388
0
        LOG("Needle is periodic.\n");
389
0
        Py_ssize_t memory = 0;
390
0
      periodicwindowloop:
391
0
        while (window_last < haystack_end) {
392
0
            assert(memory == 0);
393
0
            for (;;) {
394
0
                LOG_LINEUP();
395
0
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
396
0
                window_last += shift;
397
0
                if (shift == 0) {
398
0
                    break;
399
0
                }
400
0
                if (window_last >= haystack_end) {
401
0
                    return -1;
402
0
                }
403
0
                LOG("Horspool skip\n");
404
0
            }
405
0
          no_shift:
406
0
            window = window_last - len_needle + 1;
407
0
            assert((window[len_needle - 1] & TABLE_MASK) ==
408
0
                   (needle[len_needle - 1] & TABLE_MASK));
409
0
            Py_ssize_t i = Py_MAX(cut, memory);
410
0
            for (; i < len_needle; i++) {
411
0
                if (needle[i] != window[i]) {
412
0
                    if (i < gap_jump_end) {
413
0
                        LOG("Early right half mismatch: jump by gap.\n");
414
0
                        assert(gap >= i - cut + 1);
415
0
                        window_last += gap;
416
0
                    }
417
0
                    else {
418
0
                        LOG("Late right half mismatch: jump by n (>gap)\n");
419
0
                        assert(i - cut + 1 > gap);
420
0
                        window_last += i - cut + 1;
421
0
                    }
422
0
                    memory = 0;
423
0
                    goto periodicwindowloop;
424
0
                }
425
0
            }
426
0
            for (i = memory; i < cut; i++) {
427
0
                if (needle[i] != window[i]) {
428
0
                    LOG("Left half does not match.\n");
429
0
                    window_last += period;
430
0
                    memory = len_needle - period;
431
0
                    if (window_last >= haystack_end) {
432
0
                        return -1;
433
0
                    }
434
0
                    Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
435
0
                    if (shift) {
436
                        // A mismatch has been identified to the right
437
                        // of where i will next start, so we can jump
438
                        // at least as far as if the mismatch occurred
439
                        // on the first comparison.
440
0
                        Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1;
441
0
                        LOG("Skip with Memory.\n");
442
0
                        memory = 0;
443
0
                        window_last += Py_MAX(shift, mem_jump);
444
0
                        goto periodicwindowloop;
445
0
                    }
446
0
                    goto no_shift;
447
0
                }
448
0
            }
449
0
            LOG("Found a match!\n");
450
0
            return window - haystack;
451
0
        }
452
0
    }
453
22
    else {
454
22
        period = Py_MAX(gap, period);
455
22
        LOG("Needle is not periodic.\n");
456
14.1k
      windowloop:
457
14.1k
        while (window_last < haystack_end) {
458
77.9k
            for (;;) {
459
77.9k
                LOG_LINEUP();
460
77.9k
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
461
77.9k
                window_last += shift;
462
77.9k
                if (shift == 0) {
463
14.0k
                    break;
464
14.0k
                }
465
63.8k
                if (window_last >= haystack_end) {
466
17
                    return -1;
467
17
                }
468
63.8k
                LOG("Horspool skip\n");
469
63.8k
            }
470
14.0k
            window = window_last - len_needle + 1;
471
14.0k
            assert((window[len_needle - 1] & TABLE_MASK) ==
472
14.0k
                   (needle[len_needle - 1] & TABLE_MASK));
473
14.0k
            Py_ssize_t i = cut;
474
14.3k
            for (; i < len_needle; i++) {
475
14.2k
                if (needle[i] != window[i]) {
476
13.9k
                    if (i < gap_jump_end) {
477
13.9k
                        LOG("Early right half mismatch: jump by gap.\n");
478
13.9k
                        assert(gap >= i - cut + 1);
479
13.9k
                        window_last += gap;
480
13.9k
                    }
481
0
                    else {
482
0
                        LOG("Late right half mismatch: jump by n (>gap)\n");
483
0
                        assert(i - cut + 1 > gap);
484
0
                        window_last += i - cut + 1;
485
0
                    }
486
13.9k
                    goto windowloop;
487
13.9k
                }
488
14.2k
            }
489
176
            for (Py_ssize_t i = 0; i < cut; i++) {
490
174
                if (needle[i] != window[i]) {
491
118
                    LOG("Left half does not match.\n");
492
118
                    window_last += period;
493
118
                    goto windowloop;
494
118
                }
495
174
            }
496
2
            LOG("Found a match!\n");
497
2
            return window - haystack;
498
120
        }
499
14.1k
    }
500
3
    LOG("Not found. Returning -1.\n");
501
3
    return -1;
502
22
}
Unexecuted instantiation: bytesobject.c:stringlib__two_way
Unexecuted instantiation: unicodeobject.c:asciilib__two_way
unicodeobject.c:ucs1lib__two_way
Line
Count
Source
372
22
{
373
    // Crochemore and Perrin's (1991) Two-Way algorithm.
374
    // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
375
22
    const Py_ssize_t len_needle = p->len_needle;
376
22
    const Py_ssize_t cut = p->cut;
377
22
    Py_ssize_t period = p->period;
378
22
    const STRINGLIB_CHAR *const needle = p->needle;
379
22
    const STRINGLIB_CHAR *window_last = haystack + len_needle - 1;
380
22
    const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack;
381
22
    SHIFT_TYPE *table = p->table;
382
22
    const STRINGLIB_CHAR *window;
383
22
    LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack);
384
385
22
    Py_ssize_t gap = p->gap;
386
22
    Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap);
387
22
    if (p->is_periodic) {
388
0
        LOG("Needle is periodic.\n");
389
0
        Py_ssize_t memory = 0;
390
0
      periodicwindowloop:
391
0
        while (window_last < haystack_end) {
392
0
            assert(memory == 0);
393
0
            for (;;) {
394
0
                LOG_LINEUP();
395
0
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
396
0
                window_last += shift;
397
0
                if (shift == 0) {
398
0
                    break;
399
0
                }
400
0
                if (window_last >= haystack_end) {
401
0
                    return -1;
402
0
                }
403
0
                LOG("Horspool skip\n");
404
0
            }
405
0
          no_shift:
406
0
            window = window_last - len_needle + 1;
407
0
            assert((window[len_needle - 1] & TABLE_MASK) ==
408
0
                   (needle[len_needle - 1] & TABLE_MASK));
409
0
            Py_ssize_t i = Py_MAX(cut, memory);
410
0
            for (; i < len_needle; i++) {
411
0
                if (needle[i] != window[i]) {
412
0
                    if (i < gap_jump_end) {
413
0
                        LOG("Early right half mismatch: jump by gap.\n");
414
0
                        assert(gap >= i - cut + 1);
415
0
                        window_last += gap;
416
0
                    }
417
0
                    else {
418
0
                        LOG("Late right half mismatch: jump by n (>gap)\n");
419
0
                        assert(i - cut + 1 > gap);
420
0
                        window_last += i - cut + 1;
421
0
                    }
422
0
                    memory = 0;
423
0
                    goto periodicwindowloop;
424
0
                }
425
0
            }
426
0
            for (i = memory; i < cut; i++) {
427
0
                if (needle[i] != window[i]) {
428
0
                    LOG("Left half does not match.\n");
429
0
                    window_last += period;
430
0
                    memory = len_needle - period;
431
0
                    if (window_last >= haystack_end) {
432
0
                        return -1;
433
0
                    }
434
0
                    Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
435
0
                    if (shift) {
436
                        // A mismatch has been identified to the right
437
                        // of where i will next start, so we can jump
438
                        // at least as far as if the mismatch occurred
439
                        // on the first comparison.
440
0
                        Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1;
441
0
                        LOG("Skip with Memory.\n");
442
0
                        memory = 0;
443
0
                        window_last += Py_MAX(shift, mem_jump);
444
0
                        goto periodicwindowloop;
445
0
                    }
446
0
                    goto no_shift;
447
0
                }
448
0
            }
449
0
            LOG("Found a match!\n");
450
0
            return window - haystack;
451
0
        }
452
0
    }
453
22
    else {
454
22
        period = Py_MAX(gap, period);
455
22
        LOG("Needle is not periodic.\n");
456
14.1k
      windowloop:
457
14.1k
        while (window_last < haystack_end) {
458
77.9k
            for (;;) {
459
77.9k
                LOG_LINEUP();
460
77.9k
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
461
77.9k
                window_last += shift;
462
77.9k
                if (shift == 0) {
463
14.0k
                    break;
464
14.0k
                }
465
63.8k
                if (window_last >= haystack_end) {
466
17
                    return -1;
467
17
                }
468
63.8k
                LOG("Horspool skip\n");
469
63.8k
            }
470
14.0k
            window = window_last - len_needle + 1;
471
14.0k
            assert((window[len_needle - 1] & TABLE_MASK) ==
472
14.0k
                   (needle[len_needle - 1] & TABLE_MASK));
473
14.0k
            Py_ssize_t i = cut;
474
14.3k
            for (; i < len_needle; i++) {
475
14.2k
                if (needle[i] != window[i]) {
476
13.9k
                    if (i < gap_jump_end) {
477
13.9k
                        LOG("Early right half mismatch: jump by gap.\n");
478
13.9k
                        assert(gap >= i - cut + 1);
479
13.9k
                        window_last += gap;
480
13.9k
                    }
481
0
                    else {
482
0
                        LOG("Late right half mismatch: jump by n (>gap)\n");
483
0
                        assert(i - cut + 1 > gap);
484
0
                        window_last += i - cut + 1;
485
0
                    }
486
13.9k
                    goto windowloop;
487
13.9k
                }
488
14.2k
            }
489
176
            for (Py_ssize_t i = 0; i < cut; i++) {
490
174
                if (needle[i] != window[i]) {
491
118
                    LOG("Left half does not match.\n");
492
118
                    window_last += period;
493
118
                    goto windowloop;
494
118
                }
495
174
            }
496
2
            LOG("Found a match!\n");
497
2
            return window - haystack;
498
120
        }
499
14.1k
    }
500
3
    LOG("Not found. Returning -1.\n");
501
3
    return -1;
502
22
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__two_way
Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way
Unexecuted instantiation: bytes_methods.c:stringlib__two_way
Unexecuted instantiation: bytearrayobject.c:stringlib__two_way
503
504
505
static Py_ssize_t
506
STRINGLIB(_two_way_find)(const STRINGLIB_CHAR *haystack,
507
                         Py_ssize_t len_haystack,
508
                         const STRINGLIB_CHAR *needle,
509
                         Py_ssize_t len_needle)
510
22
{
511
22
    LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack);
512
22
    STRINGLIB(prework) p;
513
22
    STRINGLIB(_preprocess)(needle, len_needle, &p);
514
22
    return STRINGLIB(_two_way)(haystack, len_haystack, &p);
515
22
}
Unexecuted instantiation: bytesobject.c:stringlib__two_way_find
Unexecuted instantiation: unicodeobject.c:asciilib__two_way_find
unicodeobject.c:ucs1lib__two_way_find
Line
Count
Source
510
22
{
511
22
    LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack);
512
22
    STRINGLIB(prework) p;
513
22
    STRINGLIB(_preprocess)(needle, len_needle, &p);
514
22
    return STRINGLIB(_two_way)(haystack, len_haystack, &p);
515
22
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__two_way_find
Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way_find
Unexecuted instantiation: bytes_methods.c:stringlib__two_way_find
Unexecuted instantiation: bytearrayobject.c:stringlib__two_way_find
516
517
518
static Py_ssize_t
519
STRINGLIB(_two_way_count)(const STRINGLIB_CHAR *haystack,
520
                          Py_ssize_t len_haystack,
521
                          const STRINGLIB_CHAR *needle,
522
                          Py_ssize_t len_needle,
523
                          Py_ssize_t maxcount)
524
0
{
525
0
    LOG("###### Counting \"%s\" in \"%s\".\n", needle, haystack);
526
0
    STRINGLIB(prework) p;
527
0
    STRINGLIB(_preprocess)(needle, len_needle, &p);
528
0
    Py_ssize_t index = 0, count = 0;
529
0
    while (1) {
530
0
        Py_ssize_t result;
531
0
        result = STRINGLIB(_two_way)(haystack + index,
532
0
                                     len_haystack - index, &p);
533
0
        if (result == -1) {
534
0
            return count;
535
0
        }
536
0
        count++;
537
0
        if (count == maxcount) {
538
0
            return maxcount;
539
0
        }
540
0
        index += result + len_needle;
541
0
    }
542
0
    return count;
543
0
}
Unexecuted instantiation: bytesobject.c:stringlib__two_way_count
Unexecuted instantiation: unicodeobject.c:asciilib__two_way_count
Unexecuted instantiation: unicodeobject.c:ucs1lib__two_way_count
Unexecuted instantiation: unicodeobject.c:ucs2lib__two_way_count
Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way_count
Unexecuted instantiation: bytes_methods.c:stringlib__two_way_count
Unexecuted instantiation: bytearrayobject.c:stringlib__two_way_count
544
545
#undef SHIFT_TYPE
546
#undef NOT_FOUND
547
#undef SHIFT_OVERFLOW
548
#undef TABLE_SIZE_BITS
549
#undef TABLE_SIZE
550
#undef TABLE_MASK
551
552
#undef LOG
553
#undef LOG_STRING
554
#undef LOG_LINEUP
555
556
static inline Py_ssize_t
557
STRINGLIB(default_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
558
                        const STRINGLIB_CHAR* p, Py_ssize_t m,
559
                        Py_ssize_t maxcount, int mode)
560
34.2M
{
561
34.2M
    const Py_ssize_t w = n - m;
562
34.2M
    Py_ssize_t mlast = m - 1, count = 0;
563
34.2M
    Py_ssize_t gap = mlast;
564
34.2M
    const STRINGLIB_CHAR last = p[mlast];
565
34.2M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
34.2M
    unsigned long mask = 0;
568
164M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
130M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
130M
        if (p[i] == last) {
571
565k
            gap = mlast - i - 1;
572
565k
        }
573
130M
    }
574
34.2M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
3.37G
    for (Py_ssize_t i = 0; i <= w; i++) {
577
3.35G
        if (ss[i] == last) {
578
            /* candidate match */
579
48.1M
            Py_ssize_t j;
580
76.8M
            for (j = 0; j < mlast; j++) {
581
49.1M
                if (s[i+j] != p[j]) {
582
20.4M
                    break;
583
20.4M
                }
584
49.1M
            }
585
48.1M
            if (j == mlast) {
586
                /* got a match! */
587
27.7M
                if (mode != FAST_COUNT) {
588
14.4M
                    return i;
589
14.4M
                }
590
13.3M
                count++;
591
13.3M
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
13.3M
                i = i + mlast;
595
13.3M
                continue;
596
13.3M
            }
597
            /* miss: check if next character is part of pattern */
598
20.4M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
5.19M
                i = i + m;
600
5.19M
            }
601
15.2M
            else {
602
15.2M
                i = i + gap;
603
15.2M
            }
604
20.4M
        }
605
3.31G
        else {
606
            /* skip: check if next character is part of pattern */
607
3.31G
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
3.19G
                i = i + m;
609
3.19G
            }
610
3.31G
        }
611
3.35G
    }
612
19.8M
    return mode == FAST_COUNT ? count : -1;
613
34.2M
}
Unexecuted instantiation: bytesobject.c:stringlib_default_find
unicodeobject.c:asciilib_default_find
Line
Count
Source
560
2.36M
{
561
2.36M
    const Py_ssize_t w = n - m;
562
2.36M
    Py_ssize_t mlast = m - 1, count = 0;
563
2.36M
    Py_ssize_t gap = mlast;
564
2.36M
    const STRINGLIB_CHAR last = p[mlast];
565
2.36M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
2.36M
    unsigned long mask = 0;
568
5.12M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
2.75M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
2.75M
        if (p[i] == last) {
571
23.3k
            gap = mlast - i - 1;
572
23.3k
        }
573
2.75M
    }
574
2.36M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
129M
    for (Py_ssize_t i = 0; i <= w; i++) {
577
129M
        if (ss[i] == last) {
578
            /* candidate match */
579
4.46M
            Py_ssize_t j;
580
7.20M
            for (j = 0; j < mlast; j++) {
581
4.88M
                if (s[i+j] != p[j]) {
582
2.13M
                    break;
583
2.13M
                }
584
4.88M
            }
585
4.46M
            if (j == mlast) {
586
                /* got a match! */
587
2.32M
                if (mode != FAST_COUNT) {
588
2.32M
                    return i;
589
2.32M
                }
590
0
                count++;
591
0
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
0
                i = i + mlast;
595
0
                continue;
596
0
            }
597
            /* miss: check if next character is part of pattern */
598
2.13M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
116k
                i = i + m;
600
116k
            }
601
2.02M
            else {
602
2.02M
                i = i + gap;
603
2.02M
            }
604
2.13M
        }
605
124M
        else {
606
            /* skip: check if next character is part of pattern */
607
124M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
120M
                i = i + m;
609
120M
            }
610
124M
        }
611
129M
    }
612
35.0k
    return mode == FAST_COUNT ? count : -1;
613
2.36M
}
unicodeobject.c:ucs1lib_default_find
Line
Count
Source
560
21.8M
{
561
21.8M
    const Py_ssize_t w = n - m;
562
21.8M
    Py_ssize_t mlast = m - 1, count = 0;
563
21.8M
    Py_ssize_t gap = mlast;
564
21.8M
    const STRINGLIB_CHAR last = p[mlast];
565
21.8M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
21.8M
    unsigned long mask = 0;
568
139M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
117M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
117M
        if (p[i] == last) {
571
475k
            gap = mlast - i - 1;
572
475k
        }
573
117M
    }
574
21.8M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
616M
    for (Py_ssize_t i = 0; i <= w; i++) {
577
597M
        if (ss[i] == last) {
578
            /* candidate match */
579
14.0M
            Py_ssize_t j;
580
20.2M
            for (j = 0; j < mlast; j++) {
581
14.4M
                if (s[i+j] != p[j]) {
582
8.27M
                    break;
583
8.27M
                }
584
14.4M
            }
585
14.0M
            if (j == mlast) {
586
                /* got a match! */
587
5.81M
                if (mode != FAST_COUNT) {
588
2.12M
                    return i;
589
2.12M
                }
590
3.68M
                count++;
591
3.68M
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
3.68M
                i = i + mlast;
595
3.68M
                continue;
596
3.68M
            }
597
            /* miss: check if next character is part of pattern */
598
8.27M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
1.96M
                i = i + m;
600
1.96M
            }
601
6.31M
            else {
602
6.31M
                i = i + gap;
603
6.31M
            }
604
8.27M
        }
605
582M
        else {
606
            /* skip: check if next character is part of pattern */
607
582M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
504M
                i = i + m;
609
504M
            }
610
582M
        }
611
597M
    }
612
19.6M
    return mode == FAST_COUNT ? count : -1;
613
21.8M
}
unicodeobject.c:ucs2lib_default_find
Line
Count
Source
560
3.89M
{
561
3.89M
    const Py_ssize_t w = n - m;
562
3.89M
    Py_ssize_t mlast = m - 1, count = 0;
563
3.89M
    Py_ssize_t gap = mlast;
564
3.89M
    const STRINGLIB_CHAR last = p[mlast];
565
3.89M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
3.89M
    unsigned long mask = 0;
568
8.00M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
4.11M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
4.11M
        if (p[i] == last) {
571
39.1k
            gap = mlast - i - 1;
572
39.1k
        }
573
4.11M
    }
574
3.89M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
1.02G
    for (Py_ssize_t i = 0; i <= w; i++) {
577
1.02G
        if (ss[i] == last) {
578
            /* candidate match */
579
12.0M
            Py_ssize_t j;
580
19.5M
            for (j = 0; j < mlast; j++) {
581
12.2M
                if (s[i+j] != p[j]) {
582
4.80M
                    break;
583
4.80M
                }
584
12.2M
            }
585
12.0M
            if (j == mlast) {
586
                /* got a match! */
587
7.24M
                if (mode != FAST_COUNT) {
588
3.77M
                    return i;
589
3.77M
                }
590
3.47M
                count++;
591
3.47M
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
3.47M
                i = i + mlast;
595
3.47M
                continue;
596
3.47M
            }
597
            /* miss: check if next character is part of pattern */
598
4.80M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
1.08M
                i = i + m;
600
1.08M
            }
601
3.71M
            else {
602
3.71M
                i = i + gap;
603
3.71M
            }
604
4.80M
        }
605
1.01G
        else {
606
            /* skip: check if next character is part of pattern */
607
1.01G
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
1.00G
                i = i + m;
609
1.00G
            }
610
1.01G
        }
611
1.02G
    }
612
121k
    return mode == FAST_COUNT ? count : -1;
613
3.89M
}
unicodeobject.c:ucs4lib_default_find
Line
Count
Source
560
6.23M
{
561
6.23M
    const Py_ssize_t w = n - m;
562
6.23M
    Py_ssize_t mlast = m - 1, count = 0;
563
6.23M
    Py_ssize_t gap = mlast;
564
6.23M
    const STRINGLIB_CHAR last = p[mlast];
565
6.23M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
6.23M
    unsigned long mask = 0;
568
12.4M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
6.26M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
6.26M
        if (p[i] == last) {
571
24.5k
            gap = mlast - i - 1;
572
24.5k
        }
573
6.26M
    }
574
6.23M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
1.60G
    for (Py_ssize_t i = 0; i <= w; i++) {
577
1.60G
        if (ss[i] == last) {
578
            /* candidate match */
579
17.5M
            Py_ssize_t j;
580
29.9M
            for (j = 0; j < mlast; j++) {
581
17.5M
                if (s[i+j] != p[j]) {
582
5.17M
                    break;
583
5.17M
                }
584
17.5M
            }
585
17.5M
            if (j == mlast) {
586
                /* got a match! */
587
12.3M
                if (mode != FAST_COUNT) {
588
6.20M
                    return i;
589
6.20M
                }
590
6.14M
                count++;
591
6.14M
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
6.14M
                i = i + mlast;
595
6.14M
                continue;
596
6.14M
            }
597
            /* miss: check if next character is part of pattern */
598
5.17M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
2.02M
                i = i + m;
600
2.02M
            }
601
3.14M
            else {
602
3.14M
                i = i + gap;
603
3.14M
            }
604
5.17M
        }
605
1.58G
        else {
606
            /* skip: check if next character is part of pattern */
607
1.58G
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
1.56G
                i = i + m;
609
1.56G
            }
610
1.58G
        }
611
1.60G
    }
612
30.0k
    return mode == FAST_COUNT ? count : -1;
613
6.23M
}
bytes_methods.c:stringlib_default_find
Line
Count
Source
560
2.82k
{
561
2.82k
    const Py_ssize_t w = n - m;
562
2.82k
    Py_ssize_t mlast = m - 1, count = 0;
563
2.82k
    Py_ssize_t gap = mlast;
564
2.82k
    const STRINGLIB_CHAR last = p[mlast];
565
2.82k
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
2.82k
    unsigned long mask = 0;
568
11.3k
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
8.48k
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
8.48k
        if (p[i] == last) {
571
2.82k
            gap = mlast - i - 1;
572
2.82k
        }
573
8.48k
    }
574
2.82k
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
1.31M
    for (Py_ssize_t i = 0; i <= w; i++) {
577
1.31M
        if (ss[i] == last) {
578
            /* candidate match */
579
9.90k
            Py_ssize_t j;
580
17.9k
            for (j = 0; j < mlast; j++) {
581
15.3k
                if (s[i+j] != p[j]) {
582
7.30k
                    break;
583
7.30k
                }
584
15.3k
            }
585
9.90k
            if (j == mlast) {
586
                /* got a match! */
587
2.60k
                if (mode != FAST_COUNT) {
588
2.60k
                    return i;
589
2.60k
                }
590
0
                count++;
591
0
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
0
                i = i + mlast;
595
0
                continue;
596
0
            }
597
            /* miss: check if next character is part of pattern */
598
7.30k
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
548
                i = i + m;
600
548
            }
601
6.75k
            else {
602
6.75k
                i = i + gap;
603
6.75k
            }
604
7.30k
        }
605
1.30M
        else {
606
            /* skip: check if next character is part of pattern */
607
1.30M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
68.6k
                i = i + m;
609
68.6k
            }
610
1.30M
        }
611
1.31M
    }
612
221
    return mode == FAST_COUNT ? count : -1;
613
2.82k
}
Unexecuted instantiation: bytearrayobject.c:stringlib_default_find
614
615
616
static Py_ssize_t
617
STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
618
                         const STRINGLIB_CHAR* p, Py_ssize_t m,
619
                         Py_ssize_t maxcount, int mode)
620
0
{
621
0
    const Py_ssize_t w = n - m;
622
0
    Py_ssize_t mlast = m - 1, count = 0;
623
0
    Py_ssize_t gap = mlast;
624
0
    Py_ssize_t hits = 0, res;
625
0
    const STRINGLIB_CHAR last = p[mlast];
626
0
    const STRINGLIB_CHAR *const ss = &s[mlast];
627
628
0
    unsigned long mask = 0;
629
0
    for (Py_ssize_t i = 0; i < mlast; i++) {
630
0
        STRINGLIB_BLOOM_ADD(mask, p[i]);
631
0
        if (p[i] == last) {
632
0
            gap = mlast - i - 1;
633
0
        }
634
0
    }
635
0
    STRINGLIB_BLOOM_ADD(mask, last);
636
637
0
    for (Py_ssize_t i = 0; i <= w; i++) {
638
0
        if (ss[i] == last) {
639
            /* candidate match */
640
0
            Py_ssize_t j;
641
0
            for (j = 0; j < mlast; j++) {
642
0
                if (s[i+j] != p[j]) {
643
0
                    break;
644
0
                }
645
0
            }
646
0
            if (j == mlast) {
647
                /* got a match! */
648
0
                if (mode != FAST_COUNT) {
649
0
                    return i;
650
0
                }
651
0
                count++;
652
0
                if (count == maxcount) {
653
0
                    return maxcount;
654
0
                }
655
0
                i = i + mlast;
656
0
                continue;
657
0
            }
658
0
            hits += j + 1;
659
0
            if (hits > m / 4 && w - i > 2000) {
660
0
                if (mode == FAST_SEARCH) {
661
0
                    res = STRINGLIB(_two_way_find)(s + i, n - i, p, m);
662
0
                    return res == -1 ? -1 : res + i;
663
0
                }
664
0
                else {
665
0
                    res = STRINGLIB(_two_way_count)(s + i, n - i, p, m,
666
0
                                                    maxcount - count);
667
0
                    return res + count;
668
0
                }
669
0
            }
670
            /* miss: check if next character is part of pattern */
671
0
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
672
0
                i = i + m;
673
0
            }
674
0
            else {
675
0
                i = i + gap;
676
0
            }
677
0
        }
678
0
        else {
679
            /* skip: check if next character is part of pattern */
680
0
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
681
0
                i = i + m;
682
0
            }
683
0
        }
684
0
    }
685
0
    return mode == FAST_COUNT ? count : -1;
686
0
}
Unexecuted instantiation: bytesobject.c:stringlib_adaptive_find
Unexecuted instantiation: unicodeobject.c:asciilib_adaptive_find
Unexecuted instantiation: unicodeobject.c:ucs1lib_adaptive_find
Unexecuted instantiation: unicodeobject.c:ucs2lib_adaptive_find
Unexecuted instantiation: unicodeobject.c:ucs4lib_adaptive_find
Unexecuted instantiation: bytes_methods.c:stringlib_adaptive_find
Unexecuted instantiation: bytearrayobject.c:stringlib_adaptive_find
687
688
689
static Py_ssize_t
690
STRINGLIB(default_rfind)(const STRINGLIB_CHAR* s, Py_ssize_t n,
691
                         const STRINGLIB_CHAR* p, Py_ssize_t m,
692
                         Py_ssize_t maxcount, int mode)
693
7.09k
{
694
    /* create compressed boyer-moore delta 1 table */
695
7.09k
    unsigned long mask = 0;
696
7.09k
    Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m;
697
698
    /* process pattern[0] outside the loop */
699
7.09k
    STRINGLIB_BLOOM_ADD(mask, p[0]);
700
    /* process pattern[:0:-1] */
701
28.3k
    for (i = mlast; i > 0; i--) {
702
21.2k
        STRINGLIB_BLOOM_ADD(mask, p[i]);
703
21.2k
        if (p[i] == p[0]) {
704
0
            skip = i - 1;
705
0
        }
706
21.2k
    }
707
708
1.65M
    for (i = w; i >= 0; i--) {
709
1.65M
        if (s[i] == p[0]) {
710
            /* candidate match */
711
100k
            for (j = mlast; j > 0; j--) {
712
93.1k
                if (s[i+j] != p[j]) {
713
71.4k
                    break;
714
71.4k
                }
715
93.1k
            }
716
78.4k
            if (j == 0) {
717
                /* got a match! */
718
7.00k
                return i;
719
7.00k
            }
720
            /* miss: check if previous character is part of pattern */
721
71.4k
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
722
69.6k
                i = i - m;
723
69.6k
            }
724
1.79k
            else {
725
1.79k
                i = i - skip;
726
1.79k
            }
727
71.4k
        }
728
1.58M
        else {
729
            /* skip: check if previous character is part of pattern */
730
1.58M
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
731
1.43M
                i = i - m;
732
1.43M
            }
733
1.58M
        }
734
1.65M
    }
735
90
    return -1;
736
7.09k
}
Unexecuted instantiation: bytesobject.c:stringlib_default_rfind
Unexecuted instantiation: unicodeobject.c:asciilib_default_rfind
Unexecuted instantiation: unicodeobject.c:ucs1lib_default_rfind
Unexecuted instantiation: unicodeobject.c:ucs2lib_default_rfind
Unexecuted instantiation: unicodeobject.c:ucs4lib_default_rfind
bytes_methods.c:stringlib_default_rfind
Line
Count
Source
693
7.09k
{
694
    /* create compressed boyer-moore delta 1 table */
695
7.09k
    unsigned long mask = 0;
696
7.09k
    Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m;
697
698
    /* process pattern[0] outside the loop */
699
7.09k
    STRINGLIB_BLOOM_ADD(mask, p[0]);
700
    /* process pattern[:0:-1] */
701
28.3k
    for (i = mlast; i > 0; i--) {
702
21.2k
        STRINGLIB_BLOOM_ADD(mask, p[i]);
703
21.2k
        if (p[i] == p[0]) {
704
0
            skip = i - 1;
705
0
        }
706
21.2k
    }
707
708
1.65M
    for (i = w; i >= 0; i--) {
709
1.65M
        if (s[i] == p[0]) {
710
            /* candidate match */
711
100k
            for (j = mlast; j > 0; j--) {
712
93.1k
                if (s[i+j] != p[j]) {
713
71.4k
                    break;
714
71.4k
                }
715
93.1k
            }
716
78.4k
            if (j == 0) {
717
                /* got a match! */
718
7.00k
                return i;
719
7.00k
            }
720
            /* miss: check if previous character is part of pattern */
721
71.4k
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
722
69.6k
                i = i - m;
723
69.6k
            }
724
1.79k
            else {
725
1.79k
                i = i - skip;
726
1.79k
            }
727
71.4k
        }
728
1.58M
        else {
729
            /* skip: check if previous character is part of pattern */
730
1.58M
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
731
1.43M
                i = i - m;
732
1.43M
            }
733
1.58M
        }
734
1.65M
    }
735
90
    return -1;
736
7.09k
}
Unexecuted instantiation: bytearrayobject.c:stringlib_default_rfind
737
738
739
static inline Py_ssize_t
740
STRINGLIB(count_char)(const STRINGLIB_CHAR *s, Py_ssize_t n,
741
                      const STRINGLIB_CHAR p0, Py_ssize_t maxcount)
742
0
{
743
0
    Py_ssize_t i, count = 0;
744
0
    for (i = 0; i < n; i++) {
745
0
        if (s[i] == p0) {
746
0
            count++;
747
0
            if (count == maxcount) {
748
0
                return maxcount;
749
0
            }
750
0
        }
751
0
    }
752
0
    return count;
753
0
}
Unexecuted instantiation: bytesobject.c:stringlib_count_char
Unexecuted instantiation: unicodeobject.c:asciilib_count_char
Unexecuted instantiation: unicodeobject.c:ucs1lib_count_char
Unexecuted instantiation: unicodeobject.c:ucs2lib_count_char
Unexecuted instantiation: unicodeobject.c:ucs4lib_count_char
Unexecuted instantiation: bytes_methods.c:stringlib_count_char
Unexecuted instantiation: bytearrayobject.c:stringlib_count_char
754
755
756
static inline Py_ssize_t
757
STRINGLIB(count_char_no_maxcount)(const STRINGLIB_CHAR *s, Py_ssize_t n,
758
                                  const STRINGLIB_CHAR p0)
759
/* A specialized function of count_char that does not cut off at a maximum.
760
   As a result, the compiler is able to vectorize the loop. */
761
70.0M
{
762
70.0M
    Py_ssize_t count = 0;
763
9.35G
    for (Py_ssize_t i = 0; i < n; i++) {
764
9.28G
        if (s[i] == p0) {
765
1.05G
            count++;
766
1.05G
        }
767
9.28G
    }
768
70.0M
    return count;
769
70.0M
}
Unexecuted instantiation: bytesobject.c:stringlib_count_char_no_maxcount
Unexecuted instantiation: unicodeobject.c:asciilib_count_char_no_maxcount
unicodeobject.c:ucs1lib_count_char_no_maxcount
Line
Count
Source
761
50.9M
{
762
50.9M
    Py_ssize_t count = 0;
763
1.48G
    for (Py_ssize_t i = 0; i < n; i++) {
764
1.43G
        if (s[i] == p0) {
765
48.6M
            count++;
766
48.6M
        }
767
1.43G
    }
768
50.9M
    return count;
769
50.9M
}
unicodeobject.c:ucs2lib_count_char_no_maxcount
Line
Count
Source
761
10.2M
{
762
10.2M
    Py_ssize_t count = 0;
763
2.06G
    for (Py_ssize_t i = 0; i < n; i++) {
764
2.05G
        if (s[i] == p0) {
765
70.1M
            count++;
766
70.1M
        }
767
2.05G
    }
768
10.2M
    return count;
769
10.2M
}
unicodeobject.c:ucs4lib_count_char_no_maxcount
Line
Count
Source
761
1.99M
{
762
1.99M
    Py_ssize_t count = 0;
763
2.27G
    for (Py_ssize_t i = 0; i < n; i++) {
764
2.27G
        if (s[i] == p0) {
765
51.8M
            count++;
766
51.8M
        }
767
2.27G
    }
768
1.99M
    return count;
769
1.99M
}
bytes_methods.c:stringlib_count_char_no_maxcount
Line
Count
Source
761
6.90M
{
762
6.90M
    Py_ssize_t count = 0;
763
3.54G
    for (Py_ssize_t i = 0; i < n; i++) {
764
3.53G
        if (s[i] == p0) {
765
885M
            count++;
766
885M
        }
767
3.53G
    }
768
6.90M
    return count;
769
6.90M
}
Unexecuted instantiation: bytearrayobject.c:stringlib_count_char_no_maxcount
770
771
772
Py_LOCAL_INLINE(Py_ssize_t)
773
FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
774
           const STRINGLIB_CHAR* p, Py_ssize_t m,
775
           Py_ssize_t maxcount, int mode)
776
237M
{
777
237M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
15.5k
        return -1;
779
15.5k
    }
780
781
    /* look for special cases */
782
237M
    if (m <= 1) {
783
203M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
203M
        if (mode == FAST_SEARCH)
788
133M
            return STRINGLIB(find_char)(s, n, p[0]);
789
70.1M
        else if (mode == FAST_RSEARCH)
790
49.8k
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
70.0M
        else {
792
70.0M
            if (maxcount == PY_SSIZE_T_MAX) {
793
70.0M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
70.0M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
70.0M
        }
797
203M
    }
798
799
34.2M
    if (mode != FAST_RSEARCH) {
800
34.2M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
34.2M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
34.2M
        }
803
22
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
22
            if (mode == FAST_SEARCH) {
810
22
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
22
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
22
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
34.2M
    }
825
7.09k
    else {
826
        /* FAST_RSEARCH */
827
7.09k
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
7.09k
    }
829
34.2M
}
bytesobject.c:fastsearch
Line
Count
Source
776
591k
{
777
591k
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
0
        return -1;
779
0
    }
780
781
    /* look for special cases */
782
591k
    if (m <= 1) {
783
591k
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
591k
        if (mode == FAST_SEARCH)
788
591k
            return STRINGLIB(find_char)(s, n, p[0]);
789
0
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
0
        else {
792
0
            if (maxcount == PY_SSIZE_T_MAX) {
793
0
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
0
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
0
        }
797
591k
    }
798
799
0
    if (mode != FAST_RSEARCH) {
800
0
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
0
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
0
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
0
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
0
}
unicodeobject.c:asciilib_fastsearch
Line
Count
Source
776
26.4M
{
777
26.4M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
0
        return -1;
779
0
    }
780
781
    /* look for special cases */
782
26.4M
    if (m <= 1) {
783
24.1M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
24.1M
        if (mode == FAST_SEARCH)
788
24.0M
            return STRINGLIB(find_char)(s, n, p[0]);
789
49.8k
        else if (mode == FAST_RSEARCH)
790
49.8k
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
0
        else {
792
0
            if (maxcount == PY_SSIZE_T_MAX) {
793
0
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
0
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
0
        }
797
24.1M
    }
798
799
2.36M
    if (mode != FAST_RSEARCH) {
800
2.36M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
2.36M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
2.36M
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
2.36M
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
2.36M
}
unicodeobject.c:ucs1lib_fastsearch
Line
Count
Source
776
81.6M
{
777
81.6M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
0
        return -1;
779
0
    }
780
781
    /* look for special cases */
782
81.6M
    if (m <= 1) {
783
59.8M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
59.8M
        if (mode == FAST_SEARCH)
788
8.91M
            return STRINGLIB(find_char)(s, n, p[0]);
789
50.9M
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
50.9M
        else {
792
50.9M
            if (maxcount == PY_SSIZE_T_MAX) {
793
50.9M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
50.9M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
50.9M
        }
797
59.8M
    }
798
799
21.8M
    if (mode != FAST_RSEARCH) {
800
21.8M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
21.8M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
21.8M
        }
803
22
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
22
            if (mode == FAST_SEARCH) {
810
22
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
22
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
22
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
21.8M
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
21.8M
}
unicodeobject.c:ucs2lib_fastsearch
Line
Count
Source
776
70.9M
{
777
70.9M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
15.4k
        return -1;
779
15.4k
    }
780
781
    /* look for special cases */
782
70.8M
    if (m <= 1) {
783
66.9M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
66.9M
        if (mode == FAST_SEARCH)
788
56.7M
            return STRINGLIB(find_char)(s, n, p[0]);
789
10.2M
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
10.2M
        else {
792
10.2M
            if (maxcount == PY_SSIZE_T_MAX) {
793
10.2M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
10.2M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
10.2M
        }
797
66.9M
    }
798
799
3.89M
    if (mode != FAST_RSEARCH) {
800
3.89M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
3.89M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
3.89M
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
3.89M
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
3.89M
}
unicodeobject.c:ucs4lib_fastsearch
Line
Count
Source
776
51.0M
{
777
51.0M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
0
        return -1;
779
0
    }
780
781
    /* look for special cases */
782
51.0M
    if (m <= 1) {
783
44.8M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
44.8M
        if (mode == FAST_SEARCH)
788
42.8M
            return STRINGLIB(find_char)(s, n, p[0]);
789
1.99M
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
1.99M
        else {
792
1.99M
            if (maxcount == PY_SSIZE_T_MAX) {
793
1.99M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
1.99M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
1.99M
        }
797
44.8M
    }
798
799
6.23M
    if (mode != FAST_RSEARCH) {
800
6.23M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
6.23M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
6.23M
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
6.23M
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
6.23M
}
bytes_methods.c:fastsearch
Line
Count
Source
776
6.91M
{
777
6.91M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
10
        return -1;
779
10
    }
780
781
    /* look for special cases */
782
6.91M
    if (m <= 1) {
783
6.90M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
6.90M
        if (mode == FAST_SEARCH)
788
0
            return STRINGLIB(find_char)(s, n, p[0]);
789
6.90M
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
6.90M
        else {
792
6.90M
            if (maxcount == PY_SSIZE_T_MAX) {
793
6.90M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
6.90M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
6.90M
        }
797
6.90M
    }
798
799
9.92k
    if (mode != FAST_RSEARCH) {
800
2.82k
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
2.82k
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
2.82k
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
2.82k
    }
825
7.09k
    else {
826
        /* FAST_RSEARCH */
827
7.09k
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
7.09k
    }
829
9.92k
}
Unexecuted instantiation: bytearrayobject.c:fastsearch
830