Coverage Report

Created: 2026-03-19 06:23

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython3/Objects/stringlib/fastsearch.h
Line
Count
Source
1
/* stringlib: fastsearch implementation */
2
3
#define STRINGLIB_FASTSEARCH_H
4
5
/* fast search/count implementation, based on a mix between boyer-
6
   moore and horspool, with a few more bells and whistles on the top.
7
   for some more background, see:
8
   https://web.archive.org/web/20201107074620/http://effbot.org/zone/stringlib.htm */
9
10
/* note: fastsearch may access s[n], which isn't a problem when using
11
   Python's ordinary string types, but may cause problems if you're
12
   using this code in other contexts.  also, the count mode returns -1
13
   if there cannot possibly be a match in the target string, and 0 if
14
   it has actually checked for matches, but didn't find any.  callers
15
   beware! */
16
17
/* If the strings are long enough, use Crochemore and Perrin's Two-Way
18
   algorithm, which has worst-case O(n) runtime and best-case O(n/k).
19
   Also compute a table of shifts to achieve O(n/k) in more cases,
20
   and often (data dependent) deduce larger shifts than pure C&P can
21
   deduce. See stringlib_find_two_way_notes.txt in this folder for a
22
   detailed explanation. */
23
24
1.20G
#define FAST_COUNT 0
25
400M
#define FAST_SEARCH 1
26
400M
#define FAST_RSEARCH 2
27
28
#if LONG_BIT >= 128
29
#define STRINGLIB_BLOOM_WIDTH 128
30
#elif LONG_BIT >= 64
31
8.00G
#define STRINGLIB_BLOOM_WIDTH 64
32
#elif LONG_BIT >= 32
33
#define STRINGLIB_BLOOM_WIDTH 32
34
#else
35
#error "LONG_BIT is smaller than 32"
36
#endif
37
38
#define STRINGLIB_BLOOM_ADD(mask, ch) \
39
3.59G
    ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
40
#define STRINGLIB_BLOOM(mask, ch)     \
41
4.40G
    ((mask &  (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
42
43
#ifdef STRINGLIB_FAST_MEMCHR
44
26.8M
#  define MEMCHR_CUT_OFF 15
45
#else
46
19.0k
#  define MEMCHR_CUT_OFF 40
47
#endif
48
49
Py_LOCAL_INLINE(Py_ssize_t)
50
STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
51
26.8M
{
52
26.8M
    const STRINGLIB_CHAR *p, *e;
53
54
26.8M
    p = s;
55
26.8M
    e = s + n;
56
26.8M
    if (n > MEMCHR_CUT_OFF) {
57
#ifdef STRINGLIB_FAST_MEMCHR
58
630k
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
630k
        if (p != NULL)
60
566k
            return (p - s);
61
63.6k
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
1.03k
        if (needle != 0) {
71
2.03k
            do {
72
2.03k
                const void *candidate = memchr(p, needle,
73
2.03k
                                               (e - p) * sizeof(STRINGLIB_CHAR));
74
2.03k
                if (candidate == NULL)
75
141
                    return -1;
76
1.89k
                s1 = p;
77
1.89k
                p = (const STRINGLIB_CHAR *)
78
1.89k
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
1.89k
                if (*p == ch)
80
57
                    return (p - s);
81
                /* False positive */
82
1.83k
                p++;
83
1.83k
                if (p - s1 > MEMCHR_CUT_OFF)
84
180
                    continue;
85
1.65k
                if (e - p <= MEMCHR_CUT_OFF)
86
538
                    break;
87
1.11k
                e1 = p + MEMCHR_CUT_OFF;
88
40.3k
                while (p != e1) {
89
39.4k
                    if (*p == ch)
90
200
                        return (p - s);
91
39.2k
                    p++;
92
39.2k
                }
93
1.11k
            }
94
1.09k
            while (e - p > MEMCHR_CUT_OFF);
95
1.03k
        }
96
#endif
97
631k
    }
98
149M
    while (p < e) {
99
132M
        if (*p == ch)
100
8.55M
            return (p - s);
101
123M
        p++;
102
123M
    }
103
17.6M
    return -1;
104
26.2M
}
unicodeobject.c:ucs1lib_find_char
Line
Count
Source
51
26.3M
{
52
26.3M
    const STRINGLIB_CHAR *p, *e;
53
54
26.3M
    p = s;
55
26.3M
    e = s + n;
56
26.3M
    if (n > MEMCHR_CUT_OFF) {
57
153k
#ifdef STRINGLIB_FAST_MEMCHR
58
153k
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
153k
        if (p != NULL)
60
152k
            return (p - s);
61
1.06k
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                const void *candidate = memchr(p, needle,
73
                                               (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
153k
    }
98
149M
    while (p < e) {
99
131M
        if (*p == ch)
100
8.54M
            return (p - s);
101
122M
        p++;
102
122M
    }
103
17.6M
    return -1;
104
26.1M
}
unicodeobject.c:ucs2lib_find_char
Line
Count
Source
51
13.3k
{
52
13.3k
    const STRINGLIB_CHAR *p, *e;
53
54
13.3k
    p = s;
55
13.3k
    e = s + n;
56
13.3k
    if (n > MEMCHR_CUT_OFF) {
57
#ifdef STRINGLIB_FAST_MEMCHR
58
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
        if (p != NULL)
60
            return (p - s);
61
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
1.03k
        const STRINGLIB_CHAR *s1, *e1;
66
1.03k
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
1.03k
        if (needle != 0) {
71
2.03k
            do {
72
2.03k
                const void *candidate = memchr(p, needle,
73
2.03k
                                               (e - p) * sizeof(STRINGLIB_CHAR));
74
2.03k
                if (candidate == NULL)
75
141
                    return -1;
76
1.89k
                s1 = p;
77
1.89k
                p = (const STRINGLIB_CHAR *)
78
1.89k
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
1.89k
                if (*p == ch)
80
57
                    return (p - s);
81
                /* False positive */
82
1.83k
                p++;
83
1.83k
                if (p - s1 > MEMCHR_CUT_OFF)
84
180
                    continue;
85
1.65k
                if (e - p <= MEMCHR_CUT_OFF)
86
538
                    break;
87
1.11k
                e1 = p + MEMCHR_CUT_OFF;
88
40.3k
                while (p != e1) {
89
39.4k
                    if (*p == ch)
90
200
                        return (p - s);
91
39.2k
                    p++;
92
39.2k
                }
93
1.11k
            }
94
1.09k
            while (e - p > MEMCHR_CUT_OFF);
95
1.03k
        }
96
1.03k
#endif
97
1.03k
    }
98
164k
    while (p < e) {
99
153k
        if (*p == ch)
100
2.01k
            return (p - s);
101
151k
        p++;
102
151k
    }
103
10.9k
    return -1;
104
12.9k
}
unicodeobject.c:ucs4lib_find_char
Line
Count
Source
51
9
{
52
9
    const STRINGLIB_CHAR *p, *e;
53
54
9
    p = s;
55
9
    e = s + n;
56
9
    if (n > MEMCHR_CUT_OFF) {
57
0
#ifdef STRINGLIB_FAST_MEMCHR
58
0
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
0
        if (p != NULL)
60
0
            return (p - s);
61
0
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                const void *candidate = memchr(p, needle,
73
                                               (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
0
    }
98
39
    while (p < e) {
99
30
        if (*p == ch)
100
0
            return (p - s);
101
30
        p++;
102
30
    }
103
9
    return -1;
104
9
}
unicodeobject.c:asciilib_find_char
Line
Count
Source
51
73
{
52
73
    const STRINGLIB_CHAR *p, *e;
53
54
73
    p = s;
55
73
    e = s + n;
56
73
    if (n > MEMCHR_CUT_OFF) {
57
0
#ifdef STRINGLIB_FAST_MEMCHR
58
0
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
0
        if (p != NULL)
60
0
            return (p - s);
61
0
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                const void *candidate = memchr(p, needle,
73
                                               (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
0
    }
98
503
    while (p < e) {
99
503
        if (*p == ch)
100
73
            return (p - s);
101
430
        p++;
102
430
    }
103
0
    return -1;
104
73
}
bytes_methods.c:stringlib_find_char
Line
Count
Source
51
515k
{
52
515k
    const STRINGLIB_CHAR *p, *e;
53
54
515k
    p = s;
55
515k
    e = s + n;
56
515k
    if (n > MEMCHR_CUT_OFF) {
57
477k
#ifdef STRINGLIB_FAST_MEMCHR
58
477k
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
477k
        if (p != NULL)
60
414k
            return (p - s);
61
62.5k
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                const void *candidate = memchr(p, needle,
73
                                               (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
477k
    }
98
602k
    while (p < e) {
99
565k
        if (*p == ch)
100
726
            return (p - s);
101
564k
        p++;
102
564k
    }
103
37.8k
    return -1;
104
38.5k
}
Unexecuted instantiation: bytearrayobject.c:stringlib_find_char
Unexecuted instantiation: bytesobject.c:stringlib_find_char
105
106
#undef MEMCHR_CUT_OFF
107
108
#if STRINGLIB_SIZEOF_CHAR == 1
109
10.7k
#  define MEMRCHR_CUT_OFF 15
110
#else
111
26.3k
#  define MEMRCHR_CUT_OFF 40
112
#endif
113
114
115
Py_LOCAL_INLINE(Py_ssize_t)
116
STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
117
13.0k
{
118
13.0k
    const STRINGLIB_CHAR *p;
119
13.0k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
13.0k
    if (n > MEMRCHR_CUT_OFF) {
126
#if STRINGLIB_SIZEOF_CHAR == 1
127
        p = memrchr(s, ch, n);
128
5.94k
        if (p != NULL)
129
2.40k
            return (p - s);
130
3.54k
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
1.39k
        if (needle != 0) {
141
8.87k
            do {
142
8.87k
                void *candidate = memrchr(s, needle,
143
8.87k
                                          n * sizeof(STRINGLIB_CHAR));
144
8.87k
                if (candidate == NULL)
145
920
                    return -1;
146
7.95k
                n1 = n;
147
7.95k
                p = (const STRINGLIB_CHAR *)
148
7.95k
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
7.95k
                n = p - s;
150
7.95k
                if (*p == ch)
151
347
                    return n;
152
                /* False positive */
153
7.60k
                if (n1 - n > MEMRCHR_CUT_OFF)
154
3.16k
                    continue;
155
4.43k
                if (n <= MEMRCHR_CUT_OFF)
156
14
                    break;
157
4.42k
                s1 = p - MEMRCHR_CUT_OFF;
158
180k
                while (p > s1) {
159
176k
                    p--;
160
176k
                    if (*p == ch)
161
16
                        return (p - s);
162
176k
                }
163
4.40k
                n = p - s;
164
4.40k
            }
165
7.57k
            while (n > MEMRCHR_CUT_OFF);
166
1.39k
        }
167
#endif
168
7.33k
    }
169
5.79k
#endif  /* HAVE_MEMRCHR */
170
5.79k
    p = s + n;
171
44.1k
    while (p > s) {
172
39.7k
        p--;
173
39.7k
        if (*p == ch)
174
1.37k
            return (p - s);
175
39.7k
    }
176
4.41k
    return -1;
177
5.79k
}
unicodeobject.c:ucs1lib_rfind_char
Line
Count
Source
117
3.44k
{
118
3.44k
    const STRINGLIB_CHAR *p;
119
3.44k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
3.44k
    if (n > MEMRCHR_CUT_OFF) {
126
2.81k
#if STRINGLIB_SIZEOF_CHAR == 1
127
2.81k
        p = memrchr(s, ch, n);
128
2.81k
        if (p != NULL)
129
910
            return (p - s);
130
1.90k
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
        if (needle != 0) {
141
            do {
142
                void *candidate = memrchr(s, needle,
143
                                          n * sizeof(STRINGLIB_CHAR));
144
                if (candidate == NULL)
145
                    return -1;
146
                n1 = n;
147
                p = (const STRINGLIB_CHAR *)
148
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
                n = p - s;
150
                if (*p == ch)
151
                    return n;
152
                /* False positive */
153
                if (n1 - n > MEMRCHR_CUT_OFF)
154
                    continue;
155
                if (n <= MEMRCHR_CUT_OFF)
156
                    break;
157
                s1 = p - MEMRCHR_CUT_OFF;
158
                while (p > s1) {
159
                    p--;
160
                    if (*p == ch)
161
                        return (p - s);
162
                }
163
                n = p - s;
164
            }
165
            while (n > MEMRCHR_CUT_OFF);
166
        }
167
#endif
168
2.81k
    }
169
631
#endif  /* HAVE_MEMRCHR */
170
631
    p = s + n;
171
4.99k
    while (p > s) {
172
4.46k
        p--;
173
4.46k
        if (*p == ch)
174
103
            return (p - s);
175
4.46k
    }
176
528
    return -1;
177
631
}
unicodeobject.c:ucs2lib_rfind_char
Line
Count
Source
117
952
{
118
952
    const STRINGLIB_CHAR *p;
119
952
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
952
    if (n > MEMRCHR_CUT_OFF) {
126
#if STRINGLIB_SIZEOF_CHAR == 1
127
        p = memrchr(s, ch, n);
128
        if (p != NULL)
129
            return (p - s);
130
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
512
        const STRINGLIB_CHAR *s1;
135
512
        Py_ssize_t n1;
136
512
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
512
        if (needle != 0) {
141
4.34k
            do {
142
4.34k
                void *candidate = memrchr(s, needle,
143
4.34k
                                          n * sizeof(STRINGLIB_CHAR));
144
4.34k
                if (candidate == NULL)
145
306
                    return -1;
146
4.03k
                n1 = n;
147
4.03k
                p = (const STRINGLIB_CHAR *)
148
4.03k
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
4.03k
                n = p - s;
150
4.03k
                if (*p == ch)
151
133
                    return n;
152
                /* False positive */
153
3.90k
                if (n1 - n > MEMRCHR_CUT_OFF)
154
1.18k
                    continue;
155
2.71k
                if (n <= MEMRCHR_CUT_OFF)
156
6
                    break;
157
2.71k
                s1 = p - MEMRCHR_CUT_OFF;
158
110k
                while (p > s1) {
159
108k
                    p--;
160
108k
                    if (*p == ch)
161
7
                        return (p - s);
162
108k
                }
163
2.70k
                n = p - s;
164
2.70k
            }
165
3.89k
            while (n > MEMRCHR_CUT_OFF);
166
512
        }
167
512
#endif
168
512
    }
169
506
#endif  /* HAVE_MEMRCHR */
170
506
    p = s + n;
171
5.63k
    while (p > s) {
172
5.14k
        p--;
173
5.14k
        if (*p == ch)
174
15
            return (p - s);
175
5.14k
    }
176
491
    return -1;
177
506
}
unicodeobject.c:ucs4lib_rfind_char
Line
Count
Source
117
1.35k
{
118
1.35k
    const STRINGLIB_CHAR *p;
119
1.35k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
1.35k
    if (n > MEMRCHR_CUT_OFF) {
126
#if STRINGLIB_SIZEOF_CHAR == 1
127
        p = memrchr(s, ch, n);
128
        if (p != NULL)
129
            return (p - s);
130
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
878
        const STRINGLIB_CHAR *s1;
135
878
        Py_ssize_t n1;
136
878
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
878
        if (needle != 0) {
141
4.52k
            do {
142
4.52k
                void *candidate = memrchr(s, needle,
143
4.52k
                                          n * sizeof(STRINGLIB_CHAR));
144
4.52k
                if (candidate == NULL)
145
614
                    return -1;
146
3.91k
                n1 = n;
147
3.91k
                p = (const STRINGLIB_CHAR *)
148
3.91k
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
3.91k
                n = p - s;
150
3.91k
                if (*p == ch)
151
214
                    return n;
152
                /* False positive */
153
3.70k
                if (n1 - n > MEMRCHR_CUT_OFF)
154
1.97k
                    continue;
155
1.72k
                if (n <= MEMRCHR_CUT_OFF)
156
8
                    break;
157
1.71k
                s1 = p - MEMRCHR_CUT_OFF;
158
70.0k
                while (p > s1) {
159
68.2k
                    p--;
160
68.2k
                    if (*p == ch)
161
9
                        return (p - s);
162
68.2k
                }
163
1.70k
                n = p - s;
164
1.70k
            }
165
3.68k
            while (n > MEMRCHR_CUT_OFF);
166
878
        }
167
878
#endif
168
878
    }
169
519
#endif  /* HAVE_MEMRCHR */
170
519
    p = s + n;
171
5.95k
    while (p > s) {
172
5.44k
        p--;
173
5.44k
        if (*p == ch)
174
13
            return (p - s);
175
5.44k
    }
176
506
    return -1;
177
519
}
unicodeobject.c:asciilib_rfind_char
Line
Count
Source
117
3.98k
{
118
3.98k
    const STRINGLIB_CHAR *p;
119
3.98k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
3.98k
    if (n > MEMRCHR_CUT_OFF) {
126
1.16k
#if STRINGLIB_SIZEOF_CHAR == 1
127
1.16k
        p = memrchr(s, ch, n);
128
1.16k
        if (p != NULL)
129
1.05k
            return (p - s);
130
110
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
        if (needle != 0) {
141
            do {
142
                void *candidate = memrchr(s, needle,
143
                                          n * sizeof(STRINGLIB_CHAR));
144
                if (candidate == NULL)
145
                    return -1;
146
                n1 = n;
147
                p = (const STRINGLIB_CHAR *)
148
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
                n = p - s;
150
                if (*p == ch)
151
                    return n;
152
                /* False positive */
153
                if (n1 - n > MEMRCHR_CUT_OFF)
154
                    continue;
155
                if (n <= MEMRCHR_CUT_OFF)
156
                    break;
157
                s1 = p - MEMRCHR_CUT_OFF;
158
                while (p > s1) {
159
                    p--;
160
                    if (*p == ch)
161
                        return (p - s);
162
                }
163
                n = p - s;
164
            }
165
            while (n > MEMRCHR_CUT_OFF);
166
        }
167
#endif
168
1.16k
    }
169
2.82k
#endif  /* HAVE_MEMRCHR */
170
2.82k
    p = s + n;
171
19.4k
    while (p > s) {
172
17.8k
        p--;
173
17.8k
        if (*p == ch)
174
1.22k
            return (p - s);
175
17.8k
    }
176
1.60k
    return -1;
177
2.82k
}
bytes_methods.c:stringlib_rfind_char
Line
Count
Source
117
3.28k
{
118
3.28k
    const STRINGLIB_CHAR *p;
119
3.28k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
3.28k
    if (n > MEMRCHR_CUT_OFF) {
126
1.97k
#if STRINGLIB_SIZEOF_CHAR == 1
127
1.97k
        p = memrchr(s, ch, n);
128
1.97k
        if (p != NULL)
129
444
            return (p - s);
130
1.52k
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
        if (needle != 0) {
141
            do {
142
                void *candidate = memrchr(s, needle,
143
                                          n * sizeof(STRINGLIB_CHAR));
144
                if (candidate == NULL)
145
                    return -1;
146
                n1 = n;
147
                p = (const STRINGLIB_CHAR *)
148
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
                n = p - s;
150
                if (*p == ch)
151
                    return n;
152
                /* False positive */
153
                if (n1 - n > MEMRCHR_CUT_OFF)
154
                    continue;
155
                if (n <= MEMRCHR_CUT_OFF)
156
                    break;
157
                s1 = p - MEMRCHR_CUT_OFF;
158
                while (p > s1) {
159
                    p--;
160
                    if (*p == ch)
161
                        return (p - s);
162
                }
163
                n = p - s;
164
            }
165
            while (n > MEMRCHR_CUT_OFF);
166
        }
167
#endif
168
1.97k
    }
169
1.31k
#endif  /* HAVE_MEMRCHR */
170
1.31k
    p = s + n;
171
8.10k
    while (p > s) {
172
6.82k
        p--;
173
6.82k
        if (*p == ch)
174
26
            return (p - s);
175
6.82k
    }
176
1.28k
    return -1;
177
1.31k
}
Unexecuted instantiation: bytearrayobject.c:stringlib_rfind_char
Unexecuted instantiation: bytesobject.c:stringlib_rfind_char
178
179
#undef MEMRCHR_CUT_OFF
180
181
/* Change to a 1 to see logging comments walk through the algorithm. */
182
#if 0 && STRINGLIB_SIZEOF_CHAR == 1
183
# define LOG(...) printf(__VA_ARGS__)
184
# define LOG_STRING(s, n) printf("\"%.*s\"", (int)(n), s)
185
# define LOG_LINEUP() do {                                         \
186
    LOG("> "); LOG_STRING(haystack, len_haystack); LOG("\n> ");    \
187
    LOG("%*s",(int)(window_last - haystack + 1 - len_needle), ""); \
188
    LOG_STRING(needle, len_needle); LOG("\n");                     \
189
} while(0)
190
#else
191
# define LOG(...)
192
# define LOG_STRING(s, n)
193
# define LOG_LINEUP()
194
#endif
195
196
Py_LOCAL_INLINE(Py_ssize_t)
197
STRINGLIB(_lex_search)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
198
                       Py_ssize_t *return_period, int invert_alphabet)
199
0
{
200
    /* Do a lexicographic search. Essentially this:
201
           >>> max(needle[i:] for i in range(len(needle)+1))
202
       Also find the period of the right half.   */
203
0
    Py_ssize_t max_suffix = 0;
204
0
    Py_ssize_t candidate = 1;
205
0
    Py_ssize_t k = 0;
206
    // The period of the right half.
207
0
    Py_ssize_t period = 1;
208
209
0
    while (candidate + k < len_needle) {
210
        // each loop increases candidate + k + max_suffix
211
0
        STRINGLIB_CHAR a = needle[candidate + k];
212
0
        STRINGLIB_CHAR b = needle[max_suffix + k];
213
        // check if the suffix at candidate is better than max_suffix
214
0
        if (invert_alphabet ? (b < a) : (a < b)) {
215
            // Fell short of max_suffix.
216
            // The next k + 1 characters are non-increasing
217
            // from candidate, so they won't start a maximal suffix.
218
0
            candidate += k + 1;
219
0
            k = 0;
220
            // We've ruled out any period smaller than what's
221
            // been scanned since max_suffix.
222
0
            period = candidate - max_suffix;
223
0
        }
224
0
        else if (a == b) {
225
0
            if (k + 1 != period) {
226
                // Keep scanning the equal strings
227
0
                k++;
228
0
            }
229
0
            else {
230
                // Matched a whole period.
231
                // Start matching the next period.
232
0
                candidate += period;
233
0
                k = 0;
234
0
            }
235
0
        }
236
0
        else {
237
            // Did better than max_suffix, so replace it.
238
0
            max_suffix = candidate;
239
0
            candidate++;
240
0
            k = 0;
241
0
            period = 1;
242
0
        }
243
0
    }
244
0
    *return_period = period;
245
0
    return max_suffix;
246
0
}
Unexecuted instantiation: unicodeobject.c:asciilib__lex_search
Unexecuted instantiation: unicodeobject.c:ucs1lib__lex_search
Unexecuted instantiation: unicodeobject.c:ucs2lib__lex_search
Unexecuted instantiation: unicodeobject.c:ucs4lib__lex_search
Unexecuted instantiation: bytes_methods.c:stringlib__lex_search
Unexecuted instantiation: bytearrayobject.c:stringlib__lex_search
Unexecuted instantiation: bytesobject.c:stringlib__lex_search
247
248
Py_LOCAL_INLINE(Py_ssize_t)
249
STRINGLIB(_factorize)(const STRINGLIB_CHAR *needle,
250
                      Py_ssize_t len_needle,
251
                      Py_ssize_t *return_period)
252
0
{
253
    /* Do a "critical factorization", making it so that:
254
       >>> needle = (left := needle[:cut]) + (right := needle[cut:])
255
       where the "local period" of the cut is maximal.
256
257
       The local period of the cut is the minimal length of a string w
258
       such that (left endswith w or w endswith left)
259
       and (right startswith w or w startswith right).
260
261
       The Critical Factorization Theorem says that this maximal local
262
       period is the global period of the string.
263
264
       Crochemore and Perrin (1991) show that this cut can be computed
265
       as the later of two cuts: one that gives a lexicographically
266
       maximal right half, and one that gives the same with the
267
       with respect to a reversed alphabet-ordering.
268
269
       This is what we want to happen:
270
           >>> x = "GCAGAGAG"
271
           >>> cut, period = factorize(x)
272
           >>> x[:cut], (right := x[cut:])
273
           ('GC', 'AGAGAG')
274
           >>> period  # right half period
275
           2
276
           >>> right[period:] == right[:-period]
277
           True
278
279
       This is how the local period lines up in the above example:
280
                GC | AGAGAG
281
           AGAGAGC = AGAGAGC
282
       The length of this minimal repetition is 7, which is indeed the
283
       period of the original string. */
284
285
0
    Py_ssize_t cut1, period1, cut2, period2, cut, period;
286
0
    cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0);
287
0
    cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1);
288
289
    // Take the later cut.
290
0
    if (cut1 > cut2) {
291
0
        period = period1;
292
0
        cut = cut1;
293
0
    }
294
0
    else {
295
0
        period = period2;
296
0
        cut = cut2;
297
0
    }
298
299
0
    LOG("split: "); LOG_STRING(needle, cut);
300
0
    LOG(" + "); LOG_STRING(needle + cut, len_needle - cut);
301
0
    LOG("\n");
302
303
0
    *return_period = period;
304
0
    return cut;
305
0
}
Unexecuted instantiation: unicodeobject.c:asciilib__factorize
Unexecuted instantiation: unicodeobject.c:ucs1lib__factorize
Unexecuted instantiation: unicodeobject.c:ucs2lib__factorize
Unexecuted instantiation: unicodeobject.c:ucs4lib__factorize
Unexecuted instantiation: bytes_methods.c:stringlib__factorize
Unexecuted instantiation: bytearrayobject.c:stringlib__factorize
Unexecuted instantiation: bytesobject.c:stringlib__factorize
306
307
308
0
#define SHIFT_TYPE uint8_t
309
#define MAX_SHIFT UINT8_MAX
310
311
0
#define TABLE_SIZE_BITS 6u
312
0
#define TABLE_SIZE (1U << TABLE_SIZE_BITS)
313
0
#define TABLE_MASK (TABLE_SIZE - 1U)
314
315
typedef struct STRINGLIB(_pre) {
316
    const STRINGLIB_CHAR *needle;
317
    Py_ssize_t len_needle;
318
    Py_ssize_t cut;
319
    Py_ssize_t period;
320
    Py_ssize_t gap;
321
    int is_periodic;
322
    SHIFT_TYPE table[TABLE_SIZE];
323
} STRINGLIB(prework);
324
325
326
static void
327
STRINGLIB(_preprocess)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
328
                       STRINGLIB(prework) *p)
329
0
{
330
0
    p->needle = needle;
331
0
    p->len_needle = len_needle;
332
0
    p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period));
333
0
    assert(p->period + p->cut <= len_needle);
334
0
    p->is_periodic = (0 == memcmp(needle,
335
0
                                  needle + p->period,
336
0
                                  p->cut * STRINGLIB_SIZEOF_CHAR));
337
0
    if (p->is_periodic) {
338
0
        assert(p->cut <= len_needle/2);
339
0
        assert(p->cut < p->period);
340
0
    }
341
0
    else {
342
        // A lower bound on the period
343
0
        p->period = Py_MAX(p->cut, len_needle - p->cut) + 1;
344
0
    }
345
    // The gap between the last character and the previous
346
    // occurrence of an equivalent character (modulo TABLE_SIZE)
347
0
    p->gap = len_needle;
348
0
    STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK;
349
0
    for (Py_ssize_t i = len_needle - 2; i >= 0; i--) {
350
0
        STRINGLIB_CHAR x = needle[i] & TABLE_MASK;
351
0
        if (x == last) {
352
0
            p->gap = len_needle - 1 - i;
353
0
            break;
354
0
        }
355
0
    }
356
    // Fill up a compressed Boyer-Moore "Bad Character" table
357
0
    Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT);
358
0
    for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; i++) {
359
0
        p->table[i] = Py_SAFE_DOWNCAST(not_found_shift,
360
0
                                       Py_ssize_t, SHIFT_TYPE);
361
0
    }
362
0
    for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; i++) {
363
0
        SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i,
364
0
                                            Py_ssize_t, SHIFT_TYPE);
365
0
        p->table[needle[i] & TABLE_MASK] = shift;
366
0
    }
367
0
}
Unexecuted instantiation: unicodeobject.c:asciilib__preprocess
Unexecuted instantiation: unicodeobject.c:ucs1lib__preprocess
Unexecuted instantiation: unicodeobject.c:ucs2lib__preprocess
Unexecuted instantiation: unicodeobject.c:ucs4lib__preprocess
Unexecuted instantiation: bytes_methods.c:stringlib__preprocess
Unexecuted instantiation: bytearrayobject.c:stringlib__preprocess
Unexecuted instantiation: bytesobject.c:stringlib__preprocess
368
369
static Py_ssize_t
370
STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
371
                    STRINGLIB(prework) *p)
372
0
{
373
    // Crochemore and Perrin's (1991) Two-Way algorithm.
374
    // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
375
0
    const Py_ssize_t len_needle = p->len_needle;
376
0
    const Py_ssize_t cut = p->cut;
377
0
    Py_ssize_t period = p->period;
378
0
    const STRINGLIB_CHAR *const needle = p->needle;
379
0
    const STRINGLIB_CHAR *window_last = haystack + len_needle - 1;
380
0
    const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack;
381
0
    SHIFT_TYPE *table = p->table;
382
0
    const STRINGLIB_CHAR *window;
383
0
    LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack);
384
385
0
    Py_ssize_t gap = p->gap;
386
0
    Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap);
387
0
    if (p->is_periodic) {
388
0
        LOG("Needle is periodic.\n");
389
0
        Py_ssize_t memory = 0;
390
0
      periodicwindowloop:
391
0
        while (window_last < haystack_end) {
392
0
            assert(memory == 0);
393
0
            for (;;) {
394
0
                LOG_LINEUP();
395
0
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
396
0
                window_last += shift;
397
0
                if (shift == 0) {
398
0
                    break;
399
0
                }
400
0
                if (window_last >= haystack_end) {
401
0
                    return -1;
402
0
                }
403
0
                LOG("Horspool skip\n");
404
0
            }
405
0
          no_shift:
406
0
            window = window_last - len_needle + 1;
407
0
            assert((window[len_needle - 1] & TABLE_MASK) ==
408
0
                   (needle[len_needle - 1] & TABLE_MASK));
409
0
            Py_ssize_t i = Py_MAX(cut, memory);
410
0
            for (; i < len_needle; i++) {
411
0
                if (needle[i] != window[i]) {
412
0
                    if (i < gap_jump_end) {
413
0
                        LOG("Early right half mismatch: jump by gap.\n");
414
0
                        assert(gap >= i - cut + 1);
415
0
                        window_last += gap;
416
0
                    }
417
0
                    else {
418
0
                        LOG("Late right half mismatch: jump by n (>gap)\n");
419
0
                        assert(i - cut + 1 > gap);
420
0
                        window_last += i - cut + 1;
421
0
                    }
422
0
                    memory = 0;
423
0
                    goto periodicwindowloop;
424
0
                }
425
0
            }
426
0
            for (i = memory; i < cut; i++) {
427
0
                if (needle[i] != window[i]) {
428
0
                    LOG("Left half does not match.\n");
429
0
                    window_last += period;
430
0
                    memory = len_needle - period;
431
0
                    if (window_last >= haystack_end) {
432
0
                        return -1;
433
0
                    }
434
0
                    Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
435
0
                    if (shift) {
436
                        // A mismatch has been identified to the right
437
                        // of where i will next start, so we can jump
438
                        // at least as far as if the mismatch occurred
439
                        // on the first comparison.
440
0
                        Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1;
441
0
                        LOG("Skip with Memory.\n");
442
0
                        memory = 0;
443
0
                        window_last += Py_MAX(shift, mem_jump);
444
0
                        goto periodicwindowloop;
445
0
                    }
446
0
                    goto no_shift;
447
0
                }
448
0
            }
449
0
            LOG("Found a match!\n");
450
0
            return window - haystack;
451
0
        }
452
0
    }
453
0
    else {
454
0
        period = Py_MAX(gap, period);
455
0
        LOG("Needle is not periodic.\n");
456
0
      windowloop:
457
0
        while (window_last < haystack_end) {
458
0
            for (;;) {
459
0
                LOG_LINEUP();
460
0
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
461
0
                window_last += shift;
462
0
                if (shift == 0) {
463
0
                    break;
464
0
                }
465
0
                if (window_last >= haystack_end) {
466
0
                    return -1;
467
0
                }
468
0
                LOG("Horspool skip\n");
469
0
            }
470
0
            window = window_last - len_needle + 1;
471
0
            assert((window[len_needle - 1] & TABLE_MASK) ==
472
0
                   (needle[len_needle - 1] & TABLE_MASK));
473
0
            Py_ssize_t i = cut;
474
0
            for (; i < len_needle; i++) {
475
0
                if (needle[i] != window[i]) {
476
0
                    if (i < gap_jump_end) {
477
0
                        LOG("Early right half mismatch: jump by gap.\n");
478
0
                        assert(gap >= i - cut + 1);
479
0
                        window_last += gap;
480
0
                    }
481
0
                    else {
482
0
                        LOG("Late right half mismatch: jump by n (>gap)\n");
483
0
                        assert(i - cut + 1 > gap);
484
0
                        window_last += i - cut + 1;
485
0
                    }
486
0
                    goto windowloop;
487
0
                }
488
0
            }
489
0
            for (Py_ssize_t i = 0; i < cut; i++) {
490
0
                if (needle[i] != window[i]) {
491
0
                    LOG("Left half does not match.\n");
492
0
                    window_last += period;
493
0
                    goto windowloop;
494
0
                }
495
0
            }
496
0
            LOG("Found a match!\n");
497
0
            return window - haystack;
498
0
        }
499
0
    }
500
0
    LOG("Not found. Returning -1.\n");
501
0
    return -1;
502
0
}
Unexecuted instantiation: unicodeobject.c:asciilib__two_way
Unexecuted instantiation: unicodeobject.c:ucs1lib__two_way
Unexecuted instantiation: unicodeobject.c:ucs2lib__two_way
Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way
Unexecuted instantiation: bytes_methods.c:stringlib__two_way
Unexecuted instantiation: bytearrayobject.c:stringlib__two_way
Unexecuted instantiation: bytesobject.c:stringlib__two_way
503
504
505
static Py_ssize_t
506
STRINGLIB(_two_way_find)(const STRINGLIB_CHAR *haystack,
507
                         Py_ssize_t len_haystack,
508
                         const STRINGLIB_CHAR *needle,
509
                         Py_ssize_t len_needle)
510
0
{
511
0
    LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack);
512
0
    STRINGLIB(prework) p;
513
0
    STRINGLIB(_preprocess)(needle, len_needle, &p);
514
0
    return STRINGLIB(_two_way)(haystack, len_haystack, &p);
515
0
}
Unexecuted instantiation: unicodeobject.c:asciilib__two_way_find
Unexecuted instantiation: unicodeobject.c:ucs1lib__two_way_find
Unexecuted instantiation: unicodeobject.c:ucs2lib__two_way_find
Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way_find
Unexecuted instantiation: bytes_methods.c:stringlib__two_way_find
Unexecuted instantiation: bytearrayobject.c:stringlib__two_way_find
Unexecuted instantiation: bytesobject.c:stringlib__two_way_find
516
517
518
static Py_ssize_t
519
STRINGLIB(_two_way_count)(const STRINGLIB_CHAR *haystack,
520
                          Py_ssize_t len_haystack,
521
                          const STRINGLIB_CHAR *needle,
522
                          Py_ssize_t len_needle,
523
                          Py_ssize_t maxcount)
524
0
{
525
0
    LOG("###### Counting \"%s\" in \"%s\".\n", needle, haystack);
526
0
    STRINGLIB(prework) p;
527
0
    STRINGLIB(_preprocess)(needle, len_needle, &p);
528
0
    Py_ssize_t index = 0, count = 0;
529
0
    while (1) {
530
0
        Py_ssize_t result;
531
0
        result = STRINGLIB(_two_way)(haystack + index,
532
0
                                     len_haystack - index, &p);
533
0
        if (result == -1) {
534
0
            return count;
535
0
        }
536
0
        count++;
537
0
        if (count == maxcount) {
538
0
            return maxcount;
539
0
        }
540
0
        index += result + len_needle;
541
0
    }
542
0
    return count;
543
0
}
Unexecuted instantiation: unicodeobject.c:asciilib__two_way_count
Unexecuted instantiation: unicodeobject.c:ucs1lib__two_way_count
Unexecuted instantiation: unicodeobject.c:ucs2lib__two_way_count
Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way_count
Unexecuted instantiation: bytes_methods.c:stringlib__two_way_count
Unexecuted instantiation: bytearrayobject.c:stringlib__two_way_count
Unexecuted instantiation: bytesobject.c:stringlib__two_way_count
544
545
#undef SHIFT_TYPE
546
#undef NOT_FOUND
547
#undef SHIFT_OVERFLOW
548
#undef TABLE_SIZE_BITS
549
#undef TABLE_SIZE
550
#undef TABLE_MASK
551
552
#undef LOG
553
#undef LOG_STRING
554
#undef LOG_LINEUP
555
556
static inline Py_ssize_t
557
STRINGLIB(default_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
558
                        const STRINGLIB_CHAR* p, Py_ssize_t m,
559
                        Py_ssize_t maxcount, int mode)
560
399M
{
561
399M
    const Py_ssize_t w = n - m;
562
399M
    Py_ssize_t mlast = m - 1, count = 0;
563
399M
    Py_ssize_t gap = mlast;
564
399M
    const STRINGLIB_CHAR last = p[mlast];
565
399M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
399M
    unsigned long mask = 0;
568
3.59G
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
3.19G
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
3.19G
        if (p[i] == last) {
571
25.4k
            gap = mlast - i - 1;
572
25.4k
        }
573
3.19G
    }
574
399M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
4.80G
    for (Py_ssize_t i = 0; i <= w; i++) {
577
4.40G
        if (ss[i] == last) {
578
            /* candidate match */
579
399M
            Py_ssize_t j;
580
399M
            for (j = 0; j < mlast; j++) {
581
399M
                if (s[i+j] != p[j]) {
582
399M
                    break;
583
399M
                }
584
399M
            }
585
399M
            if (j == mlast) {
586
                /* got a match! */
587
3.13k
                if (mode != FAST_COUNT) {
588
1.85k
                    return i;
589
1.85k
                }
590
1.28k
                count++;
591
1.28k
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
1.28k
                i = i + mlast;
595
1.28k
                continue;
596
1.28k
            }
597
            /* miss: check if next character is part of pattern */
598
399M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
348
                i = i + m;
600
348
            }
601
399M
            else {
602
399M
                i = i + gap;
603
399M
            }
604
399M
        }
605
4.00G
        else {
606
            /* skip: check if next character is part of pattern */
607
4.00G
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
1.99G
                i = i + m;
609
1.99G
            }
610
4.00G
        }
611
4.40G
    }
612
399M
    return mode == FAST_COUNT ? count : -1;
613
399M
}
unicodeobject.c:asciilib_default_find
Line
Count
Source
560
943
{
561
943
    const Py_ssize_t w = n - m;
562
943
    Py_ssize_t mlast = m - 1, count = 0;
563
943
    Py_ssize_t gap = mlast;
564
943
    const STRINGLIB_CHAR last = p[mlast];
565
943
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
943
    unsigned long mask = 0;
568
2.15k
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
1.21k
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
1.21k
        if (p[i] == last) {
571
673
            gap = mlast - i - 1;
572
673
        }
573
1.21k
    }
574
943
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
5.31k
    for (Py_ssize_t i = 0; i <= w; i++) {
577
5.31k
        if (ss[i] == last) {
578
            /* candidate match */
579
1.07k
            Py_ssize_t j;
580
2.29k
            for (j = 0; j < mlast; j++) {
581
1.34k
                if (s[i+j] != p[j]) {
582
134
                    break;
583
134
                }
584
1.34k
            }
585
1.07k
            if (j == mlast) {
586
                /* got a match! */
587
943
                if (mode != FAST_COUNT) {
588
943
                    return i;
589
943
                }
590
0
                count++;
591
0
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
0
                i = i + mlast;
595
0
                continue;
596
0
            }
597
            /* miss: check if next character is part of pattern */
598
134
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
0
                i = i + m;
600
0
            }
601
134
            else {
602
134
                i = i + gap;
603
134
            }
604
134
        }
605
4.23k
        else {
606
            /* skip: check if next character is part of pattern */
607
4.23k
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
3.98k
                i = i + m;
609
3.98k
            }
610
4.23k
        }
611
5.31k
    }
612
0
    return mode == FAST_COUNT ? count : -1;
613
943
}
unicodeobject.c:ucs1lib_default_find
Line
Count
Source
560
399M
{
561
399M
    const Py_ssize_t w = n - m;
562
399M
    Py_ssize_t mlast = m - 1, count = 0;
563
399M
    Py_ssize_t gap = mlast;
564
399M
    const STRINGLIB_CHAR last = p[mlast];
565
399M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
399M
    unsigned long mask = 0;
568
3.59G
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
3.19G
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
3.19G
        if (p[i] == last) {
571
23.8k
            gap = mlast - i - 1;
572
23.8k
        }
573
3.19G
    }
574
399M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
4.80G
    for (Py_ssize_t i = 0; i <= w; i++) {
577
4.40G
        if (ss[i] == last) {
578
            /* candidate match */
579
399M
            Py_ssize_t j;
580
399M
            for (j = 0; j < mlast; j++) {
581
399M
                if (s[i+j] != p[j]) {
582
399M
                    break;
583
399M
                }
584
399M
            }
585
399M
            if (j == mlast) {
586
                /* got a match! */
587
1.05k
                if (mode != FAST_COUNT) {
588
55
                    return i;
589
55
                }
590
998
                count++;
591
998
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
998
                i = i + mlast;
595
998
                continue;
596
998
            }
597
            /* miss: check if next character is part of pattern */
598
399M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
1
                i = i + m;
600
1
            }
601
399M
            else {
602
399M
                i = i + gap;
603
399M
            }
604
399M
        }
605
4.00G
        else {
606
            /* skip: check if next character is part of pattern */
607
4.00G
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
1.99G
                i = i + m;
609
1.99G
            }
610
4.00G
        }
611
4.40G
    }
612
399M
    return mode == FAST_COUNT ? count : -1;
613
399M
}
unicodeobject.c:ucs2lib_default_find
Line
Count
Source
560
144
{
561
144
    const Py_ssize_t w = n - m;
562
144
    Py_ssize_t mlast = m - 1, count = 0;
563
144
    Py_ssize_t gap = mlast;
564
144
    const STRINGLIB_CHAR last = p[mlast];
565
144
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
144
    unsigned long mask = 0;
568
288
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
144
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
144
        if (p[i] == last) {
571
144
            gap = mlast - i - 1;
572
144
        }
573
144
    }
574
144
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
12.8k
    for (Py_ssize_t i = 0; i <= w; i++) {
577
12.7k
        if (ss[i] == last) {
578
            /* candidate match */
579
304
            Py_ssize_t j;
580
534
            for (j = 0; j < mlast; j++) {
581
304
                if (s[i+j] != p[j]) {
582
74
                    break;
583
74
                }
584
304
            }
585
304
            if (j == mlast) {
586
                /* got a match! */
587
230
                if (mode != FAST_COUNT) {
588
115
                    return i;
589
115
                }
590
115
                count++;
591
115
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
115
                i = i + mlast;
595
115
                continue;
596
115
            }
597
            /* miss: check if next character is part of pattern */
598
74
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
0
                i = i + m;
600
0
            }
601
74
            else {
602
74
                i = i + gap;
603
74
            }
604
74
        }
605
12.4k
        else {
606
            /* skip: check if next character is part of pattern */
607
12.4k
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
12.4k
                i = i + m;
609
12.4k
            }
610
12.4k
        }
611
12.7k
    }
612
29
    return mode == FAST_COUNT ? count : -1;
613
144
}
unicodeobject.c:ucs4lib_default_find
Line
Count
Source
560
202
{
561
202
    const Py_ssize_t w = n - m;
562
202
    Py_ssize_t mlast = m - 1, count = 0;
563
202
    Py_ssize_t gap = mlast;
564
202
    const STRINGLIB_CHAR last = p[mlast];
565
202
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
202
    unsigned long mask = 0;
568
404
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
202
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
202
        if (p[i] == last) {
571
202
            gap = mlast - i - 1;
572
202
        }
573
202
    }
574
202
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
11.7k
    for (Py_ssize_t i = 0; i <= w; i++) {
577
11.7k
        if (ss[i] == last) {
578
            /* candidate match */
579
412
            Py_ssize_t j;
580
762
            for (j = 0; j < mlast; j++) {
581
412
                if (s[i+j] != p[j]) {
582
62
                    break;
583
62
                }
584
412
            }
585
412
            if (j == mlast) {
586
                /* got a match! */
587
350
                if (mode != FAST_COUNT) {
588
175
                    return i;
589
175
                }
590
175
                count++;
591
175
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
175
                i = i + mlast;
595
175
                continue;
596
175
            }
597
            /* miss: check if next character is part of pattern */
598
62
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
0
                i = i + m;
600
0
            }
601
62
            else {
602
62
                i = i + gap;
603
62
            }
604
62
        }
605
11.3k
        else {
606
            /* skip: check if next character is part of pattern */
607
11.3k
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
10.9k
                i = i + m;
609
10.9k
            }
610
11.3k
        }
611
11.7k
    }
612
27
    return mode == FAST_COUNT ? count : -1;
613
202
}
bytes_methods.c:stringlib_default_find
Line
Count
Source
560
614
{
561
614
    const Py_ssize_t w = n - m;
562
614
    Py_ssize_t mlast = m - 1, count = 0;
563
614
    Py_ssize_t gap = mlast;
564
614
    const STRINGLIB_CHAR last = p[mlast];
565
614
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
614
    unsigned long mask = 0;
568
2.45k
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
1.84k
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
1.84k
        if (p[i] == last) {
571
614
            gap = mlast - i - 1;
572
614
        }
573
1.84k
    }
574
614
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
10.7k
    for (Py_ssize_t i = 0; i <= w; i++) {
577
10.6k
        if (ss[i] == last) {
578
            /* candidate match */
579
1.85k
            Py_ssize_t j;
580
3.57k
            for (j = 0; j < mlast; j++) {
581
3.01k
                if (s[i+j] != p[j]) {
582
1.29k
                    break;
583
1.29k
                }
584
3.01k
            }
585
1.85k
            if (j == mlast) {
586
                /* got a match! */
587
563
                if (mode != FAST_COUNT) {
588
563
                    return i;
589
563
                }
590
0
                count++;
591
0
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
0
                i = i + mlast;
595
0
                continue;
596
0
            }
597
            /* miss: check if next character is part of pattern */
598
1.29k
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
347
                i = i + m;
600
347
            }
601
945
            else {
602
945
                i = i + gap;
603
945
            }
604
1.29k
        }
605
8.80k
        else {
606
            /* skip: check if next character is part of pattern */
607
8.80k
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
5.72k
                i = i + m;
609
5.72k
            }
610
8.80k
        }
611
10.6k
    }
612
51
    return mode == FAST_COUNT ? count : -1;
613
614
}
Unexecuted instantiation: bytearrayobject.c:stringlib_default_find
Unexecuted instantiation: bytesobject.c:stringlib_default_find
614
615
616
static Py_ssize_t
617
STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
618
                         const STRINGLIB_CHAR* p, Py_ssize_t m,
619
                         Py_ssize_t maxcount, int mode)
620
0
{
621
0
    const Py_ssize_t w = n - m;
622
0
    Py_ssize_t mlast = m - 1, count = 0;
623
0
    Py_ssize_t gap = mlast;
624
0
    Py_ssize_t hits = 0, res;
625
0
    const STRINGLIB_CHAR last = p[mlast];
626
0
    const STRINGLIB_CHAR *const ss = &s[mlast];
627
628
0
    unsigned long mask = 0;
629
0
    for (Py_ssize_t i = 0; i < mlast; i++) {
630
0
        STRINGLIB_BLOOM_ADD(mask, p[i]);
631
0
        if (p[i] == last) {
632
0
            gap = mlast - i - 1;
633
0
        }
634
0
    }
635
0
    STRINGLIB_BLOOM_ADD(mask, last);
636
637
0
    for (Py_ssize_t i = 0; i <= w; i++) {
638
0
        if (ss[i] == last) {
639
            /* candidate match */
640
0
            Py_ssize_t j;
641
0
            for (j = 0; j < mlast; j++) {
642
0
                if (s[i+j] != p[j]) {
643
0
                    break;
644
0
                }
645
0
            }
646
0
            if (j == mlast) {
647
                /* got a match! */
648
0
                if (mode != FAST_COUNT) {
649
0
                    return i;
650
0
                }
651
0
                count++;
652
0
                if (count == maxcount) {
653
0
                    return maxcount;
654
0
                }
655
0
                i = i + mlast;
656
0
                continue;
657
0
            }
658
0
            hits += j + 1;
659
0
            if (hits > m / 4 && w - i > 2000) {
660
0
                if (mode == FAST_SEARCH) {
661
0
                    res = STRINGLIB(_two_way_find)(s + i, n - i, p, m);
662
0
                    return res == -1 ? -1 : res + i;
663
0
                }
664
0
                else {
665
0
                    res = STRINGLIB(_two_way_count)(s + i, n - i, p, m,
666
0
                                                    maxcount - count);
667
0
                    return res + count;
668
0
                }
669
0
            }
670
            /* miss: check if next character is part of pattern */
671
0
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
672
0
                i = i + m;
673
0
            }
674
0
            else {
675
0
                i = i + gap;
676
0
            }
677
0
        }
678
0
        else {
679
            /* skip: check if next character is part of pattern */
680
0
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
681
0
                i = i + m;
682
0
            }
683
0
        }
684
0
    }
685
0
    return mode == FAST_COUNT ? count : -1;
686
0
}
Unexecuted instantiation: unicodeobject.c:asciilib_adaptive_find
Unexecuted instantiation: unicodeobject.c:ucs1lib_adaptive_find
Unexecuted instantiation: unicodeobject.c:ucs2lib_adaptive_find
Unexecuted instantiation: unicodeobject.c:ucs4lib_adaptive_find
Unexecuted instantiation: bytes_methods.c:stringlib_adaptive_find
Unexecuted instantiation: bytearrayobject.c:stringlib_adaptive_find
Unexecuted instantiation: bytesobject.c:stringlib_adaptive_find
687
688
689
static Py_ssize_t
690
STRINGLIB(default_rfind)(const STRINGLIB_CHAR* s, Py_ssize_t n,
691
                         const STRINGLIB_CHAR* p, Py_ssize_t m,
692
                         Py_ssize_t maxcount, int mode)
693
0
{
694
    /* create compressed boyer-moore delta 1 table */
695
0
    unsigned long mask = 0;
696
0
    Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m;
697
698
    /* process pattern[0] outside the loop */
699
0
    STRINGLIB_BLOOM_ADD(mask, p[0]);
700
    /* process pattern[:0:-1] */
701
0
    for (i = mlast; i > 0; i--) {
702
0
        STRINGLIB_BLOOM_ADD(mask, p[i]);
703
0
        if (p[i] == p[0]) {
704
0
            skip = i - 1;
705
0
        }
706
0
    }
707
708
0
    for (i = w; i >= 0; i--) {
709
0
        if (s[i] == p[0]) {
710
            /* candidate match */
711
0
            for (j = mlast; j > 0; j--) {
712
0
                if (s[i+j] != p[j]) {
713
0
                    break;
714
0
                }
715
0
            }
716
0
            if (j == 0) {
717
                /* got a match! */
718
0
                return i;
719
0
            }
720
            /* miss: check if previous character is part of pattern */
721
0
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
722
0
                i = i - m;
723
0
            }
724
0
            else {
725
0
                i = i - skip;
726
0
            }
727
0
        }
728
0
        else {
729
            /* skip: check if previous character is part of pattern */
730
0
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
731
0
                i = i - m;
732
0
            }
733
0
        }
734
0
    }
735
0
    return -1;
736
0
}
Unexecuted instantiation: unicodeobject.c:asciilib_default_rfind
Unexecuted instantiation: unicodeobject.c:ucs1lib_default_rfind
Unexecuted instantiation: unicodeobject.c:ucs2lib_default_rfind
Unexecuted instantiation: unicodeobject.c:ucs4lib_default_rfind
Unexecuted instantiation: bytes_methods.c:stringlib_default_rfind
Unexecuted instantiation: bytearrayobject.c:stringlib_default_rfind
Unexecuted instantiation: bytesobject.c:stringlib_default_rfind
737
738
739
static inline Py_ssize_t
740
STRINGLIB(count_char)(const STRINGLIB_CHAR *s, Py_ssize_t n,
741
                      const STRINGLIB_CHAR p0, Py_ssize_t maxcount)
742
0
{
743
0
    Py_ssize_t i, count = 0;
744
0
    for (i = 0; i < n; i++) {
745
0
        if (s[i] == p0) {
746
0
            count++;
747
0
            if (count == maxcount) {
748
0
                return maxcount;
749
0
            }
750
0
        }
751
0
    }
752
0
    return count;
753
0
}
Unexecuted instantiation: unicodeobject.c:asciilib_count_char
Unexecuted instantiation: unicodeobject.c:ucs1lib_count_char
Unexecuted instantiation: unicodeobject.c:ucs2lib_count_char
Unexecuted instantiation: unicodeobject.c:ucs4lib_count_char
Unexecuted instantiation: bytes_methods.c:stringlib_count_char
Unexecuted instantiation: bytearrayobject.c:stringlib_count_char
Unexecuted instantiation: bytesobject.c:stringlib_count_char
754
755
756
static inline Py_ssize_t
757
STRINGLIB(count_char_no_maxcount)(const STRINGLIB_CHAR *s, Py_ssize_t n,
758
                                  const STRINGLIB_CHAR p0)
759
/* A specialized function of count_char that does not cut off at a maximum.
760
   As a result, the compiler is able to vectorize the loop. */
761
28.2k
{
762
28.2k
    Py_ssize_t count = 0;
763
326M
    for (Py_ssize_t i = 0; i < n; i++) {
764
326M
        if (s[i] == p0) {
765
17.5M
            count++;
766
17.5M
        }
767
326M
    }
768
28.2k
    return count;
769
28.2k
}
Unexecuted instantiation: unicodeobject.c:asciilib_count_char_no_maxcount
unicodeobject.c:ucs1lib_count_char_no_maxcount
Line
Count
Source
761
23.9k
{
762
23.9k
    Py_ssize_t count = 0;
763
218M
    for (Py_ssize_t i = 0; i < n; i++) {
764
218M
        if (s[i] == p0) {
765
9.92M
            count++;
766
9.92M
        }
767
218M
    }
768
23.9k
    return count;
769
23.9k
}
unicodeobject.c:ucs2lib_count_char_no_maxcount
Line
Count
Source
761
994
{
762
994
    Py_ssize_t count = 0;
763
27.1M
    for (Py_ssize_t i = 0; i < n; i++) {
764
27.1M
        if (s[i] == p0) {
765
4.77M
            count++;
766
4.77M
        }
767
27.1M
    }
768
994
    return count;
769
994
}
unicodeobject.c:ucs4lib_count_char_no_maxcount
Line
Count
Source
761
1.35k
{
762
1.35k
    Py_ssize_t count = 0;
763
72.0M
    for (Py_ssize_t i = 0; i < n; i++) {
764
72.0M
        if (s[i] == p0) {
765
2.88M
            count++;
766
2.88M
        }
767
72.0M
    }
768
1.35k
    return count;
769
1.35k
}
bytes_methods.c:stringlib_count_char_no_maxcount
Line
Count
Source
761
1.95k
{
762
1.95k
    Py_ssize_t count = 0;
763
8.21M
    for (Py_ssize_t i = 0; i < n; i++) {
764
8.20M
        if (s[i] == p0) {
765
15.3k
            count++;
766
15.3k
        }
767
8.20M
    }
768
1.95k
    return count;
769
1.95k
}
Unexecuted instantiation: bytearrayobject.c:stringlib_count_char_no_maxcount
Unexecuted instantiation: bytesobject.c:stringlib_count_char_no_maxcount
770
771
772
Py_LOCAL_INLINE(Py_ssize_t)
773
FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
774
           const STRINGLIB_CHAR* p, Py_ssize_t m,
775
           Py_ssize_t maxcount, int mode)
776
400M
{
777
400M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
220
        return -1;
779
220
    }
780
781
    /* look for special cases */
782
400M
    if (m <= 1) {
783
34.4k
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
34.4k
        if (mode == FAST_SEARCH)
788
2.27k
            return STRINGLIB(find_char)(s, n, p[0]);
789
32.1k
        else if (mode == FAST_RSEARCH)
790
3.98k
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
28.2k
        else {
792
28.2k
            if (maxcount == PY_SSIZE_T_MAX) {
793
28.2k
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
28.2k
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
28.2k
        }
797
34.4k
    }
798
799
399M
    if (mode != FAST_RSEARCH) {
800
399M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
399M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
399M
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
399M
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
399M
}
unicodeobject.c:asciilib_fastsearch
Line
Count
Source
776
5.00k
{
777
5.00k
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
0
        return -1;
779
0
    }
780
781
    /* look for special cases */
782
5.00k
    if (m <= 1) {
783
4.06k
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
4.06k
        if (mode == FAST_SEARCH)
788
73
            return STRINGLIB(find_char)(s, n, p[0]);
789
3.98k
        else if (mode == FAST_RSEARCH)
790
3.98k
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
0
        else {
792
0
            if (maxcount == PY_SSIZE_T_MAX) {
793
0
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
0
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
0
        }
797
4.06k
    }
798
799
943
    if (mode != FAST_RSEARCH) {
800
943
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
943
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
943
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
943
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
943
}
unicodeobject.c:ucs1lib_fastsearch
Line
Count
Source
776
400M
{
777
400M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
0
        return -1;
779
0
    }
780
781
    /* look for special cases */
782
400M
    if (m <= 1) {
783
23.9k
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
23.9k
        if (mode == FAST_SEARCH)
788
11
            return STRINGLIB(find_char)(s, n, p[0]);
789
23.9k
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
23.9k
        else {
792
23.9k
            if (maxcount == PY_SSIZE_T_MAX) {
793
23.9k
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
23.9k
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
23.9k
        }
797
23.9k
    }
798
799
399M
    if (mode != FAST_RSEARCH) {
800
399M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
399M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
399M
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
399M
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
399M
}
unicodeobject.c:ucs2lib_fastsearch
Line
Count
Source
776
1.15k
{
777
1.15k
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
0
        return -1;
779
0
    }
780
781
    /* look for special cases */
782
1.15k
    if (m <= 1) {
783
1.01k
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
1.01k
        if (mode == FAST_SEARCH)
788
17
            return STRINGLIB(find_char)(s, n, p[0]);
789
994
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
994
        else {
792
994
            if (maxcount == PY_SSIZE_T_MAX) {
793
994
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
994
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
994
        }
797
1.01k
    }
798
799
144
    if (mode != FAST_RSEARCH) {
800
144
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
144
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
144
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
144
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
144
}
unicodeobject.c:ucs4lib_fastsearch
Line
Count
Source
776
1.55k
{
777
1.55k
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
0
        return -1;
779
0
    }
780
781
    /* look for special cases */
782
1.55k
    if (m <= 1) {
783
1.35k
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
1.35k
        if (mode == FAST_SEARCH)
788
0
            return STRINGLIB(find_char)(s, n, p[0]);
789
1.35k
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
1.35k
        else {
792
1.35k
            if (maxcount == PY_SSIZE_T_MAX) {
793
1.35k
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
1.35k
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
1.35k
        }
797
1.35k
    }
798
799
202
    if (mode != FAST_RSEARCH) {
800
202
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
202
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
202
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
202
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
202
}
bytes_methods.c:fastsearch
Line
Count
Source
776
4.95k
{
777
4.95k
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
220
        return -1;
779
220
    }
780
781
    /* look for special cases */
782
4.73k
    if (m <= 1) {
783
4.12k
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
4.12k
        if (mode == FAST_SEARCH)
788
2.17k
            return STRINGLIB(find_char)(s, n, p[0]);
789
1.95k
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
1.95k
        else {
792
1.95k
            if (maxcount == PY_SSIZE_T_MAX) {
793
1.95k
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
1.95k
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
1.95k
        }
797
4.12k
    }
798
799
614
    if (mode != FAST_RSEARCH) {
800
614
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
614
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
614
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
614
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
614
}
Unexecuted instantiation: bytearrayobject.c:fastsearch
Unexecuted instantiation: bytesobject.c:fastsearch
830