Coverage Report

Created: 2025-12-14 07:06

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Objects/stringlib/fastsearch.h
Line
Count
Source
1
/* stringlib: fastsearch implementation */
2
3
#define STRINGLIB_FASTSEARCH_H
4
5
/* fast search/count implementation, based on a mix between boyer-
6
   moore and horspool, with a few more bells and whistles on the top.
7
   for some more background, see:
8
   https://web.archive.org/web/20201107074620/http://effbot.org/zone/stringlib.htm */
9
10
/* note: fastsearch may access s[n], which isn't a problem when using
11
   Python's ordinary string types, but may cause problems if you're
12
   using this code in other contexts.  also, the count mode returns -1
13
   if there cannot possibly be a match in the target string, and 0 if
14
   it has actually checked for matches, but didn't find any.  callers
15
   beware! */
16
17
/* If the strings are long enough, use Crochemore and Perrin's Two-Way
18
   algorithm, which has worst-case O(n) runtime and best-case O(n/k).
19
   Also compute a table of shifts to achieve O(n/k) in more cases,
20
   and often (data dependent) deduce larger shifts than pure C&P can
21
   deduce. See stringlib_find_two_way_notes.txt in this folder for a
22
   detailed explanation. */
23
24
583M
#define FAST_COUNT 0
25
377M
#define FAST_SEARCH 1
26
92.8M
#define FAST_RSEARCH 2
27
28
#if LONG_BIT >= 128
29
#define STRINGLIB_BLOOM_WIDTH 128
30
#elif LONG_BIT >= 64
31
3.61G
#define STRINGLIB_BLOOM_WIDTH 64
32
#elif LONG_BIT >= 32
33
#define STRINGLIB_BLOOM_WIDTH 32
34
#else
35
#error "LONG_BIT is smaller than 32"
36
#endif
37
38
#define STRINGLIB_BLOOM_ADD(mask, ch) \
39
115M
    ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
40
#define STRINGLIB_BLOOM(mask, ch)     \
41
3.50G
    ((mask &  (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
42
43
#ifdef STRINGLIB_FAST_MEMCHR
44
285M
#  define MEMCHR_CUT_OFF 15
45
#else
46
71.2M
#  define MEMCHR_CUT_OFF 40
47
#endif
48
49
Py_LOCAL_INLINE(Py_ssize_t)
50
STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
51
356M
{
52
356M
    const STRINGLIB_CHAR *p, *e;
53
54
356M
    p = s;
55
356M
    e = s + n;
56
356M
    if (n > MEMCHR_CUT_OFF) {
57
#ifdef STRINGLIB_FAST_MEMCHR
58
93.3M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
93.3M
        if (p != NULL)
60
91.2M
            return (p - s);
61
2.11M
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
61.2M
        if (needle != 0) {
71
60.7M
            do {
72
60.7M
                void *candidate = memchr(p, needle,
73
60.7M
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
60.7M
                if (candidate == NULL)
75
580k
                    return -1;
76
60.1M
                s1 = p;
77
60.1M
                p = (const STRINGLIB_CHAR *)
78
60.1M
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
60.1M
                if (*p == ch)
80
60.0M
                    return (p - s);
81
                /* False positive */
82
123k
                p++;
83
123k
                if (p - s1 > MEMCHR_CUT_OFF)
84
58.9k
                    continue;
85
64.6k
                if (e - p <= MEMCHR_CUT_OFF)
86
4.28k
                    break;
87
60.3k
                e1 = p + MEMCHR_CUT_OFF;
88
1.96M
                while (p != e1) {
89
1.92M
                    if (*p == ch)
90
19.4k
                        return (p - s);
91
1.90M
                    p++;
92
1.90M
                }
93
60.3k
            }
94
60.6M
            while (e - p > MEMCHR_CUT_OFF);
95
60.6M
        }
96
#endif
97
154M
    }
98
1.16G
    while (p < e) {
99
984M
        if (*p == ch)
100
20.9M
            return (p - s);
101
963M
        p++;
102
963M
    }
103
181M
    return -1;
104
202M
}
bytesobject.c:stringlib_find_char
Line
Count
Source
51
45.8k
{
52
45.8k
    const STRINGLIB_CHAR *p, *e;
53
54
45.8k
    p = s;
55
45.8k
    e = s + n;
56
45.8k
    if (n > MEMCHR_CUT_OFF) {
57
45.8k
#ifdef STRINGLIB_FAST_MEMCHR
58
45.8k
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
45.8k
        if (p != NULL)
60
45.8k
            return (p - s);
61
42
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                void *candidate = memchr(p, needle,
73
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
45.8k
    }
98
0
    while (p < e) {
99
0
        if (*p == ch)
100
0
            return (p - s);
101
0
        p++;
102
0
    }
103
0
    return -1;
104
0
}
unicodeobject.c:ucs1lib_find_char
Line
Count
Source
51
204M
{
52
204M
    const STRINGLIB_CHAR *p, *e;
53
54
204M
    p = s;
55
204M
    e = s + n;
56
204M
    if (n > MEMCHR_CUT_OFF) {
57
20.8M
#ifdef STRINGLIB_FAST_MEMCHR
58
20.8M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
20.8M
        if (p != NULL)
60
19.7M
            return (p - s);
61
1.16M
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                void *candidate = memchr(p, needle,
73
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
20.8M
    }
98
966M
    while (p < e) {
99
791M
        if (*p == ch)
100
9.12M
            return (p - s);
101
782M
        p++;
102
782M
    }
103
174M
    return -1;
104
183M
}
unicodeobject.c:ucs2lib_find_char
Line
Count
Source
51
70.8M
{
52
70.8M
    const STRINGLIB_CHAR *p, *e;
53
54
70.8M
    p = s;
55
70.8M
    e = s + n;
56
70.8M
    if (n > MEMCHR_CUT_OFF) {
57
#ifdef STRINGLIB_FAST_MEMCHR
58
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
        if (p != NULL)
60
            return (p - s);
61
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
61.2M
        const STRINGLIB_CHAR *s1, *e1;
66
61.2M
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
61.2M
        if (needle != 0) {
71
60.7M
            do {
72
60.7M
                void *candidate = memchr(p, needle,
73
60.7M
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
60.7M
                if (candidate == NULL)
75
580k
                    return -1;
76
60.1M
                s1 = p;
77
60.1M
                p = (const STRINGLIB_CHAR *)
78
60.1M
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
60.1M
                if (*p == ch)
80
60.0M
                    return (p - s);
81
                /* False positive */
82
123k
                p++;
83
123k
                if (p - s1 > MEMCHR_CUT_OFF)
84
58.9k
                    continue;
85
64.6k
                if (e - p <= MEMCHR_CUT_OFF)
86
4.28k
                    break;
87
60.3k
                e1 = p + MEMCHR_CUT_OFF;
88
1.96M
                while (p != e1) {
89
1.92M
                    if (*p == ch)
90
19.4k
                        return (p - s);
91
1.90M
                    p++;
92
1.90M
                }
93
60.3k
            }
94
60.6M
            while (e - p > MEMCHR_CUT_OFF);
95
60.6M
        }
96
61.2M
#endif
97
61.2M
    }
98
155M
    while (p < e) {
99
151M
        if (*p == ch)
100
6.86M
            return (p - s);
101
145M
        p++;
102
145M
    }
103
3.36M
    return -1;
104
10.2M
}
unicodeobject.c:ucs4lib_find_char
Line
Count
Source
51
53.2M
{
52
53.2M
    const STRINGLIB_CHAR *p, *e;
53
54
53.2M
    p = s;
55
53.2M
    e = s + n;
56
53.2M
    if (n > MEMCHR_CUT_OFF) {
57
53.1M
#ifdef STRINGLIB_FAST_MEMCHR
58
53.1M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
53.1M
        if (p != NULL)
60
53.0M
            return (p - s);
61
32.1k
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                void *candidate = memchr(p, needle,
73
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
53.1M
    }
98
285k
    while (p < e) {
99
254k
        if (*p == ch)
100
38.8k
            return (p - s);
101
215k
        p++;
102
215k
    }
103
31.1k
    return -1;
104
70.0k
}
unicodeobject.c:asciilib_find_char
Line
Count
Source
51
22.5M
{
52
22.5M
    const STRINGLIB_CHAR *p, *e;
53
54
22.5M
    p = s;
55
22.5M
    e = s + n;
56
22.5M
    if (n > MEMCHR_CUT_OFF) {
57
18.1M
#ifdef STRINGLIB_FAST_MEMCHR
58
18.1M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
18.1M
        if (p != NULL)
60
17.4M
            return (p - s);
61
701k
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                void *candidate = memchr(p, needle,
73
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
18.1M
    }
98
12.0M
    while (p < e) {
99
12.0M
        if (*p == ch)
100
4.35M
            return (p - s);
101
7.68M
        p++;
102
7.68M
    }
103
58.1k
    return -1;
104
4.41M
}
bytes_methods.c:stringlib_find_char
Line
Count
Source
51
4.84M
{
52
4.84M
    const STRINGLIB_CHAR *p, *e;
53
54
4.84M
    p = s;
55
4.84M
    e = s + n;
56
4.84M
    if (n > MEMCHR_CUT_OFF) {
57
1.18M
#ifdef STRINGLIB_FAST_MEMCHR
58
1.18M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
1.18M
        if (p != NULL)
60
968k
            return (p - s);
61
212k
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                void *candidate = memchr(p, needle,
73
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
1.18M
    }
98
31.5M
    while (p < e) {
99
28.4M
        if (*p == ch)
100
577k
            return (p - s);
101
27.8M
        p++;
102
27.8M
    }
103
3.08M
    return -1;
104
3.66M
}
Unexecuted instantiation: bytearrayobject.c:stringlib_find_char
105
106
#undef MEMCHR_CUT_OFF
107
108
#if STRINGLIB_SIZEOF_CHAR == 1
109
125k
#  define MEMRCHR_CUT_OFF 15
110
#else
111
358k
#  define MEMRCHR_CUT_OFF 40
112
#endif
113
114
115
Py_LOCAL_INLINE(Py_ssize_t)
116
STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
117
456k
{
118
456k
    const STRINGLIB_CHAR *p;
119
456k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
456k
    if (n > MEMRCHR_CUT_OFF) {
126
#if STRINGLIB_SIZEOF_CHAR == 1
127
        p = memrchr(s, ch, n);
128
64.6k
        if (p != NULL)
129
61.2k
            return (p - s);
130
3.38k
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
282k
        if (needle != 0) {
141
289k
            do {
142
289k
                void *candidate = memrchr(s, needle,
143
289k
                                          n * sizeof(STRINGLIB_CHAR));
144
289k
                if (candidate == NULL)
145
1.03k
                    return -1;
146
288k
                n1 = n;
147
288k
                p = (const STRINGLIB_CHAR *)
148
288k
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
288k
                n = p - s;
150
288k
                if (*p == ch)
151
278k
                    return n;
152
                /* False positive */
153
9.81k
                if (n1 - n > MEMRCHR_CUT_OFF)
154
4.78k
                    continue;
155
5.03k
                if (n <= MEMRCHR_CUT_OFF)
156
939
                    break;
157
4.09k
                s1 = p - MEMRCHR_CUT_OFF;
158
152k
                while (p > s1) {
159
148k
                    p--;
160
148k
                    if (*p == ch)
161
623
                        return (p - s);
162
148k
                }
163
3.46k
                n = p - s;
164
3.46k
            }
165
282k
            while (n > MEMRCHR_CUT_OFF);
166
282k
        }
167
#endif
168
346k
    }
169
111k
#endif  /* HAVE_MEMRCHR */
170
111k
    p = s + n;
171
727k
    while (p > s) {
172
680k
        p--;
173
680k
        if (*p == ch)
174
65.1k
            return (p - s);
175
680k
    }
176
46.5k
    return -1;
177
111k
}
Unexecuted instantiation: bytesobject.c:stringlib_rfind_char
unicodeobject.c:ucs1lib_rfind_char
Line
Count
Source
117
55.3k
{
118
55.3k
    const STRINGLIB_CHAR *p;
119
55.3k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
55.3k
    if (n > MEMRCHR_CUT_OFF) {
126
51.4k
#if STRINGLIB_SIZEOF_CHAR == 1
127
51.4k
        p = memrchr(s, ch, n);
128
51.4k
        if (p != NULL)
129
50.3k
            return (p - s);
130
1.03k
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
        if (needle != 0) {
141
            do {
142
                void *candidate = memrchr(s, needle,
143
                                          n * sizeof(STRINGLIB_CHAR));
144
                if (candidate == NULL)
145
                    return -1;
146
                n1 = n;
147
                p = (const STRINGLIB_CHAR *)
148
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
                n = p - s;
150
                if (*p == ch)
151
                    return n;
152
                /* False positive */
153
                if (n1 - n > MEMRCHR_CUT_OFF)
154
                    continue;
155
                if (n <= MEMRCHR_CUT_OFF)
156
                    break;
157
                s1 = p - MEMRCHR_CUT_OFF;
158
                while (p > s1) {
159
                    p--;
160
                    if (*p == ch)
161
                        return (p - s);
162
                }
163
                n = p - s;
164
            }
165
            while (n > MEMRCHR_CUT_OFF);
166
        }
167
#endif
168
51.4k
    }
169
3.93k
#endif  /* HAVE_MEMRCHR */
170
3.93k
    p = s + n;
171
11.7k
    while (p > s) {
172
10.8k
        p--;
173
10.8k
        if (*p == ch)
174
3.02k
            return (p - s);
175
10.8k
    }
176
907
    return -1;
177
3.93k
}
unicodeobject.c:ucs2lib_rfind_char
Line
Count
Source
117
225k
{
118
225k
    const STRINGLIB_CHAR *p;
119
225k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
225k
    if (n > MEMRCHR_CUT_OFF) {
126
#if STRINGLIB_SIZEOF_CHAR == 1
127
        p = memrchr(s, ch, n);
128
        if (p != NULL)
129
            return (p - s);
130
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
216k
        const STRINGLIB_CHAR *s1;
135
216k
        Py_ssize_t n1;
136
216k
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
216k
        if (needle != 0) {
141
218k
            do {
142
218k
                void *candidate = memrchr(s, needle,
143
218k
                                          n * sizeof(STRINGLIB_CHAR));
144
218k
                if (candidate == NULL)
145
602
                    return -1;
146
217k
                n1 = n;
147
217k
                p = (const STRINGLIB_CHAR *)
148
217k
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
217k
                n = p - s;
150
217k
                if (*p == ch)
151
214k
                    return n;
152
                /* False positive */
153
3.08k
                if (n1 - n > MEMRCHR_CUT_OFF)
154
1.31k
                    continue;
155
1.77k
                if (n <= MEMRCHR_CUT_OFF)
156
432
                    break;
157
1.34k
                s1 = p - MEMRCHR_CUT_OFF;
158
48.0k
                while (p > s1) {
159
46.9k
                    p--;
160
46.9k
                    if (*p == ch)
161
268
                        return (p - s);
162
46.9k
                }
163
1.07k
                n = p - s;
164
1.07k
            }
165
216k
            while (n > MEMRCHR_CUT_OFF);
166
216k
        }
167
216k
#endif
168
216k
    }
169
10.1k
#endif  /* HAVE_MEMRCHR */
170
10.1k
    p = s + n;
171
57.7k
    while (p > s) {
172
55.8k
        p--;
173
55.8k
        if (*p == ch)
174
8.32k
            return (p - s);
175
55.8k
    }
176
1.86k
    return -1;
177
10.1k
}
unicodeobject.c:ucs4lib_rfind_char
Line
Count
Source
117
106k
{
118
106k
    const STRINGLIB_CHAR *p;
119
106k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
106k
    if (n > MEMRCHR_CUT_OFF) {
126
#if STRINGLIB_SIZEOF_CHAR == 1
127
        p = memrchr(s, ch, n);
128
        if (p != NULL)
129
            return (p - s);
130
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
66.2k
        const STRINGLIB_CHAR *s1;
135
66.2k
        Py_ssize_t n1;
136
66.2k
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
66.2k
        if (needle != 0) {
141
71.5k
            do {
142
71.5k
                void *candidate = memrchr(s, needle,
143
71.5k
                                          n * sizeof(STRINGLIB_CHAR));
144
71.5k
                if (candidate == NULL)
145
437
                    return -1;
146
71.1k
                n1 = n;
147
71.1k
                p = (const STRINGLIB_CHAR *)
148
71.1k
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
71.1k
                n = p - s;
150
71.1k
                if (*p == ch)
151
64.4k
                    return n;
152
                /* False positive */
153
6.72k
                if (n1 - n > MEMRCHR_CUT_OFF)
154
3.47k
                    continue;
155
3.25k
                if (n <= MEMRCHR_CUT_OFF)
156
507
                    break;
157
2.74k
                s1 = p - MEMRCHR_CUT_OFF;
158
103k
                while (p > s1) {
159
101k
                    p--;
160
101k
                    if (*p == ch)
161
355
                        return (p - s);
162
101k
                }
163
2.39k
                n = p - s;
164
2.39k
            }
165
66.2k
            while (n > MEMRCHR_CUT_OFF);
166
66.2k
        }
167
66.2k
#endif
168
66.2k
    }
169
41.1k
#endif  /* HAVE_MEMRCHR */
170
41.1k
    p = s + n;
171
355k
    while (p > s) {
172
354k
        p--;
173
354k
        if (*p == ch)
174
39.5k
            return (p - s);
175
354k
    }
176
1.57k
    return -1;
177
41.1k
}
unicodeobject.c:asciilib_rfind_char
Line
Count
Source
117
49.2k
{
118
49.2k
    const STRINGLIB_CHAR *p;
119
49.2k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
49.2k
    if (n > MEMRCHR_CUT_OFF) {
126
10.2k
#if STRINGLIB_SIZEOF_CHAR == 1
127
10.2k
        p = memrchr(s, ch, n);
128
10.2k
        if (p != NULL)
129
10.1k
            return (p - s);
130
146
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
        if (needle != 0) {
141
            do {
142
                void *candidate = memrchr(s, needle,
143
                                          n * sizeof(STRINGLIB_CHAR));
144
                if (candidate == NULL)
145
                    return -1;
146
                n1 = n;
147
                p = (const STRINGLIB_CHAR *)
148
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
                n = p - s;
150
                if (*p == ch)
151
                    return n;
152
                /* False positive */
153
                if (n1 - n > MEMRCHR_CUT_OFF)
154
                    continue;
155
                if (n <= MEMRCHR_CUT_OFF)
156
                    break;
157
                s1 = p - MEMRCHR_CUT_OFF;
158
                while (p > s1) {
159
                    p--;
160
                    if (*p == ch)
161
                        return (p - s);
162
                }
163
                n = p - s;
164
            }
165
            while (n > MEMRCHR_CUT_OFF);
166
        }
167
#endif
168
10.2k
    }
169
38.9k
#endif  /* HAVE_MEMRCHR */
170
38.9k
    p = s + n;
171
213k
    while (p > s) {
172
184k
        p--;
173
184k
        if (*p == ch)
174
9.50k
            return (p - s);
175
184k
    }
176
29.4k
    return -1;
177
38.9k
}
bytes_methods.c:stringlib_rfind_char
Line
Count
Source
117
20.5k
{
118
20.5k
    const STRINGLIB_CHAR *p;
119
20.5k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
20.5k
    if (n > MEMRCHR_CUT_OFF) {
126
2.98k
#if STRINGLIB_SIZEOF_CHAR == 1
127
2.98k
        p = memrchr(s, ch, n);
128
2.98k
        if (p != NULL)
129
781
            return (p - s);
130
2.20k
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
        if (needle != 0) {
141
            do {
142
                void *candidate = memrchr(s, needle,
143
                                          n * sizeof(STRINGLIB_CHAR));
144
                if (candidate == NULL)
145
                    return -1;
146
                n1 = n;
147
                p = (const STRINGLIB_CHAR *)
148
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
                n = p - s;
150
                if (*p == ch)
151
                    return n;
152
                /* False positive */
153
                if (n1 - n > MEMRCHR_CUT_OFF)
154
                    continue;
155
                if (n <= MEMRCHR_CUT_OFF)
156
                    break;
157
                s1 = p - MEMRCHR_CUT_OFF;
158
                while (p > s1) {
159
                    p--;
160
                    if (*p == ch)
161
                        return (p - s);
162
                }
163
                n = p - s;
164
            }
165
            while (n > MEMRCHR_CUT_OFF);
166
        }
167
#endif
168
2.98k
    }
169
17.5k
#endif  /* HAVE_MEMRCHR */
170
17.5k
    p = s + n;
171
88.4k
    while (p > s) {
172
75.6k
        p--;
173
75.6k
        if (*p == ch)
174
4.80k
            return (p - s);
175
75.6k
    }
176
12.7k
    return -1;
177
17.5k
}
Unexecuted instantiation: bytearrayobject.c:stringlib_rfind_char
178
179
#undef MEMRCHR_CUT_OFF
180
181
/* Change to a 1 to see logging comments walk through the algorithm. */
182
#if 0 && STRINGLIB_SIZEOF_CHAR == 1
183
# define LOG(...) printf(__VA_ARGS__)
184
# define LOG_STRING(s, n) printf("\"%.*s\"", (int)(n), s)
185
# define LOG_LINEUP() do {                                         \
186
    LOG("> "); LOG_STRING(haystack, len_haystack); LOG("\n> ");    \
187
    LOG("%*s",(int)(window_last - haystack + 1 - len_needle), ""); \
188
    LOG_STRING(needle, len_needle); LOG("\n");                     \
189
} while(0)
190
#else
191
# define LOG(...)
192
# define LOG_STRING(s, n)
193
# define LOG_LINEUP()
194
#endif
195
196
Py_LOCAL_INLINE(Py_ssize_t)
197
STRINGLIB(_lex_search)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
198
                       Py_ssize_t *return_period, int invert_alphabet)
199
40
{
200
    /* Do a lexicographic search. Essentially this:
201
           >>> max(needle[i:] for i in range(len(needle)+1))
202
       Also find the period of the right half.   */
203
40
    Py_ssize_t max_suffix = 0;
204
40
    Py_ssize_t candidate = 1;
205
40
    Py_ssize_t k = 0;
206
    // The period of the right half.
207
40
    Py_ssize_t period = 1;
208
209
400
    while (candidate + k < len_needle) {
210
        // each loop increases candidate + k + max_suffix
211
360
        STRINGLIB_CHAR a = needle[candidate + k];
212
360
        STRINGLIB_CHAR b = needle[max_suffix + k];
213
        // check if the suffix at candidate is better than max_suffix
214
360
        if (invert_alphabet ? (b < a) : (a < b)) {
215
            // Fell short of max_suffix.
216
            // The next k + 1 characters are non-increasing
217
            // from candidate, so they won't start a maximal suffix.
218
260
            candidate += k + 1;
219
260
            k = 0;
220
            // We've ruled out any period smaller than what's
221
            // been scanned since max_suffix.
222
260
            period = candidate - max_suffix;
223
260
        }
224
100
        else if (a == b) {
225
20
            if (k + 1 != period) {
226
                // Keep scanning the equal strings
227
0
                k++;
228
0
            }
229
20
            else {
230
                // Matched a whole period.
231
                // Start matching the next period.
232
20
                candidate += period;
233
20
                k = 0;
234
20
            }
235
20
        }
236
80
        else {
237
            // Did better than max_suffix, so replace it.
238
80
            max_suffix = candidate;
239
80
            candidate++;
240
80
            k = 0;
241
80
            period = 1;
242
80
        }
243
360
    }
244
40
    *return_period = period;
245
40
    return max_suffix;
246
40
}
Unexecuted instantiation: bytesobject.c:stringlib__lex_search
Unexecuted instantiation: unicodeobject.c:asciilib__lex_search
unicodeobject.c:ucs1lib__lex_search
Line
Count
Source
199
40
{
200
    /* Do a lexicographic search. Essentially this:
201
           >>> max(needle[i:] for i in range(len(needle)+1))
202
       Also find the period of the right half.   */
203
40
    Py_ssize_t max_suffix = 0;
204
40
    Py_ssize_t candidate = 1;
205
40
    Py_ssize_t k = 0;
206
    // The period of the right half.
207
40
    Py_ssize_t period = 1;
208
209
400
    while (candidate + k < len_needle) {
210
        // each loop increases candidate + k + max_suffix
211
360
        STRINGLIB_CHAR a = needle[candidate + k];
212
360
        STRINGLIB_CHAR b = needle[max_suffix + k];
213
        // check if the suffix at candidate is better than max_suffix
214
360
        if (invert_alphabet ? (b < a) : (a < b)) {
215
            // Fell short of max_suffix.
216
            // The next k + 1 characters are non-increasing
217
            // from candidate, so they won't start a maximal suffix.
218
260
            candidate += k + 1;
219
260
            k = 0;
220
            // We've ruled out any period smaller than what's
221
            // been scanned since max_suffix.
222
260
            period = candidate - max_suffix;
223
260
        }
224
100
        else if (a == b) {
225
20
            if (k + 1 != period) {
226
                // Keep scanning the equal strings
227
0
                k++;
228
0
            }
229
20
            else {
230
                // Matched a whole period.
231
                // Start matching the next period.
232
20
                candidate += period;
233
20
                k = 0;
234
20
            }
235
20
        }
236
80
        else {
237
            // Did better than max_suffix, so replace it.
238
80
            max_suffix = candidate;
239
80
            candidate++;
240
80
            k = 0;
241
80
            period = 1;
242
80
        }
243
360
    }
244
40
    *return_period = period;
245
40
    return max_suffix;
246
40
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__lex_search
Unexecuted instantiation: unicodeobject.c:ucs4lib__lex_search
Unexecuted instantiation: bytes_methods.c:stringlib__lex_search
Unexecuted instantiation: bytearrayobject.c:stringlib__lex_search
247
248
Py_LOCAL_INLINE(Py_ssize_t)
249
STRINGLIB(_factorize)(const STRINGLIB_CHAR *needle,
250
                      Py_ssize_t len_needle,
251
                      Py_ssize_t *return_period)
252
20
{
253
    /* Do a "critical factorization", making it so that:
254
       >>> needle = (left := needle[:cut]) + (right := needle[cut:])
255
       where the "local period" of the cut is maximal.
256
257
       The local period of the cut is the minimal length of a string w
258
       such that (left endswith w or w endswith left)
259
       and (right startswith w or w startswith right).
260
261
       The Critical Factorization Theorem says that this maximal local
262
       period is the global period of the string.
263
264
       Crochemore and Perrin (1991) show that this cut can be computed
265
       as the later of two cuts: one that gives a lexicographically
266
       maximal right half, and one that gives the same with the
267
       with respect to a reversed alphabet-ordering.
268
269
       This is what we want to happen:
270
           >>> x = "GCAGAGAG"
271
           >>> cut, period = factorize(x)
272
           >>> x[:cut], (right := x[cut:])
273
           ('GC', 'AGAGAG')
274
           >>> period  # right half period
275
           2
276
           >>> right[period:] == right[:-period]
277
           True
278
279
       This is how the local period lines up in the above example:
280
                GC | AGAGAG
281
           AGAGAGC = AGAGAGC
282
       The length of this minimal repetition is 7, which is indeed the
283
       period of the original string. */
284
285
20
    Py_ssize_t cut1, period1, cut2, period2, cut, period;
286
20
    cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0);
287
20
    cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1);
288
289
    // Take the later cut.
290
20
    if (cut1 > cut2) {
291
20
        period = period1;
292
20
        cut = cut1;
293
20
    }
294
0
    else {
295
0
        period = period2;
296
0
        cut = cut2;
297
0
    }
298
299
20
    LOG("split: "); LOG_STRING(needle, cut);
300
20
    LOG(" + "); LOG_STRING(needle + cut, len_needle - cut);
301
20
    LOG("\n");
302
303
20
    *return_period = period;
304
20
    return cut;
305
20
}
Unexecuted instantiation: bytesobject.c:stringlib__factorize
Unexecuted instantiation: unicodeobject.c:asciilib__factorize
unicodeobject.c:ucs1lib__factorize
Line
Count
Source
252
20
{
253
    /* Do a "critical factorization", making it so that:
254
       >>> needle = (left := needle[:cut]) + (right := needle[cut:])
255
       where the "local period" of the cut is maximal.
256
257
       The local period of the cut is the minimal length of a string w
258
       such that (left endswith w or w endswith left)
259
       and (right startswith w or w startswith right).
260
261
       The Critical Factorization Theorem says that this maximal local
262
       period is the global period of the string.
263
264
       Crochemore and Perrin (1991) show that this cut can be computed
265
       as the later of two cuts: one that gives a lexicographically
266
       maximal right half, and one that gives the same with the
267
       with respect to a reversed alphabet-ordering.
268
269
       This is what we want to happen:
270
           >>> x = "GCAGAGAG"
271
           >>> cut, period = factorize(x)
272
           >>> x[:cut], (right := x[cut:])
273
           ('GC', 'AGAGAG')
274
           >>> period  # right half period
275
           2
276
           >>> right[period:] == right[:-period]
277
           True
278
279
       This is how the local period lines up in the above example:
280
                GC | AGAGAG
281
           AGAGAGC = AGAGAGC
282
       The length of this minimal repetition is 7, which is indeed the
283
       period of the original string. */
284
285
20
    Py_ssize_t cut1, period1, cut2, period2, cut, period;
286
20
    cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0);
287
20
    cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1);
288
289
    // Take the later cut.
290
20
    if (cut1 > cut2) {
291
20
        period = period1;
292
20
        cut = cut1;
293
20
    }
294
0
    else {
295
0
        period = period2;
296
0
        cut = cut2;
297
0
    }
298
299
20
    LOG("split: "); LOG_STRING(needle, cut);
300
20
    LOG(" + "); LOG_STRING(needle + cut, len_needle - cut);
301
20
    LOG("\n");
302
303
20
    *return_period = period;
304
20
    return cut;
305
20
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__factorize
Unexecuted instantiation: unicodeobject.c:ucs4lib__factorize
Unexecuted instantiation: bytes_methods.c:stringlib__factorize
Unexecuted instantiation: bytearrayobject.c:stringlib__factorize
306
307
308
220
#define SHIFT_TYPE uint8_t
309
#define MAX_SHIFT UINT8_MAX
310
311
69.9k
#define TABLE_SIZE_BITS 6u
312
69.9k
#define TABLE_SIZE (1U << TABLE_SIZE_BITS)
313
68.6k
#define TABLE_MASK (TABLE_SIZE - 1U)
314
315
typedef struct STRINGLIB(_pre) {
316
    const STRINGLIB_CHAR *needle;
317
    Py_ssize_t len_needle;
318
    Py_ssize_t cut;
319
    Py_ssize_t period;
320
    Py_ssize_t gap;
321
    int is_periodic;
322
    SHIFT_TYPE table[TABLE_SIZE];
323
} STRINGLIB(prework);
324
325
326
static void
327
STRINGLIB(_preprocess)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
328
                       STRINGLIB(prework) *p)
329
20
{
330
20
    p->needle = needle;
331
20
    p->len_needle = len_needle;
332
20
    p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period));
333
20
    assert(p->period + p->cut <= len_needle);
334
20
    p->is_periodic = (0 == memcmp(needle,
335
20
                                  needle + p->period,
336
20
                                  p->cut * STRINGLIB_SIZEOF_CHAR));
337
20
    if (p->is_periodic) {
338
0
        assert(p->cut <= len_needle/2);
339
0
        assert(p->cut < p->period);
340
0
    }
341
20
    else {
342
        // A lower bound on the period
343
20
        p->period = Py_MAX(p->cut, len_needle - p->cut) + 1;
344
20
    }
345
    // The gap between the last character and the previous
346
    // occurrence of an equivalent character (modulo TABLE_SIZE)
347
20
    p->gap = len_needle;
348
20
    STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK;
349
140
    for (Py_ssize_t i = len_needle - 2; i >= 0; i--) {
350
140
        STRINGLIB_CHAR x = needle[i] & TABLE_MASK;
351
140
        if (x == last) {
352
20
            p->gap = len_needle - 1 - i;
353
20
            break;
354
20
        }
355
140
    }
356
    // Fill up a compressed Boyer-Moore "Bad Character" table
357
20
    Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT);
358
1.30k
    for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; i++) {
359
1.28k
        p->table[i] = Py_SAFE_DOWNCAST(not_found_shift,
360
1.28k
                                       Py_ssize_t, SHIFT_TYPE);
361
1.28k
    }
362
220
    for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; i++) {
363
200
        SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i,
364
200
                                            Py_ssize_t, SHIFT_TYPE);
365
200
        p->table[needle[i] & TABLE_MASK] = shift;
366
200
    }
367
20
}
Unexecuted instantiation: bytesobject.c:stringlib__preprocess
Unexecuted instantiation: unicodeobject.c:asciilib__preprocess
unicodeobject.c:ucs1lib__preprocess
Line
Count
Source
329
20
{
330
20
    p->needle = needle;
331
20
    p->len_needle = len_needle;
332
20
    p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period));
333
20
    assert(p->period + p->cut <= len_needle);
334
20
    p->is_periodic = (0 == memcmp(needle,
335
20
                                  needle + p->period,
336
20
                                  p->cut * STRINGLIB_SIZEOF_CHAR));
337
20
    if (p->is_periodic) {
338
0
        assert(p->cut <= len_needle/2);
339
0
        assert(p->cut < p->period);
340
0
    }
341
20
    else {
342
        // A lower bound on the period
343
20
        p->period = Py_MAX(p->cut, len_needle - p->cut) + 1;
344
20
    }
345
    // The gap between the last character and the previous
346
    // occurrence of an equivalent character (modulo TABLE_SIZE)
347
20
    p->gap = len_needle;
348
20
    STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK;
349
140
    for (Py_ssize_t i = len_needle - 2; i >= 0; i--) {
350
140
        STRINGLIB_CHAR x = needle[i] & TABLE_MASK;
351
140
        if (x == last) {
352
20
            p->gap = len_needle - 1 - i;
353
20
            break;
354
20
        }
355
140
    }
356
    // Fill up a compressed Boyer-Moore "Bad Character" table
357
20
    Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT);
358
1.30k
    for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; i++) {
359
1.28k
        p->table[i] = Py_SAFE_DOWNCAST(not_found_shift,
360
1.28k
                                       Py_ssize_t, SHIFT_TYPE);
361
1.28k
    }
362
220
    for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; i++) {
363
200
        SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i,
364
200
                                            Py_ssize_t, SHIFT_TYPE);
365
200
        p->table[needle[i] & TABLE_MASK] = shift;
366
200
    }
367
20
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__preprocess
Unexecuted instantiation: unicodeobject.c:ucs4lib__preprocess
Unexecuted instantiation: bytes_methods.c:stringlib__preprocess
Unexecuted instantiation: bytearrayobject.c:stringlib__preprocess
368
369
static Py_ssize_t
370
STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
371
                    STRINGLIB(prework) *p)
372
20
{
373
    // Crochemore and Perrin's (1991) Two-Way algorithm.
374
    // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
375
20
    const Py_ssize_t len_needle = p->len_needle;
376
20
    const Py_ssize_t cut = p->cut;
377
20
    Py_ssize_t period = p->period;
378
20
    const STRINGLIB_CHAR *const needle = p->needle;
379
20
    const STRINGLIB_CHAR *window_last = haystack + len_needle - 1;
380
20
    const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack;
381
20
    SHIFT_TYPE *table = p->table;
382
20
    const STRINGLIB_CHAR *window;
383
20
    LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack);
384
385
20
    Py_ssize_t gap = p->gap;
386
20
    Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap);
387
20
    if (p->is_periodic) {
388
0
        LOG("Needle is periodic.\n");
389
0
        Py_ssize_t memory = 0;
390
0
      periodicwindowloop:
391
0
        while (window_last < haystack_end) {
392
0
            assert(memory == 0);
393
0
            for (;;) {
394
0
                LOG_LINEUP();
395
0
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
396
0
                window_last += shift;
397
0
                if (shift == 0) {
398
0
                    break;
399
0
                }
400
0
                if (window_last >= haystack_end) {
401
0
                    return -1;
402
0
                }
403
0
                LOG("Horspool skip\n");
404
0
            }
405
0
          no_shift:
406
0
            window = window_last - len_needle + 1;
407
0
            assert((window[len_needle - 1] & TABLE_MASK) ==
408
0
                   (needle[len_needle - 1] & TABLE_MASK));
409
0
            Py_ssize_t i = Py_MAX(cut, memory);
410
0
            for (; i < len_needle; i++) {
411
0
                if (needle[i] != window[i]) {
412
0
                    if (i < gap_jump_end) {
413
0
                        LOG("Early right half mismatch: jump by gap.\n");
414
0
                        assert(gap >= i - cut + 1);
415
0
                        window_last += gap;
416
0
                    }
417
0
                    else {
418
0
                        LOG("Late right half mismatch: jump by n (>gap)\n");
419
0
                        assert(i - cut + 1 > gap);
420
0
                        window_last += i - cut + 1;
421
0
                    }
422
0
                    memory = 0;
423
0
                    goto periodicwindowloop;
424
0
                }
425
0
            }
426
0
            for (i = memory; i < cut; i++) {
427
0
                if (needle[i] != window[i]) {
428
0
                    LOG("Left half does not match.\n");
429
0
                    window_last += period;
430
0
                    memory = len_needle - period;
431
0
                    if (window_last >= haystack_end) {
432
0
                        return -1;
433
0
                    }
434
0
                    Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
435
0
                    if (shift) {
436
                        // A mismatch has been identified to the right
437
                        // of where i will next start, so we can jump
438
                        // at least as far as if the mismatch occurred
439
                        // on the first comparison.
440
0
                        Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1;
441
0
                        LOG("Skip with Memory.\n");
442
0
                        memory = 0;
443
0
                        window_last += Py_MAX(shift, mem_jump);
444
0
                        goto periodicwindowloop;
445
0
                    }
446
0
                    goto no_shift;
447
0
                }
448
0
            }
449
0
            LOG("Found a match!\n");
450
0
            return window - haystack;
451
0
        }
452
0
    }
453
20
    else {
454
20
        period = Py_MAX(gap, period);
455
20
        LOG("Needle is not periodic.\n");
456
14.0k
      windowloop:
457
14.0k
        while (window_last < haystack_end) {
458
68.2k
            for (;;) {
459
68.2k
                LOG_LINEUP();
460
68.2k
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
461
68.2k
                window_last += shift;
462
68.2k
                if (shift == 0) {
463
14.0k
                    break;
464
14.0k
                }
465
54.2k
                if (window_last >= haystack_end) {
466
17
                    return -1;
467
17
                }
468
54.2k
                LOG("Horspool skip\n");
469
54.2k
            }
470
14.0k
            window = window_last - len_needle + 1;
471
14.0k
            assert((window[len_needle - 1] & TABLE_MASK) ==
472
14.0k
                   (needle[len_needle - 1] & TABLE_MASK));
473
14.0k
            Py_ssize_t i = cut;
474
14.2k
            for (; i < len_needle; i++) {
475
14.1k
                if (needle[i] != window[i]) {
476
13.9k
                    if (i < gap_jump_end) {
477
13.9k
                        LOG("Early right half mismatch: jump by gap.\n");
478
13.9k
                        assert(gap >= i - cut + 1);
479
13.9k
                        window_last += gap;
480
13.9k
                    }
481
0
                    else {
482
0
                        LOG("Late right half mismatch: jump by n (>gap)\n");
483
0
                        assert(i - cut + 1 > gap);
484
0
                        window_last += i - cut + 1;
485
0
                    }
486
13.9k
                    goto windowloop;
487
13.9k
                }
488
14.1k
            }
489
101
            for (Py_ssize_t i = 0; i < cut; i++) {
490
99
                if (needle[i] != window[i]) {
491
83
                    LOG("Left half does not match.\n");
492
83
                    window_last += period;
493
83
                    goto windowloop;
494
83
                }
495
99
            }
496
2
            LOG("Found a match!\n");
497
2
            return window - haystack;
498
85
        }
499
14.0k
    }
500
1
    LOG("Not found. Returning -1.\n");
501
1
    return -1;
502
20
}
Unexecuted instantiation: bytesobject.c:stringlib__two_way
Unexecuted instantiation: unicodeobject.c:asciilib__two_way
unicodeobject.c:ucs1lib__two_way
Line
Count
Source
372
20
{
373
    // Crochemore and Perrin's (1991) Two-Way algorithm.
374
    // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
375
20
    const Py_ssize_t len_needle = p->len_needle;
376
20
    const Py_ssize_t cut = p->cut;
377
20
    Py_ssize_t period = p->period;
378
20
    const STRINGLIB_CHAR *const needle = p->needle;
379
20
    const STRINGLIB_CHAR *window_last = haystack + len_needle - 1;
380
20
    const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack;
381
20
    SHIFT_TYPE *table = p->table;
382
20
    const STRINGLIB_CHAR *window;
383
20
    LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack);
384
385
20
    Py_ssize_t gap = p->gap;
386
20
    Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap);
387
20
    if (p->is_periodic) {
388
0
        LOG("Needle is periodic.\n");
389
0
        Py_ssize_t memory = 0;
390
0
      periodicwindowloop:
391
0
        while (window_last < haystack_end) {
392
0
            assert(memory == 0);
393
0
            for (;;) {
394
0
                LOG_LINEUP();
395
0
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
396
0
                window_last += shift;
397
0
                if (shift == 0) {
398
0
                    break;
399
0
                }
400
0
                if (window_last >= haystack_end) {
401
0
                    return -1;
402
0
                }
403
0
                LOG("Horspool skip\n");
404
0
            }
405
0
          no_shift:
406
0
            window = window_last - len_needle + 1;
407
0
            assert((window[len_needle - 1] & TABLE_MASK) ==
408
0
                   (needle[len_needle - 1] & TABLE_MASK));
409
0
            Py_ssize_t i = Py_MAX(cut, memory);
410
0
            for (; i < len_needle; i++) {
411
0
                if (needle[i] != window[i]) {
412
0
                    if (i < gap_jump_end) {
413
0
                        LOG("Early right half mismatch: jump by gap.\n");
414
0
                        assert(gap >= i - cut + 1);
415
0
                        window_last += gap;
416
0
                    }
417
0
                    else {
418
0
                        LOG("Late right half mismatch: jump by n (>gap)\n");
419
0
                        assert(i - cut + 1 > gap);
420
0
                        window_last += i - cut + 1;
421
0
                    }
422
0
                    memory = 0;
423
0
                    goto periodicwindowloop;
424
0
                }
425
0
            }
426
0
            for (i = memory; i < cut; i++) {
427
0
                if (needle[i] != window[i]) {
428
0
                    LOG("Left half does not match.\n");
429
0
                    window_last += period;
430
0
                    memory = len_needle - period;
431
0
                    if (window_last >= haystack_end) {
432
0
                        return -1;
433
0
                    }
434
0
                    Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
435
0
                    if (shift) {
436
                        // A mismatch has been identified to the right
437
                        // of where i will next start, so we can jump
438
                        // at least as far as if the mismatch occurred
439
                        // on the first comparison.
440
0
                        Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1;
441
0
                        LOG("Skip with Memory.\n");
442
0
                        memory = 0;
443
0
                        window_last += Py_MAX(shift, mem_jump);
444
0
                        goto periodicwindowloop;
445
0
                    }
446
0
                    goto no_shift;
447
0
                }
448
0
            }
449
0
            LOG("Found a match!\n");
450
0
            return window - haystack;
451
0
        }
452
0
    }
453
20
    else {
454
20
        period = Py_MAX(gap, period);
455
20
        LOG("Needle is not periodic.\n");
456
14.0k
      windowloop:
457
14.0k
        while (window_last < haystack_end) {
458
68.2k
            for (;;) {
459
68.2k
                LOG_LINEUP();
460
68.2k
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
461
68.2k
                window_last += shift;
462
68.2k
                if (shift == 0) {
463
14.0k
                    break;
464
14.0k
                }
465
54.2k
                if (window_last >= haystack_end) {
466
17
                    return -1;
467
17
                }
468
54.2k
                LOG("Horspool skip\n");
469
54.2k
            }
470
14.0k
            window = window_last - len_needle + 1;
471
14.0k
            assert((window[len_needle - 1] & TABLE_MASK) ==
472
14.0k
                   (needle[len_needle - 1] & TABLE_MASK));
473
14.0k
            Py_ssize_t i = cut;
474
14.2k
            for (; i < len_needle; i++) {
475
14.1k
                if (needle[i] != window[i]) {
476
13.9k
                    if (i < gap_jump_end) {
477
13.9k
                        LOG("Early right half mismatch: jump by gap.\n");
478
13.9k
                        assert(gap >= i - cut + 1);
479
13.9k
                        window_last += gap;
480
13.9k
                    }
481
0
                    else {
482
0
                        LOG("Late right half mismatch: jump by n (>gap)\n");
483
0
                        assert(i - cut + 1 > gap);
484
0
                        window_last += i - cut + 1;
485
0
                    }
486
13.9k
                    goto windowloop;
487
13.9k
                }
488
14.1k
            }
489
101
            for (Py_ssize_t i = 0; i < cut; i++) {
490
99
                if (needle[i] != window[i]) {
491
83
                    LOG("Left half does not match.\n");
492
83
                    window_last += period;
493
83
                    goto windowloop;
494
83
                }
495
99
            }
496
2
            LOG("Found a match!\n");
497
2
            return window - haystack;
498
85
        }
499
14.0k
    }
500
1
    LOG("Not found. Returning -1.\n");
501
1
    return -1;
502
20
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__two_way
Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way
Unexecuted instantiation: bytes_methods.c:stringlib__two_way
Unexecuted instantiation: bytearrayobject.c:stringlib__two_way
503
504
505
static Py_ssize_t
506
STRINGLIB(_two_way_find)(const STRINGLIB_CHAR *haystack,
507
                         Py_ssize_t len_haystack,
508
                         const STRINGLIB_CHAR *needle,
509
                         Py_ssize_t len_needle)
510
20
{
511
20
    LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack);
512
20
    STRINGLIB(prework) p;
513
20
    STRINGLIB(_preprocess)(needle, len_needle, &p);
514
20
    return STRINGLIB(_two_way)(haystack, len_haystack, &p);
515
20
}
Unexecuted instantiation: bytesobject.c:stringlib__two_way_find
Unexecuted instantiation: unicodeobject.c:asciilib__two_way_find
unicodeobject.c:ucs1lib__two_way_find
Line
Count
Source
510
20
{
511
20
    LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack);
512
20
    STRINGLIB(prework) p;
513
20
    STRINGLIB(_preprocess)(needle, len_needle, &p);
514
20
    return STRINGLIB(_two_way)(haystack, len_haystack, &p);
515
20
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__two_way_find
Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way_find
Unexecuted instantiation: bytes_methods.c:stringlib__two_way_find
Unexecuted instantiation: bytearrayobject.c:stringlib__two_way_find
516
517
518
static Py_ssize_t
519
STRINGLIB(_two_way_count)(const STRINGLIB_CHAR *haystack,
520
                          Py_ssize_t len_haystack,
521
                          const STRINGLIB_CHAR *needle,
522
                          Py_ssize_t len_needle,
523
                          Py_ssize_t maxcount)
524
0
{
525
0
    LOG("###### Counting \"%s\" in \"%s\".\n", needle, haystack);
526
0
    STRINGLIB(prework) p;
527
0
    STRINGLIB(_preprocess)(needle, len_needle, &p);
528
0
    Py_ssize_t index = 0, count = 0;
529
0
    while (1) {
530
0
        Py_ssize_t result;
531
0
        result = STRINGLIB(_two_way)(haystack + index,
532
0
                                     len_haystack - index, &p);
533
0
        if (result == -1) {
534
0
            return count;
535
0
        }
536
0
        count++;
537
0
        if (count == maxcount) {
538
0
            return maxcount;
539
0
        }
540
0
        index += result + len_needle;
541
0
    }
542
0
    return count;
543
0
}
Unexecuted instantiation: bytesobject.c:stringlib__two_way_count
Unexecuted instantiation: unicodeobject.c:asciilib__two_way_count
Unexecuted instantiation: unicodeobject.c:ucs1lib__two_way_count
Unexecuted instantiation: unicodeobject.c:ucs2lib__two_way_count
Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way_count
Unexecuted instantiation: bytes_methods.c:stringlib__two_way_count
Unexecuted instantiation: bytearrayobject.c:stringlib__two_way_count
544
545
#undef SHIFT_TYPE
546
#undef NOT_FOUND
547
#undef SHIFT_OVERFLOW
548
#undef TABLE_SIZE_BITS
549
#undef TABLE_SIZE
550
#undef TABLE_MASK
551
552
#undef LOG
553
#undef LOG_STRING
554
#undef LOG_LINEUP
555
556
static inline Py_ssize_t
557
STRINGLIB(default_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
558
                        const STRINGLIB_CHAR* p, Py_ssize_t m,
559
                        Py_ssize_t maxcount, int mode)
560
26.3M
{
561
26.3M
    const Py_ssize_t w = n - m;
562
26.3M
    Py_ssize_t mlast = m - 1, count = 0;
563
26.3M
    Py_ssize_t gap = mlast;
564
26.3M
    const STRINGLIB_CHAR last = p[mlast];
565
26.3M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
26.3M
    unsigned long mask = 0;
568
115M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
89.4M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
89.4M
        if (p[i] == last) {
571
543k
            gap = mlast - i - 1;
572
543k
        }
573
89.4M
    }
574
26.3M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
3.54G
    for (Py_ssize_t i = 0; i <= w; i++) {
577
3.52G
        if (ss[i] == last) {
578
            /* candidate match */
579
47.3M
            Py_ssize_t j;
580
69.8M
            for (j = 0; j < mlast; j++) {
581
47.4M
                if (s[i+j] != p[j]) {
582
24.8M
                    break;
583
24.8M
                }
584
47.4M
            }
585
47.3M
            if (j == mlast) {
586
                /* got a match! */
587
22.4M
                if (mode != FAST_COUNT) {
588
11.3M
                    return i;
589
11.3M
                }
590
11.0M
                count++;
591
11.0M
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
11.0M
                i = i + mlast;
595
11.0M
                continue;
596
11.0M
            }
597
            /* miss: check if next character is part of pattern */
598
24.8M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
4.62M
                i = i + m;
600
4.62M
            }
601
20.2M
            else {
602
20.2M
                i = i + gap;
603
20.2M
            }
604
24.8M
        }
605
3.47G
        else {
606
            /* skip: check if next character is part of pattern */
607
3.47G
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
3.37G
                i = i + m;
609
3.37G
            }
610
3.47G
        }
611
3.52G
    }
612
14.9M
    return mode == FAST_COUNT ? count : -1;
613
26.3M
}
Unexecuted instantiation: bytesobject.c:stringlib_default_find
unicodeobject.c:asciilib_default_find
Line
Count
Source
560
1.80M
{
561
1.80M
    const Py_ssize_t w = n - m;
562
1.80M
    Py_ssize_t mlast = m - 1, count = 0;
563
1.80M
    Py_ssize_t gap = mlast;
564
1.80M
    const STRINGLIB_CHAR last = p[mlast];
565
1.80M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
1.80M
    unsigned long mask = 0;
568
3.64M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
1.83M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
1.83M
        if (p[i] == last) {
571
22.7k
            gap = mlast - i - 1;
572
22.7k
        }
573
1.83M
    }
574
1.80M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
162M
    for (Py_ssize_t i = 0; i <= w; i++) {
577
162M
        if (ss[i] == last) {
578
            /* candidate match */
579
5.19M
            Py_ssize_t j;
580
6.99M
            for (j = 0; j < mlast; j++) {
581
5.21M
                if (s[i+j] != p[j]) {
582
3.41M
                    break;
583
3.41M
                }
584
5.21M
            }
585
5.19M
            if (j == mlast) {
586
                /* got a match! */
587
1.77M
                if (mode != FAST_COUNT) {
588
1.77M
                    return i;
589
1.77M
                }
590
0
                count++;
591
0
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
0
                i = i + mlast;
595
0
                continue;
596
0
            }
597
            /* miss: check if next character is part of pattern */
598
3.41M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
60.5k
                i = i + m;
600
60.5k
            }
601
3.35M
            else {
602
3.35M
                i = i + gap;
603
3.35M
            }
604
3.41M
        }
605
156M
        else {
606
            /* skip: check if next character is part of pattern */
607
156M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
149M
                i = i + m;
609
149M
            }
610
156M
        }
611
162M
    }
612
32.9k
    return mode == FAST_COUNT ? count : -1;
613
1.80M
}
unicodeobject.c:ucs1lib_default_find
Line
Count
Source
560
16.7M
{
561
16.7M
    const Py_ssize_t w = n - m;
562
16.7M
    Py_ssize_t mlast = m - 1, count = 0;
563
16.7M
    Py_ssize_t gap = mlast;
564
16.7M
    const STRINGLIB_CHAR last = p[mlast];
565
16.7M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
16.7M
    unsigned long mask = 0;
568
96.4M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
79.7M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
79.7M
        if (p[i] == last) {
571
457k
            gap = mlast - i - 1;
572
457k
        }
573
79.7M
    }
574
16.7M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
611M
    for (Py_ssize_t i = 0; i <= w; i++) {
577
596M
        if (ss[i] == last) {
578
            /* candidate match */
579
14.9M
            Py_ssize_t j;
580
20.3M
            for (j = 0; j < mlast; j++) {
581
14.9M
                if (s[i+j] != p[j]) {
582
9.47M
                    break;
583
9.47M
                }
584
14.9M
            }
585
14.9M
            if (j == mlast) {
586
                /* got a match! */
587
5.44M
                if (mode != FAST_COUNT) {
588
1.89M
                    return i;
589
1.89M
                }
590
3.55M
                count++;
591
3.55M
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
3.55M
                i = i + mlast;
595
3.55M
                continue;
596
3.55M
            }
597
            /* miss: check if next character is part of pattern */
598
9.47M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
1.91M
                i = i + m;
600
1.91M
            }
601
7.56M
            else {
602
7.56M
                i = i + gap;
603
7.56M
            }
604
9.47M
        }
605
581M
        else {
606
            /* skip: check if next character is part of pattern */
607
581M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
521M
                i = i + m;
609
521M
            }
610
581M
        }
611
596M
    }
612
14.8M
    return mode == FAST_COUNT ? count : -1;
613
16.7M
}
unicodeobject.c:ucs2lib_default_find
Line
Count
Source
560
3.62M
{
561
3.62M
    const Py_ssize_t w = n - m;
562
3.62M
    Py_ssize_t mlast = m - 1, count = 0;
563
3.62M
    Py_ssize_t gap = mlast;
564
3.62M
    const STRINGLIB_CHAR last = p[mlast];
565
3.62M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
3.62M
    unsigned long mask = 0;
568
7.30M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
3.68M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
3.68M
        if (p[i] == last) {
571
37.6k
            gap = mlast - i - 1;
572
37.6k
        }
573
3.68M
    }
574
3.62M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
1.07G
    for (Py_ssize_t i = 0; i <= w; i++) {
577
1.07G
        if (ss[i] == last) {
578
            /* candidate match */
579
14.5M
            Py_ssize_t j;
580
21.4M
            for (j = 0; j < mlast; j++) {
581
14.5M
                if (s[i+j] != p[j]) {
582
7.57M
                    break;
583
7.57M
                }
584
14.5M
            }
585
14.5M
            if (j == mlast) {
586
                /* got a match! */
587
6.92M
                if (mode != FAST_COUNT) {
588
3.52M
                    return i;
589
3.52M
                }
590
3.40M
                count++;
591
3.40M
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
3.40M
                i = i + mlast;
595
3.40M
                continue;
596
3.40M
            }
597
            /* miss: check if next character is part of pattern */
598
7.57M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
1.14M
                i = i + m;
600
1.14M
            }
601
6.42M
            else {
602
6.42M
                i = i + gap;
603
6.42M
            }
604
7.57M
        }
605
1.06G
        else {
606
            /* skip: check if next character is part of pattern */
607
1.06G
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
1.03G
                i = i + m;
609
1.03G
            }
610
1.06G
        }
611
1.07G
    }
612
101k
    return mode == FAST_COUNT ? count : -1;
613
3.62M
}
unicodeobject.c:ucs4lib_default_find
Line
Count
Source
560
4.18M
{
561
4.18M
    const Py_ssize_t w = n - m;
562
4.18M
    Py_ssize_t mlast = m - 1, count = 0;
563
4.18M
    Py_ssize_t gap = mlast;
564
4.18M
    const STRINGLIB_CHAR last = p[mlast];
565
4.18M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
4.18M
    unsigned long mask = 0;
568
8.38M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
4.20M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
4.20M
        if (p[i] == last) {
571
22.3k
            gap = mlast - i - 1;
572
22.3k
        }
573
4.20M
    }
574
4.18M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
1.69G
    for (Py_ssize_t i = 0; i <= w; i++) {
577
1.69G
        if (ss[i] == last) {
578
            /* candidate match */
579
12.6M
            Py_ssize_t j;
580
20.9M
            for (j = 0; j < mlast; j++) {
581
12.7M
                if (s[i+j] != p[j]) {
582
4.40M
                    break;
583
4.40M
                }
584
12.7M
            }
585
12.6M
            if (j == mlast) {
586
                /* got a match! */
587
8.28M
                if (mode != FAST_COUNT) {
588
4.15M
                    return i;
589
4.15M
                }
590
4.12M
                count++;
591
4.12M
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
4.12M
                i = i + mlast;
595
4.12M
                continue;
596
4.12M
            }
597
            /* miss: check if next character is part of pattern */
598
4.40M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
1.50M
                i = i + m;
600
1.50M
            }
601
2.89M
            else {
602
2.89M
                i = i + gap;
603
2.89M
            }
604
4.40M
        }
605
1.67G
        else {
606
            /* skip: check if next character is part of pattern */
607
1.67G
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
1.66G
                i = i + m;
609
1.66G
            }
610
1.67G
        }
611
1.69G
    }
612
26.8k
    return mode == FAST_COUNT ? count : -1;
613
4.18M
}
bytes_methods.c:stringlib_default_find
Line
Count
Source
560
2.79k
{
561
2.79k
    const Py_ssize_t w = n - m;
562
2.79k
    Py_ssize_t mlast = m - 1, count = 0;
563
2.79k
    Py_ssize_t gap = mlast;
564
2.79k
    const STRINGLIB_CHAR last = p[mlast];
565
2.79k
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
2.79k
    unsigned long mask = 0;
568
11.1k
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
8.39k
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
8.39k
        if (p[i] == last) {
571
2.79k
            gap = mlast - i - 1;
572
2.79k
        }
573
8.39k
    }
574
2.79k
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
441k
    for (Py_ssize_t i = 0; i <= w; i++) {
577
441k
        if (ss[i] == last) {
578
            /* candidate match */
579
7.27k
            Py_ssize_t j;
580
15.2k
            for (j = 0; j < mlast; j++) {
581
12.6k
                if (s[i+j] != p[j]) {
582
4.70k
                    break;
583
4.70k
                }
584
12.6k
            }
585
7.27k
            if (j == mlast) {
586
                /* got a match! */
587
2.57k
                if (mode != FAST_COUNT) {
588
2.57k
                    return i;
589
2.57k
                }
590
0
                count++;
591
0
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
0
                i = i + mlast;
595
0
                continue;
596
0
            }
597
            /* miss: check if next character is part of pattern */
598
4.70k
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
562
                i = i + m;
600
562
            }
601
4.13k
            else {
602
4.13k
                i = i + gap;
603
4.13k
            }
604
4.70k
        }
605
433k
        else {
606
            /* skip: check if next character is part of pattern */
607
433k
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
16.0k
                i = i + m;
609
16.0k
            }
610
433k
        }
611
441k
    }
612
226
    return mode == FAST_COUNT ? count : -1;
613
2.79k
}
Unexecuted instantiation: bytearrayobject.c:stringlib_default_find
614
615
616
static Py_ssize_t
617
STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
618
                         const STRINGLIB_CHAR* p, Py_ssize_t m,
619
                         Py_ssize_t maxcount, int mode)
620
0
{
621
0
    const Py_ssize_t w = n - m;
622
0
    Py_ssize_t mlast = m - 1, count = 0;
623
0
    Py_ssize_t gap = mlast;
624
0
    Py_ssize_t hits = 0, res;
625
0
    const STRINGLIB_CHAR last = p[mlast];
626
0
    const STRINGLIB_CHAR *const ss = &s[mlast];
627
628
0
    unsigned long mask = 0;
629
0
    for (Py_ssize_t i = 0; i < mlast; i++) {
630
0
        STRINGLIB_BLOOM_ADD(mask, p[i]);
631
0
        if (p[i] == last) {
632
0
            gap = mlast - i - 1;
633
0
        }
634
0
    }
635
0
    STRINGLIB_BLOOM_ADD(mask, last);
636
637
0
    for (Py_ssize_t i = 0; i <= w; i++) {
638
0
        if (ss[i] == last) {
639
            /* candidate match */
640
0
            Py_ssize_t j;
641
0
            for (j = 0; j < mlast; j++) {
642
0
                if (s[i+j] != p[j]) {
643
0
                    break;
644
0
                }
645
0
            }
646
0
            if (j == mlast) {
647
                /* got a match! */
648
0
                if (mode != FAST_COUNT) {
649
0
                    return i;
650
0
                }
651
0
                count++;
652
0
                if (count == maxcount) {
653
0
                    return maxcount;
654
0
                }
655
0
                i = i + mlast;
656
0
                continue;
657
0
            }
658
0
            hits += j + 1;
659
0
            if (hits > m / 4 && w - i > 2000) {
660
0
                if (mode == FAST_SEARCH) {
661
0
                    res = STRINGLIB(_two_way_find)(s + i, n - i, p, m);
662
0
                    return res == -1 ? -1 : res + i;
663
0
                }
664
0
                else {
665
0
                    res = STRINGLIB(_two_way_count)(s + i, n - i, p, m,
666
0
                                                    maxcount - count);
667
0
                    return res + count;
668
0
                }
669
0
            }
670
            /* miss: check if next character is part of pattern */
671
0
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
672
0
                i = i + m;
673
0
            }
674
0
            else {
675
0
                i = i + gap;
676
0
            }
677
0
        }
678
0
        else {
679
            /* skip: check if next character is part of pattern */
680
0
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
681
0
                i = i + m;
682
0
            }
683
0
        }
684
0
    }
685
0
    return mode == FAST_COUNT ? count : -1;
686
0
}
Unexecuted instantiation: bytesobject.c:stringlib_adaptive_find
Unexecuted instantiation: unicodeobject.c:asciilib_adaptive_find
Unexecuted instantiation: unicodeobject.c:ucs1lib_adaptive_find
Unexecuted instantiation: unicodeobject.c:ucs2lib_adaptive_find
Unexecuted instantiation: unicodeobject.c:ucs4lib_adaptive_find
Unexecuted instantiation: bytes_methods.c:stringlib_adaptive_find
Unexecuted instantiation: bytearrayobject.c:stringlib_adaptive_find
687
688
689
static Py_ssize_t
690
STRINGLIB(default_rfind)(const STRINGLIB_CHAR* s, Py_ssize_t n,
691
                         const STRINGLIB_CHAR* p, Py_ssize_t m,
692
                         Py_ssize_t maxcount, int mode)
693
6.41k
{
694
    /* create compressed boyer-moore delta 1 table */
695
6.41k
    unsigned long mask = 0;
696
6.41k
    Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m;
697
698
    /* process pattern[0] outside the loop */
699
6.41k
    STRINGLIB_BLOOM_ADD(mask, p[0]);
700
    /* process pattern[:0:-1] */
701
25.6k
    for (i = mlast; i > 0; i--) {
702
19.2k
        STRINGLIB_BLOOM_ADD(mask, p[i]);
703
19.2k
        if (p[i] == p[0]) {
704
0
            skip = i - 1;
705
0
        }
706
19.2k
    }
707
708
1.24M
    for (i = w; i >= 0; i--) {
709
1.24M
        if (s[i] == p[0]) {
710
            /* candidate match */
711
58.2k
            for (j = mlast; j > 0; j--) {
712
51.9k
                if (s[i+j] != p[j]) {
713
32.5k
                    break;
714
32.5k
                }
715
51.9k
            }
716
38.8k
            if (j == 0) {
717
                /* got a match! */
718
6.31k
                return i;
719
6.31k
            }
720
            /* miss: check if previous character is part of pattern */
721
32.5k
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
722
30.4k
                i = i - m;
723
30.4k
            }
724
2.12k
            else {
725
2.12k
                i = i - skip;
726
2.12k
            }
727
32.5k
        }
728
1.20M
        else {
729
            /* skip: check if previous character is part of pattern */
730
1.20M
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
731
1.11M
                i = i - m;
732
1.11M
            }
733
1.20M
        }
734
1.24M
    }
735
93
    return -1;
736
6.41k
}
Unexecuted instantiation: bytesobject.c:stringlib_default_rfind
Unexecuted instantiation: unicodeobject.c:asciilib_default_rfind
Unexecuted instantiation: unicodeobject.c:ucs1lib_default_rfind
Unexecuted instantiation: unicodeobject.c:ucs2lib_default_rfind
Unexecuted instantiation: unicodeobject.c:ucs4lib_default_rfind
bytes_methods.c:stringlib_default_rfind
Line
Count
Source
693
6.41k
{
694
    /* create compressed boyer-moore delta 1 table */
695
6.41k
    unsigned long mask = 0;
696
6.41k
    Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m;
697
698
    /* process pattern[0] outside the loop */
699
6.41k
    STRINGLIB_BLOOM_ADD(mask, p[0]);
700
    /* process pattern[:0:-1] */
701
25.6k
    for (i = mlast; i > 0; i--) {
702
19.2k
        STRINGLIB_BLOOM_ADD(mask, p[i]);
703
19.2k
        if (p[i] == p[0]) {
704
0
            skip = i - 1;
705
0
        }
706
19.2k
    }
707
708
1.24M
    for (i = w; i >= 0; i--) {
709
1.24M
        if (s[i] == p[0]) {
710
            /* candidate match */
711
58.2k
            for (j = mlast; j > 0; j--) {
712
51.9k
                if (s[i+j] != p[j]) {
713
32.5k
                    break;
714
32.5k
                }
715
51.9k
            }
716
38.8k
            if (j == 0) {
717
                /* got a match! */
718
6.31k
                return i;
719
6.31k
            }
720
            /* miss: check if previous character is part of pattern */
721
32.5k
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
722
30.4k
                i = i - m;
723
30.4k
            }
724
2.12k
            else {
725
2.12k
                i = i - skip;
726
2.12k
            }
727
32.5k
        }
728
1.20M
        else {
729
            /* skip: check if previous character is part of pattern */
730
1.20M
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
731
1.11M
                i = i - m;
732
1.11M
            }
733
1.20M
        }
734
1.24M
    }
735
93
    return -1;
736
6.41k
}
Unexecuted instantiation: bytearrayobject.c:stringlib_default_rfind
737
738
739
static inline Py_ssize_t
740
STRINGLIB(count_char)(const STRINGLIB_CHAR *s, Py_ssize_t n,
741
                      const STRINGLIB_CHAR p0, Py_ssize_t maxcount)
742
0
{
743
0
    Py_ssize_t i, count = 0;
744
0
    for (i = 0; i < n; i++) {
745
0
        if (s[i] == p0) {
746
0
            count++;
747
0
            if (count == maxcount) {
748
0
                return maxcount;
749
0
            }
750
0
        }
751
0
    }
752
0
    return count;
753
0
}
Unexecuted instantiation: bytesobject.c:stringlib_count_char
Unexecuted instantiation: unicodeobject.c:asciilib_count_char
Unexecuted instantiation: unicodeobject.c:ucs1lib_count_char
Unexecuted instantiation: unicodeobject.c:ucs2lib_count_char
Unexecuted instantiation: unicodeobject.c:ucs4lib_count_char
Unexecuted instantiation: bytes_methods.c:stringlib_count_char
Unexecuted instantiation: bytearrayobject.c:stringlib_count_char
754
755
756
static inline Py_ssize_t
757
STRINGLIB(count_char_no_maxcount)(const STRINGLIB_CHAR *s, Py_ssize_t n,
758
                                  const STRINGLIB_CHAR p0)
759
/* A specialized function of count_char that does not cut off at a maximum.
760
   As a result, the compiler is able to vectorize the loop. */
761
66.4M
{
762
66.4M
    Py_ssize_t count = 0;
763
8.21G
    for (Py_ssize_t i = 0; i < n; i++) {
764
8.14G
        if (s[i] == p0) {
765
253M
            count++;
766
253M
        }
767
8.14G
    }
768
66.4M
    return count;
769
66.4M
}
Unexecuted instantiation: bytesobject.c:stringlib_count_char_no_maxcount
Unexecuted instantiation: unicodeobject.c:asciilib_count_char_no_maxcount
unicodeobject.c:ucs1lib_count_char_no_maxcount
Line
Count
Source
761
50.8M
{
762
50.8M
    Py_ssize_t count = 0;
763
1.57G
    for (Py_ssize_t i = 0; i < n; i++) {
764
1.51G
        if (s[i] == p0) {
765
46.1M
            count++;
766
46.1M
        }
767
1.51G
    }
768
50.8M
    return count;
769
50.8M
}
unicodeobject.c:ucs2lib_count_char_no_maxcount
Line
Count
Source
761
10.1M
{
762
10.1M
    Py_ssize_t count = 0;
763
2.09G
    for (Py_ssize_t i = 0; i < n; i++) {
764
2.08G
        if (s[i] == p0) {
765
71.0M
            count++;
766
71.0M
        }
767
2.08G
    }
768
10.1M
    return count;
769
10.1M
}
unicodeobject.c:ucs4lib_count_char_no_maxcount
Line
Count
Source
761
1.25M
{
762
1.25M
    Py_ssize_t count = 0;
763
2.39G
    for (Py_ssize_t i = 0; i < n; i++) {
764
2.39G
        if (s[i] == p0) {
765
60.4M
            count++;
766
60.4M
        }
767
2.39G
    }
768
1.25M
    return count;
769
1.25M
}
bytes_methods.c:stringlib_count_char_no_maxcount
Line
Count
Source
761
4.19M
{
762
4.19M
    Py_ssize_t count = 0;
763
2.14G
    for (Py_ssize_t i = 0; i < n; i++) {
764
2.14G
        if (s[i] == p0) {
765
76.1M
            count++;
766
76.1M
        }
767
2.14G
    }
768
4.19M
    return count;
769
4.19M
}
Unexecuted instantiation: bytearrayobject.c:stringlib_count_char_no_maxcount
770
771
772
Py_LOCAL_INLINE(Py_ssize_t)
773
FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
774
           const STRINGLIB_CHAR* p, Py_ssize_t m,
775
           Py_ssize_t maxcount, int mode)
776
237M
{
777
237M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
14.8k
        return -1;
779
14.8k
    }
780
781
    /* look for special cases */
782
237M
    if (m <= 1) {
783
211M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
211M
        if (mode == FAST_SEARCH)
788
144M
            return STRINGLIB(find_char)(s, n, p[0]);
789
66.4M
        else if (mode == FAST_RSEARCH)
790
49.2k
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
66.4M
        else {
792
66.4M
            if (maxcount == PY_SSIZE_T_MAX) {
793
66.4M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
66.4M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
66.4M
        }
797
211M
    }
798
799
26.3M
    if (mode != FAST_RSEARCH) {
800
26.3M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
26.3M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
26.3M
        }
803
20
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
20
            if (mode == FAST_SEARCH) {
810
20
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
20
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
20
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
26.3M
    }
825
6.41k
    else {
826
        /* FAST_RSEARCH */
827
6.41k
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
6.41k
    }
829
26.3M
}
bytesobject.c:fastsearch
Line
Count
Source
776
45.8k
{
777
45.8k
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
0
        return -1;
779
0
    }
780
781
    /* look for special cases */
782
45.8k
    if (m <= 1) {
783
45.8k
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
45.8k
        if (mode == FAST_SEARCH)
788
45.8k
            return STRINGLIB(find_char)(s, n, p[0]);
789
0
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
0
        else {
792
0
            if (maxcount == PY_SSIZE_T_MAX) {
793
0
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
0
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
0
        }
797
45.8k
    }
798
799
0
    if (mode != FAST_RSEARCH) {
800
0
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
0
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
0
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
0
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
0
}
unicodeobject.c:asciilib_fastsearch
Line
Count
Source
776
24.3M
{
777
24.3M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
0
        return -1;
779
0
    }
780
781
    /* look for special cases */
782
24.3M
    if (m <= 1) {
783
22.5M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
22.5M
        if (mode == FAST_SEARCH)
788
22.5M
            return STRINGLIB(find_char)(s, n, p[0]);
789
49.2k
        else if (mode == FAST_RSEARCH)
790
49.2k
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
0
        else {
792
0
            if (maxcount == PY_SSIZE_T_MAX) {
793
0
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
0
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
0
        }
797
22.5M
    }
798
799
1.80M
    if (mode != FAST_RSEARCH) {
800
1.80M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
1.80M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
1.80M
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
1.80M
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
1.80M
}
unicodeobject.c:ucs1lib_fastsearch
Line
Count
Source
776
75.9M
{
777
75.9M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
0
        return -1;
779
0
    }
780
781
    /* look for special cases */
782
75.9M
    if (m <= 1) {
783
59.2M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
59.2M
        if (mode == FAST_SEARCH)
788
8.45M
            return STRINGLIB(find_char)(s, n, p[0]);
789
50.8M
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
50.8M
        else {
792
50.8M
            if (maxcount == PY_SSIZE_T_MAX) {
793
50.8M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
50.8M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
50.8M
        }
797
59.2M
    }
798
799
16.7M
    if (mode != FAST_RSEARCH) {
800
16.7M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
16.7M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
16.7M
        }
803
20
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
20
            if (mode == FAST_SEARCH) {
810
20
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
20
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
20
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
16.7M
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
16.7M
}
unicodeobject.c:ucs2lib_fastsearch
Line
Count
Source
776
75.5M
{
777
75.5M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
14.7k
        return -1;
779
14.7k
    }
780
781
    /* look for special cases */
782
75.5M
    if (m <= 1) {
783
71.9M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
71.9M
        if (mode == FAST_SEARCH)
788
61.7M
            return STRINGLIB(find_char)(s, n, p[0]);
789
10.1M
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
10.1M
        else {
792
10.1M
            if (maxcount == PY_SSIZE_T_MAX) {
793
10.1M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
10.1M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
10.1M
        }
797
71.9M
    }
798
799
3.62M
    if (mode != FAST_RSEARCH) {
800
3.62M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
3.62M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
3.62M
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
3.62M
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
3.62M
}
unicodeobject.c:ucs4lib_fastsearch
Line
Count
Source
776
57.3M
{
777
57.3M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
0
        return -1;
779
0
    }
780
781
    /* look for special cases */
782
57.3M
    if (m <= 1) {
783
53.1M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
53.1M
        if (mode == FAST_SEARCH)
788
51.8M
            return STRINGLIB(find_char)(s, n, p[0]);
789
1.25M
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
1.25M
        else {
792
1.25M
            if (maxcount == PY_SSIZE_T_MAX) {
793
1.25M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
1.25M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
1.25M
        }
797
53.1M
    }
798
799
4.18M
    if (mode != FAST_RSEARCH) {
800
4.18M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
4.18M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
4.18M
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
4.18M
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
4.18M
}
bytes_methods.c:fastsearch
Line
Count
Source
776
4.19M
{
777
4.19M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
8
        return -1;
779
8
    }
780
781
    /* look for special cases */
782
4.19M
    if (m <= 1) {
783
4.19M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
4.19M
        if (mode == FAST_SEARCH)
788
0
            return STRINGLIB(find_char)(s, n, p[0]);
789
4.19M
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
4.19M
        else {
792
4.19M
            if (maxcount == PY_SSIZE_T_MAX) {
793
4.19M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
4.19M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
4.19M
        }
797
4.19M
    }
798
799
9.20k
    if (mode != FAST_RSEARCH) {
800
2.79k
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
2.79k
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
2.79k
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
2.79k
    }
825
6.41k
    else {
826
        /* FAST_RSEARCH */
827
6.41k
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
6.41k
    }
829
9.20k
}
Unexecuted instantiation: bytearrayobject.c:fastsearch
830