Coverage Report

Created: 2026-02-09 07:07

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Objects/stringlib/fastsearch.h
Line
Count
Source
1
/* stringlib: fastsearch implementation */
2
3
#define STRINGLIB_FASTSEARCH_H
4
5
/* fast search/count implementation, based on a mix between boyer-
6
   moore and horspool, with a few more bells and whistles on the top.
7
   for some more background, see:
8
   https://web.archive.org/web/20201107074620/http://effbot.org/zone/stringlib.htm */
9
10
/* note: fastsearch may access s[n], which isn't a problem when using
11
   Python's ordinary string types, but may cause problems if you're
12
   using this code in other contexts.  also, the count mode returns -1
13
   if there cannot possibly be a match in the target string, and 0 if
14
   it has actually checked for matches, but didn't find any.  callers
15
   beware! */
16
17
/* If the strings are long enough, use Crochemore and Perrin's Two-Way
18
   algorithm, which has worst-case O(n) runtime and best-case O(n/k).
19
   Also compute a table of shifts to achieve O(n/k) in more cases,
20
   and often (data dependent) deduce larger shifts than pure C&P can
21
   deduce. See stringlib_find_two_way_notes.txt in this folder for a
22
   detailed explanation. */
23
24
398M
#define FAST_COUNT 0
25
242M
#define FAST_SEARCH 1
26
67.8M
#define FAST_RSEARCH 2
27
28
#if LONG_BIT >= 128
29
#define STRINGLIB_BLOOM_WIDTH 128
30
#elif LONG_BIT >= 64
31
2.03G
#define STRINGLIB_BLOOM_WIDTH 64
32
#elif LONG_BIT >= 32
33
#define STRINGLIB_BLOOM_WIDTH 32
34
#else
35
#error "LONG_BIT is smaller than 32"
36
#endif
37
38
#define STRINGLIB_BLOOM_ADD(mask, ch) \
39
74.4M
    ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
40
#define STRINGLIB_BLOOM(mask, ch)     \
41
1.96G
    ((mask &  (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
42
43
#ifdef STRINGLIB_FAST_MEMCHR
44
220M
#  define MEMCHR_CUT_OFF 15
45
#else
46
50.3M
#  define MEMCHR_CUT_OFF 40
47
#endif
48
49
Py_LOCAL_INLINE(Py_ssize_t)
50
STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
51
270M
{
52
270M
    const STRINGLIB_CHAR *p, *e;
53
54
270M
    p = s;
55
270M
    e = s + n;
56
270M
    if (n > MEMCHR_CUT_OFF) {
57
#ifdef STRINGLIB_FAST_MEMCHR
58
63.6M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
63.6M
        if (p != NULL)
60
61.7M
            return (p - s);
61
1.82M
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
42.9M
        if (needle != 0) {
71
42.4M
            do {
72
42.4M
                void *candidate = memchr(p, needle,
73
42.4M
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
42.4M
                if (candidate == NULL)
75
467k
                    return -1;
76
41.9M
                s1 = p;
77
41.9M
                p = (const STRINGLIB_CHAR *)
78
41.9M
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
41.9M
                if (*p == ch)
80
41.8M
                    return (p - s);
81
                /* False positive */
82
64.3k
                p++;
83
64.3k
                if (p - s1 > MEMCHR_CUT_OFF)
84
29.2k
                    continue;
85
35.1k
                if (e - p <= MEMCHR_CUT_OFF)
86
3.35k
                    break;
87
31.7k
                e1 = p + MEMCHR_CUT_OFF;
88
948k
                while (p != e1) {
89
929k
                    if (*p == ch)
90
12.6k
                        return (p - s);
91
917k
                    p++;
92
917k
                }
93
31.7k
            }
94
42.3M
            while (e - p > MEMCHR_CUT_OFF);
95
42.3M
        }
96
#endif
97
106M
    }
98
907M
    while (p < e) {
99
765M
        if (*p == ch)
100
22.7M
            return (p - s);
101
742M
        p++;
102
742M
    }
103
141M
    return -1;
104
164M
}
bytesobject.c:stringlib_find_char
Line
Count
Source
51
441k
{
52
441k
    const STRINGLIB_CHAR *p, *e;
53
54
441k
    p = s;
55
441k
    e = s + n;
56
441k
    if (n > MEMCHR_CUT_OFF) {
57
441k
#ifdef STRINGLIB_FAST_MEMCHR
58
441k
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
441k
        if (p != NULL)
60
439k
            return (p - s);
61
2.32k
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                void *candidate = memchr(p, needle,
73
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
441k
    }
98
30
    while (p < e) {
99
30
        if (*p == ch)
100
5
            return (p - s);
101
25
        p++;
102
25
    }
103
0
    return -1;
104
5
}
unicodeobject.c:ucs1lib_find_char
Line
Count
Source
51
164M
{
52
164M
    const STRINGLIB_CHAR *p, *e;
53
54
164M
    p = s;
55
164M
    e = s + n;
56
164M
    if (n > MEMCHR_CUT_OFF) {
57
19.6M
#ifdef STRINGLIB_FAST_MEMCHR
58
19.6M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
19.6M
        if (p != NULL)
60
18.4M
            return (p - s);
61
1.21M
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                void *candidate = memchr(p, needle,
73
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
19.6M
    }
98
725M
    while (p < e) {
99
589M
        if (*p == ch)
100
8.70M
            return (p - s);
101
580M
        p++;
102
580M
    }
103
136M
    return -1;
104
145M
}
unicodeobject.c:ucs2lib_find_char
Line
Count
Source
51
50.1M
{
52
50.1M
    const STRINGLIB_CHAR *p, *e;
53
54
50.1M
    p = s;
55
50.1M
    e = s + n;
56
50.1M
    if (n > MEMCHR_CUT_OFF) {
57
#ifdef STRINGLIB_FAST_MEMCHR
58
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
        if (p != NULL)
60
            return (p - s);
61
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
42.9M
        const STRINGLIB_CHAR *s1, *e1;
66
42.9M
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
42.9M
        if (needle != 0) {
71
42.4M
            do {
72
42.4M
                void *candidate = memchr(p, needle,
73
42.4M
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
42.4M
                if (candidate == NULL)
75
467k
                    return -1;
76
41.9M
                s1 = p;
77
41.9M
                p = (const STRINGLIB_CHAR *)
78
41.9M
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
41.9M
                if (*p == ch)
80
41.8M
                    return (p - s);
81
                /* False positive */
82
64.3k
                p++;
83
64.3k
                if (p - s1 > MEMCHR_CUT_OFF)
84
29.2k
                    continue;
85
35.1k
                if (e - p <= MEMCHR_CUT_OFF)
86
3.35k
                    break;
87
31.7k
                e1 = p + MEMCHR_CUT_OFF;
88
948k
                while (p != e1) {
89
929k
                    if (*p == ch)
90
12.6k
                        return (p - s);
91
917k
                    p++;
92
917k
                }
93
31.7k
            }
94
42.3M
            while (e - p > MEMCHR_CUT_OFF);
95
42.3M
        }
96
42.9M
#endif
97
42.9M
    }
98
102M
    while (p < e) {
99
99.9M
        if (*p == ch)
100
5.56M
            return (p - s);
101
94.3M
        p++;
102
94.3M
    }
103
2.23M
    return -1;
104
7.80M
}
unicodeobject.c:ucs4lib_find_char
Line
Count
Source
51
25.3M
{
52
25.3M
    const STRINGLIB_CHAR *p, *e;
53
54
25.3M
    p = s;
55
25.3M
    e = s + n;
56
25.3M
    if (n > MEMCHR_CUT_OFF) {
57
25.2M
#ifdef STRINGLIB_FAST_MEMCHR
58
25.2M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
25.2M
        if (p != NULL)
60
25.2M
            return (p - s);
61
25.3k
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                void *candidate = memchr(p, needle,
73
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
25.2M
    }
98
295k
    while (p < e) {
99
260k
        if (*p == ch)
100
36.1k
            return (p - s);
101
224k
        p++;
102
224k
    }
103
34.5k
    return -1;
104
70.7k
}
unicodeobject.c:asciilib_find_char
Line
Count
Source
51
15.8M
{
52
15.8M
    const STRINGLIB_CHAR *p, *e;
53
54
15.8M
    p = s;
55
15.8M
    e = s + n;
56
15.8M
    if (n > MEMCHR_CUT_OFF) {
57
13.7M
#ifdef STRINGLIB_FAST_MEMCHR
58
13.7M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
13.7M
        if (p != NULL)
60
13.2M
            return (p - s);
61
532k
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                void *candidate = memchr(p, needle,
73
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
13.7M
    }
98
5.72M
    while (p < e) {
99
5.67M
        if (*p == ch)
100
2.05M
            return (p - s);
101
3.62M
        p++;
102
3.62M
    }
103
43.7k
    return -1;
104
2.10M
}
bytes_methods.c:stringlib_find_char
Line
Count
Source
51
13.8M
{
52
13.8M
    const STRINGLIB_CHAR *p, *e;
53
54
13.8M
    p = s;
55
13.8M
    e = s + n;
56
13.8M
    if (n > MEMCHR_CUT_OFF) {
57
4.48M
#ifdef STRINGLIB_FAST_MEMCHR
58
4.48M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
4.48M
        if (p != NULL)
60
4.43M
            return (p - s);
61
55.2k
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                void *candidate = memchr(p, needle,
73
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
4.48M
    }
98
73.3M
    while (p < e) {
99
70.4M
        if (*p == ch)
100
6.41M
            return (p - s);
101
64.0M
        p++;
102
64.0M
    }
103
2.96M
    return -1;
104
9.38M
}
Unexecuted instantiation: bytearrayobject.c:stringlib_find_char
105
106
#undef MEMCHR_CUT_OFF
107
108
#if STRINGLIB_SIZEOF_CHAR == 1
109
131k
#  define MEMRCHR_CUT_OFF 15
110
#else
111
419k
#  define MEMRCHR_CUT_OFF 40
112
#endif
113
114
115
Py_LOCAL_INLINE(Py_ssize_t)
116
STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
117
432k
{
118
432k
    const STRINGLIB_CHAR *p;
119
432k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
432k
    if (n > MEMRCHR_CUT_OFF) {
126
#if STRINGLIB_SIZEOF_CHAR == 1
127
        p = memrchr(s, ch, n);
128
85.5k
        if (p != NULL)
129
82.2k
            return (p - s);
130
3.38k
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
245k
        if (needle != 0) {
141
283k
            do {
142
283k
                void *candidate = memrchr(s, needle,
143
283k
                                          n * sizeof(STRINGLIB_CHAR));
144
283k
                if (candidate == NULL)
145
1.37k
                    return -1;
146
281k
                n1 = n;
147
281k
                p = (const STRINGLIB_CHAR *)
148
281k
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
281k
                n = p - s;
150
281k
                if (*p == ch)
151
241k
                    return n;
152
                /* False positive */
153
40.1k
                if (n1 - n > MEMRCHR_CUT_OFF)
154
19.6k
                    continue;
155
20.4k
                if (n <= MEMRCHR_CUT_OFF)
156
889
                    break;
157
19.5k
                s1 = p - MEMRCHR_CUT_OFF;
158
788k
                while (p > s1) {
159
769k
                    p--;
160
769k
                    if (*p == ch)
161
588
                        return (p - s);
162
769k
                }
163
18.9k
                n = p - s;
164
18.9k
            }
165
245k
            while (n > MEMRCHR_CUT_OFF);
166
245k
        }
167
#endif
168
330k
    }
169
103k
#endif  /* HAVE_MEMRCHR */
170
103k
    p = s + n;
171
549k
    while (p > s) {
172
518k
        p--;
173
518k
        if (*p == ch)
174
71.5k
            return (p - s);
175
518k
    }
176
31.6k
    return -1;
177
103k
}
Unexecuted instantiation: bytesobject.c:stringlib_rfind_char
unicodeobject.c:ucs1lib_rfind_char
Line
Count
Source
117
77.6k
{
118
77.6k
    const STRINGLIB_CHAR *p;
119
77.6k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
77.6k
    if (n > MEMRCHR_CUT_OFF) {
126
71.5k
#if STRINGLIB_SIZEOF_CHAR == 1
127
71.5k
        p = memrchr(s, ch, n);
128
71.5k
        if (p != NULL)
129
70.4k
            return (p - s);
130
1.12k
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
        if (needle != 0) {
141
            do {
142
                void *candidate = memrchr(s, needle,
143
                                          n * sizeof(STRINGLIB_CHAR));
144
                if (candidate == NULL)
145
                    return -1;
146
                n1 = n;
147
                p = (const STRINGLIB_CHAR *)
148
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
                n = p - s;
150
                if (*p == ch)
151
                    return n;
152
                /* False positive */
153
                if (n1 - n > MEMRCHR_CUT_OFF)
154
                    continue;
155
                if (n <= MEMRCHR_CUT_OFF)
156
                    break;
157
                s1 = p - MEMRCHR_CUT_OFF;
158
                while (p > s1) {
159
                    p--;
160
                    if (*p == ch)
161
                        return (p - s);
162
                }
163
                n = p - s;
164
            }
165
            while (n > MEMRCHR_CUT_OFF);
166
        }
167
#endif
168
71.5k
    }
169
6.04k
#endif  /* HAVE_MEMRCHR */
170
6.04k
    p = s + n;
171
17.8k
    while (p > s) {
172
16.7k
        p--;
173
16.7k
        if (*p == ch)
174
4.94k
            return (p - s);
175
16.7k
    }
176
1.10k
    return -1;
177
6.04k
}
unicodeobject.c:ucs2lib_rfind_char
Line
Count
Source
117
200k
{
118
200k
    const STRINGLIB_CHAR *p;
119
200k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
200k
    if (n > MEMRCHR_CUT_OFF) {
126
#if STRINGLIB_SIZEOF_CHAR == 1
127
        p = memrchr(s, ch, n);
128
        if (p != NULL)
129
            return (p - s);
130
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
176k
        const STRINGLIB_CHAR *s1;
135
176k
        Py_ssize_t n1;
136
176k
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
176k
        if (needle != 0) {
141
179k
            do {
142
179k
                void *candidate = memrchr(s, needle,
143
179k
                                          n * sizeof(STRINGLIB_CHAR));
144
179k
                if (candidate == NULL)
145
770
                    return -1;
146
179k
                n1 = n;
147
179k
                p = (const STRINGLIB_CHAR *)
148
179k
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
179k
                n = p - s;
150
179k
                if (*p == ch)
151
175k
                    return n;
152
                /* False positive */
153
3.91k
                if (n1 - n > MEMRCHR_CUT_OFF)
154
1.71k
                    continue;
155
2.20k
                if (n <= MEMRCHR_CUT_OFF)
156
460
                    break;
157
1.74k
                s1 = p - MEMRCHR_CUT_OFF;
158
64.5k
                while (p > s1) {
159
63.0k
                    p--;
160
63.0k
                    if (*p == ch)
161
266
                        return (p - s);
162
63.0k
                }
163
1.47k
                n = p - s;
164
1.47k
            }
165
176k
            while (n > MEMRCHR_CUT_OFF);
166
176k
        }
167
176k
#endif
168
176k
    }
169
24.0k
#endif  /* HAVE_MEMRCHR */
170
24.0k
    p = s + n;
171
89.0k
    while (p > s) {
172
86.9k
        p--;
173
86.9k
        if (*p == ch)
174
21.9k
            return (p - s);
175
86.9k
    }
176
2.12k
    return -1;
177
24.0k
}
unicodeobject.c:ucs4lib_rfind_char
Line
Count
Source
117
100k
{
118
100k
    const STRINGLIB_CHAR *p;
119
100k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
100k
    if (n > MEMRCHR_CUT_OFF) {
126
#if STRINGLIB_SIZEOF_CHAR == 1
127
        p = memrchr(s, ch, n);
128
        if (p != NULL)
129
            return (p - s);
130
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
68.0k
        const STRINGLIB_CHAR *s1;
135
68.0k
        Py_ssize_t n1;
136
68.0k
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
68.0k
        if (needle != 0) {
141
103k
            do {
142
103k
                void *candidate = memrchr(s, needle,
143
103k
                                          n * sizeof(STRINGLIB_CHAR));
144
103k
                if (candidate == NULL)
145
609
                    return -1;
146
102k
                n1 = n;
147
102k
                p = (const STRINGLIB_CHAR *)
148
102k
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
102k
                n = p - s;
150
102k
                if (*p == ch)
151
66.3k
                    return n;
152
                /* False positive */
153
36.2k
                if (n1 - n > MEMRCHR_CUT_OFF)
154
17.9k
                    continue;
155
18.2k
                if (n <= MEMRCHR_CUT_OFF)
156
429
                    break;
157
17.8k
                s1 = p - MEMRCHR_CUT_OFF;
158
723k
                while (p > s1) {
159
706k
                    p--;
160
706k
                    if (*p == ch)
161
322
                        return (p - s);
162
706k
                }
163
17.5k
                n = p - s;
164
17.5k
            }
165
68.0k
            while (n > MEMRCHR_CUT_OFF);
166
68.0k
        }
167
68.0k
#endif
168
68.0k
    }
169
32.7k
#endif  /* HAVE_MEMRCHR */
170
32.7k
    p = s + n;
171
224k
    while (p > s) {
172
222k
        p--;
173
222k
        if (*p == ch)
174
31.0k
            return (p - s);
175
222k
    }
176
1.72k
    return -1;
177
32.7k
}
unicodeobject.c:asciilib_rfind_char
Line
Count
Source
117
33.5k
{
118
33.5k
    const STRINGLIB_CHAR *p;
119
33.5k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
33.5k
    if (n > MEMRCHR_CUT_OFF) {
126
11.1k
#if STRINGLIB_SIZEOF_CHAR == 1
127
11.1k
        p = memrchr(s, ch, n);
128
11.1k
        if (p != NULL)
129
10.9k
            return (p - s);
130
210
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
        if (needle != 0) {
141
            do {
142
                void *candidate = memrchr(s, needle,
143
                                          n * sizeof(STRINGLIB_CHAR));
144
                if (candidate == NULL)
145
                    return -1;
146
                n1 = n;
147
                p = (const STRINGLIB_CHAR *)
148
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
                n = p - s;
150
                if (*p == ch)
151
                    return n;
152
                /* False positive */
153
                if (n1 - n > MEMRCHR_CUT_OFF)
154
                    continue;
155
                if (n <= MEMRCHR_CUT_OFF)
156
                    break;
157
                s1 = p - MEMRCHR_CUT_OFF;
158
                while (p > s1) {
159
                    p--;
160
                    if (*p == ch)
161
                        return (p - s);
162
                }
163
                n = p - s;
164
            }
165
            while (n > MEMRCHR_CUT_OFF);
166
        }
167
#endif
168
11.1k
    }
169
22.3k
#endif  /* HAVE_MEMRCHR */
170
22.3k
    p = s + n;
171
128k
    while (p > s) {
172
114k
        p--;
173
114k
        if (*p == ch)
174
8.56k
            return (p - s);
175
114k
    }
176
13.7k
    return -1;
177
22.3k
}
bytes_methods.c:stringlib_rfind_char
Line
Count
Source
117
20.7k
{
118
20.7k
    const STRINGLIB_CHAR *p;
119
20.7k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
20.7k
    if (n > MEMRCHR_CUT_OFF) {
126
2.84k
#if STRINGLIB_SIZEOF_CHAR == 1
127
2.84k
        p = memrchr(s, ch, n);
128
2.84k
        if (p != NULL)
129
799
            return (p - s);
130
2.04k
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
        if (needle != 0) {
141
            do {
142
                void *candidate = memrchr(s, needle,
143
                                          n * sizeof(STRINGLIB_CHAR));
144
                if (candidate == NULL)
145
                    return -1;
146
                n1 = n;
147
                p = (const STRINGLIB_CHAR *)
148
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
                n = p - s;
150
                if (*p == ch)
151
                    return n;
152
                /* False positive */
153
                if (n1 - n > MEMRCHR_CUT_OFF)
154
                    continue;
155
                if (n <= MEMRCHR_CUT_OFF)
156
                    break;
157
                s1 = p - MEMRCHR_CUT_OFF;
158
                while (p > s1) {
159
                    p--;
160
                    if (*p == ch)
161
                        return (p - s);
162
                }
163
                n = p - s;
164
            }
165
            while (n > MEMRCHR_CUT_OFF);
166
        }
167
#endif
168
2.84k
    }
169
17.8k
#endif  /* HAVE_MEMRCHR */
170
17.8k
    p = s + n;
171
90.1k
    while (p > s) {
172
77.2k
        p--;
173
77.2k
        if (*p == ch)
174
5.02k
            return (p - s);
175
77.2k
    }
176
12.8k
    return -1;
177
17.8k
}
Unexecuted instantiation: bytearrayobject.c:stringlib_rfind_char
178
179
#undef MEMRCHR_CUT_OFF
180
181
/* Change to a 1 to see logging comments walk through the algorithm. */
182
#if 0 && STRINGLIB_SIZEOF_CHAR == 1
183
# define LOG(...) printf(__VA_ARGS__)
184
# define LOG_STRING(s, n) printf("\"%.*s\"", (int)(n), s)
185
# define LOG_LINEUP() do {                                         \
186
    LOG("> "); LOG_STRING(haystack, len_haystack); LOG("\n> ");    \
187
    LOG("%*s",(int)(window_last - haystack + 1 - len_needle), ""); \
188
    LOG_STRING(needle, len_needle); LOG("\n");                     \
189
} while(0)
190
#else
191
# define LOG(...)
192
# define LOG_STRING(s, n)
193
# define LOG_LINEUP()
194
#endif
195
196
Py_LOCAL_INLINE(Py_ssize_t)
197
STRINGLIB(_lex_search)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
198
                       Py_ssize_t *return_period, int invert_alphabet)
199
46
{
200
    /* Do a lexicographic search. Essentially this:
201
           >>> max(needle[i:] for i in range(len(needle)+1))
202
       Also find the period of the right half.   */
203
46
    Py_ssize_t max_suffix = 0;
204
46
    Py_ssize_t candidate = 1;
205
46
    Py_ssize_t k = 0;
206
    // The period of the right half.
207
46
    Py_ssize_t period = 1;
208
209
460
    while (candidate + k < len_needle) {
210
        // each loop increases candidate + k + max_suffix
211
414
        STRINGLIB_CHAR a = needle[candidate + k];
212
414
        STRINGLIB_CHAR b = needle[max_suffix + k];
213
        // check if the suffix at candidate is better than max_suffix
214
414
        if (invert_alphabet ? (b < a) : (a < b)) {
215
            // Fell short of max_suffix.
216
            // The next k + 1 characters are non-increasing
217
            // from candidate, so they won't start a maximal suffix.
218
299
            candidate += k + 1;
219
299
            k = 0;
220
            // We've ruled out any period smaller than what's
221
            // been scanned since max_suffix.
222
299
            period = candidate - max_suffix;
223
299
        }
224
115
        else if (a == b) {
225
23
            if (k + 1 != period) {
226
                // Keep scanning the equal strings
227
0
                k++;
228
0
            }
229
23
            else {
230
                // Matched a whole period.
231
                // Start matching the next period.
232
23
                candidate += period;
233
23
                k = 0;
234
23
            }
235
23
        }
236
92
        else {
237
            // Did better than max_suffix, so replace it.
238
92
            max_suffix = candidate;
239
92
            candidate++;
240
92
            k = 0;
241
92
            period = 1;
242
92
        }
243
414
    }
244
46
    *return_period = period;
245
46
    return max_suffix;
246
46
}
Unexecuted instantiation: bytesobject.c:stringlib__lex_search
Unexecuted instantiation: unicodeobject.c:asciilib__lex_search
unicodeobject.c:ucs1lib__lex_search
Line
Count
Source
199
46
{
200
    /* Do a lexicographic search. Essentially this:
201
           >>> max(needle[i:] for i in range(len(needle)+1))
202
       Also find the period of the right half.   */
203
46
    Py_ssize_t max_suffix = 0;
204
46
    Py_ssize_t candidate = 1;
205
46
    Py_ssize_t k = 0;
206
    // The period of the right half.
207
46
    Py_ssize_t period = 1;
208
209
460
    while (candidate + k < len_needle) {
210
        // each loop increases candidate + k + max_suffix
211
414
        STRINGLIB_CHAR a = needle[candidate + k];
212
414
        STRINGLIB_CHAR b = needle[max_suffix + k];
213
        // check if the suffix at candidate is better than max_suffix
214
414
        if (invert_alphabet ? (b < a) : (a < b)) {
215
            // Fell short of max_suffix.
216
            // The next k + 1 characters are non-increasing
217
            // from candidate, so they won't start a maximal suffix.
218
299
            candidate += k + 1;
219
299
            k = 0;
220
            // We've ruled out any period smaller than what's
221
            // been scanned since max_suffix.
222
299
            period = candidate - max_suffix;
223
299
        }
224
115
        else if (a == b) {
225
23
            if (k + 1 != period) {
226
                // Keep scanning the equal strings
227
0
                k++;
228
0
            }
229
23
            else {
230
                // Matched a whole period.
231
                // Start matching the next period.
232
23
                candidate += period;
233
23
                k = 0;
234
23
            }
235
23
        }
236
92
        else {
237
            // Did better than max_suffix, so replace it.
238
92
            max_suffix = candidate;
239
92
            candidate++;
240
92
            k = 0;
241
92
            period = 1;
242
92
        }
243
414
    }
244
46
    *return_period = period;
245
46
    return max_suffix;
246
46
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__lex_search
Unexecuted instantiation: unicodeobject.c:ucs4lib__lex_search
Unexecuted instantiation: bytes_methods.c:stringlib__lex_search
Unexecuted instantiation: bytearrayobject.c:stringlib__lex_search
247
248
Py_LOCAL_INLINE(Py_ssize_t)
249
STRINGLIB(_factorize)(const STRINGLIB_CHAR *needle,
250
                      Py_ssize_t len_needle,
251
                      Py_ssize_t *return_period)
252
23
{
253
    /* Do a "critical factorization", making it so that:
254
       >>> needle = (left := needle[:cut]) + (right := needle[cut:])
255
       where the "local period" of the cut is maximal.
256
257
       The local period of the cut is the minimal length of a string w
258
       such that (left endswith w or w endswith left)
259
       and (right startswith w or w startswith right).
260
261
       The Critical Factorization Theorem says that this maximal local
262
       period is the global period of the string.
263
264
       Crochemore and Perrin (1991) show that this cut can be computed
265
       as the later of two cuts: one that gives a lexicographically
266
       maximal right half, and one that gives the same with the
267
       with respect to a reversed alphabet-ordering.
268
269
       This is what we want to happen:
270
           >>> x = "GCAGAGAG"
271
           >>> cut, period = factorize(x)
272
           >>> x[:cut], (right := x[cut:])
273
           ('GC', 'AGAGAG')
274
           >>> period  # right half period
275
           2
276
           >>> right[period:] == right[:-period]
277
           True
278
279
       This is how the local period lines up in the above example:
280
                GC | AGAGAG
281
           AGAGAGC = AGAGAGC
282
       The length of this minimal repetition is 7, which is indeed the
283
       period of the original string. */
284
285
23
    Py_ssize_t cut1, period1, cut2, period2, cut, period;
286
23
    cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0);
287
23
    cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1);
288
289
    // Take the later cut.
290
23
    if (cut1 > cut2) {
291
23
        period = period1;
292
23
        cut = cut1;
293
23
    }
294
0
    else {
295
0
        period = period2;
296
0
        cut = cut2;
297
0
    }
298
299
23
    LOG("split: "); LOG_STRING(needle, cut);
300
23
    LOG(" + "); LOG_STRING(needle + cut, len_needle - cut);
301
23
    LOG("\n");
302
303
23
    *return_period = period;
304
23
    return cut;
305
23
}
Unexecuted instantiation: bytesobject.c:stringlib__factorize
Unexecuted instantiation: unicodeobject.c:asciilib__factorize
unicodeobject.c:ucs1lib__factorize
Line
Count
Source
252
23
{
253
    /* Do a "critical factorization", making it so that:
254
       >>> needle = (left := needle[:cut]) + (right := needle[cut:])
255
       where the "local period" of the cut is maximal.
256
257
       The local period of the cut is the minimal length of a string w
258
       such that (left endswith w or w endswith left)
259
       and (right startswith w or w startswith right).
260
261
       The Critical Factorization Theorem says that this maximal local
262
       period is the global period of the string.
263
264
       Crochemore and Perrin (1991) show that this cut can be computed
265
       as the later of two cuts: one that gives a lexicographically
266
       maximal right half, and one that gives the same with the
267
       with respect to a reversed alphabet-ordering.
268
269
       This is what we want to happen:
270
           >>> x = "GCAGAGAG"
271
           >>> cut, period = factorize(x)
272
           >>> x[:cut], (right := x[cut:])
273
           ('GC', 'AGAGAG')
274
           >>> period  # right half period
275
           2
276
           >>> right[period:] == right[:-period]
277
           True
278
279
       This is how the local period lines up in the above example:
280
                GC | AGAGAG
281
           AGAGAGC = AGAGAGC
282
       The length of this minimal repetition is 7, which is indeed the
283
       period of the original string. */
284
285
23
    Py_ssize_t cut1, period1, cut2, period2, cut, period;
286
23
    cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0);
287
23
    cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1);
288
289
    // Take the later cut.
290
23
    if (cut1 > cut2) {
291
23
        period = period1;
292
23
        cut = cut1;
293
23
    }
294
0
    else {
295
0
        period = period2;
296
0
        cut = cut2;
297
0
    }
298
299
23
    LOG("split: "); LOG_STRING(needle, cut);
300
23
    LOG(" + "); LOG_STRING(needle + cut, len_needle - cut);
301
23
    LOG("\n");
302
303
23
    *return_period = period;
304
23
    return cut;
305
23
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__factorize
Unexecuted instantiation: unicodeobject.c:ucs4lib__factorize
Unexecuted instantiation: bytes_methods.c:stringlib__factorize
Unexecuted instantiation: bytearrayobject.c:stringlib__factorize
306
307
308
253
#define SHIFT_TYPE uint8_t
309
#define MAX_SHIFT UINT8_MAX
310
311
83.9k
#define TABLE_SIZE_BITS 6u
312
83.9k
#define TABLE_SIZE (1U << TABLE_SIZE_BITS)
313
82.4k
#define TABLE_MASK (TABLE_SIZE - 1U)
314
315
typedef struct STRINGLIB(_pre) {
316
    const STRINGLIB_CHAR *needle;
317
    Py_ssize_t len_needle;
318
    Py_ssize_t cut;
319
    Py_ssize_t period;
320
    Py_ssize_t gap;
321
    int is_periodic;
322
    SHIFT_TYPE table[TABLE_SIZE];
323
} STRINGLIB(prework);
324
325
326
static void
327
STRINGLIB(_preprocess)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
328
                       STRINGLIB(prework) *p)
329
23
{
330
23
    p->needle = needle;
331
23
    p->len_needle = len_needle;
332
23
    p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period));
333
23
    assert(p->period + p->cut <= len_needle);
334
23
    p->is_periodic = (0 == memcmp(needle,
335
23
                                  needle + p->period,
336
23
                                  p->cut * STRINGLIB_SIZEOF_CHAR));
337
23
    if (p->is_periodic) {
338
0
        assert(p->cut <= len_needle/2);
339
0
        assert(p->cut < p->period);
340
0
    }
341
23
    else {
342
        // A lower bound on the period
343
23
        p->period = Py_MAX(p->cut, len_needle - p->cut) + 1;
344
23
    }
345
    // The gap between the last character and the previous
346
    // occurrence of an equivalent character (modulo TABLE_SIZE)
347
23
    p->gap = len_needle;
348
23
    STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK;
349
161
    for (Py_ssize_t i = len_needle - 2; i >= 0; i--) {
350
161
        STRINGLIB_CHAR x = needle[i] & TABLE_MASK;
351
161
        if (x == last) {
352
23
            p->gap = len_needle - 1 - i;
353
23
            break;
354
23
        }
355
161
    }
356
    // Fill up a compressed Boyer-Moore "Bad Character" table
357
23
    Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT);
358
1.49k
    for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; i++) {
359
1.47k
        p->table[i] = Py_SAFE_DOWNCAST(not_found_shift,
360
1.47k
                                       Py_ssize_t, SHIFT_TYPE);
361
1.47k
    }
362
253
    for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; i++) {
363
230
        SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i,
364
230
                                            Py_ssize_t, SHIFT_TYPE);
365
230
        p->table[needle[i] & TABLE_MASK] = shift;
366
230
    }
367
23
}
Unexecuted instantiation: bytesobject.c:stringlib__preprocess
Unexecuted instantiation: unicodeobject.c:asciilib__preprocess
unicodeobject.c:ucs1lib__preprocess
Line
Count
Source
329
23
{
330
23
    p->needle = needle;
331
23
    p->len_needle = len_needle;
332
23
    p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period));
333
23
    assert(p->period + p->cut <= len_needle);
334
23
    p->is_periodic = (0 == memcmp(needle,
335
23
                                  needle + p->period,
336
23
                                  p->cut * STRINGLIB_SIZEOF_CHAR));
337
23
    if (p->is_periodic) {
338
0
        assert(p->cut <= len_needle/2);
339
0
        assert(p->cut < p->period);
340
0
    }
341
23
    else {
342
        // A lower bound on the period
343
23
        p->period = Py_MAX(p->cut, len_needle - p->cut) + 1;
344
23
    }
345
    // The gap between the last character and the previous
346
    // occurrence of an equivalent character (modulo TABLE_SIZE)
347
23
    p->gap = len_needle;
348
23
    STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK;
349
161
    for (Py_ssize_t i = len_needle - 2; i >= 0; i--) {
350
161
        STRINGLIB_CHAR x = needle[i] & TABLE_MASK;
351
161
        if (x == last) {
352
23
            p->gap = len_needle - 1 - i;
353
23
            break;
354
23
        }
355
161
    }
356
    // Fill up a compressed Boyer-Moore "Bad Character" table
357
23
    Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT);
358
1.49k
    for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; i++) {
359
1.47k
        p->table[i] = Py_SAFE_DOWNCAST(not_found_shift,
360
1.47k
                                       Py_ssize_t, SHIFT_TYPE);
361
1.47k
    }
362
253
    for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; i++) {
363
230
        SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i,
364
230
                                            Py_ssize_t, SHIFT_TYPE);
365
230
        p->table[needle[i] & TABLE_MASK] = shift;
366
230
    }
367
23
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__preprocess
Unexecuted instantiation: unicodeobject.c:ucs4lib__preprocess
Unexecuted instantiation: bytes_methods.c:stringlib__preprocess
Unexecuted instantiation: bytearrayobject.c:stringlib__preprocess
368
369
static Py_ssize_t
370
STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
371
                    STRINGLIB(prework) *p)
372
23
{
373
    // Crochemore and Perrin's (1991) Two-Way algorithm.
374
    // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
375
23
    const Py_ssize_t len_needle = p->len_needle;
376
23
    const Py_ssize_t cut = p->cut;
377
23
    Py_ssize_t period = p->period;
378
23
    const STRINGLIB_CHAR *const needle = p->needle;
379
23
    const STRINGLIB_CHAR *window_last = haystack + len_needle - 1;
380
23
    const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack;
381
23
    SHIFT_TYPE *table = p->table;
382
23
    const STRINGLIB_CHAR *window;
383
23
    LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack);
384
385
23
    Py_ssize_t gap = p->gap;
386
23
    Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap);
387
23
    if (p->is_periodic) {
388
0
        LOG("Needle is periodic.\n");
389
0
        Py_ssize_t memory = 0;
390
0
      periodicwindowloop:
391
0
        while (window_last < haystack_end) {
392
0
            assert(memory == 0);
393
0
            for (;;) {
394
0
                LOG_LINEUP();
395
0
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
396
0
                window_last += shift;
397
0
                if (shift == 0) {
398
0
                    break;
399
0
                }
400
0
                if (window_last >= haystack_end) {
401
0
                    return -1;
402
0
                }
403
0
                LOG("Horspool skip\n");
404
0
            }
405
0
          no_shift:
406
0
            window = window_last - len_needle + 1;
407
0
            assert((window[len_needle - 1] & TABLE_MASK) ==
408
0
                   (needle[len_needle - 1] & TABLE_MASK));
409
0
            Py_ssize_t i = Py_MAX(cut, memory);
410
0
            for (; i < len_needle; i++) {
411
0
                if (needle[i] != window[i]) {
412
0
                    if (i < gap_jump_end) {
413
0
                        LOG("Early right half mismatch: jump by gap.\n");
414
0
                        assert(gap >= i - cut + 1);
415
0
                        window_last += gap;
416
0
                    }
417
0
                    else {
418
0
                        LOG("Late right half mismatch: jump by n (>gap)\n");
419
0
                        assert(i - cut + 1 > gap);
420
0
                        window_last += i - cut + 1;
421
0
                    }
422
0
                    memory = 0;
423
0
                    goto periodicwindowloop;
424
0
                }
425
0
            }
426
0
            for (i = memory; i < cut; i++) {
427
0
                if (needle[i] != window[i]) {
428
0
                    LOG("Left half does not match.\n");
429
0
                    window_last += period;
430
0
                    memory = len_needle - period;
431
0
                    if (window_last >= haystack_end) {
432
0
                        return -1;
433
0
                    }
434
0
                    Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
435
0
                    if (shift) {
436
                        // A mismatch has been identified to the right
437
                        // of where i will next start, so we can jump
438
                        // at least as far as if the mismatch occurred
439
                        // on the first comparison.
440
0
                        Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1;
441
0
                        LOG("Skip with Memory.\n");
442
0
                        memory = 0;
443
0
                        window_last += Py_MAX(shift, mem_jump);
444
0
                        goto periodicwindowloop;
445
0
                    }
446
0
                    goto no_shift;
447
0
                }
448
0
            }
449
0
            LOG("Found a match!\n");
450
0
            return window - haystack;
451
0
        }
452
0
    }
453
23
    else {
454
23
        period = Py_MAX(gap, period);
455
23
        LOG("Needle is not periodic.\n");
456
14.1k
      windowloop:
457
14.1k
        while (window_last < haystack_end) {
458
81.9k
            for (;;) {
459
81.9k
                LOG_LINEUP();
460
81.9k
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
461
81.9k
                window_last += shift;
462
81.9k
                if (shift == 0) {
463
14.0k
                    break;
464
14.0k
                }
465
67.9k
                if (window_last >= haystack_end) {
466
19
                    return -1;
467
19
                }
468
67.8k
                LOG("Horspool skip\n");
469
67.8k
            }
470
14.0k
            window = window_last - len_needle + 1;
471
14.0k
            assert((window[len_needle - 1] & TABLE_MASK) ==
472
14.0k
                   (needle[len_needle - 1] & TABLE_MASK));
473
14.0k
            Py_ssize_t i = cut;
474
14.3k
            for (; i < len_needle; i++) {
475
14.2k
                if (needle[i] != window[i]) {
476
13.9k
                    if (i < gap_jump_end) {
477
13.9k
                        LOG("Early right half mismatch: jump by gap.\n");
478
13.9k
                        assert(gap >= i - cut + 1);
479
13.9k
                        window_last += gap;
480
13.9k
                    }
481
0
                    else {
482
0
                        LOG("Late right half mismatch: jump by n (>gap)\n");
483
0
                        assert(i - cut + 1 > gap);
484
0
                        window_last += i - cut + 1;
485
0
                    }
486
13.9k
                    goto windowloop;
487
13.9k
                }
488
14.2k
            }
489
226
            for (Py_ssize_t i = 0; i < cut; i++) {
490
224
                if (needle[i] != window[i]) {
491
128
                    LOG("Left half does not match.\n");
492
128
                    window_last += period;
493
128
                    goto windowloop;
494
128
                }
495
224
            }
496
2
            LOG("Found a match!\n");
497
2
            return window - haystack;
498
130
        }
499
14.1k
    }
500
2
    LOG("Not found. Returning -1.\n");
501
2
    return -1;
502
23
}
Unexecuted instantiation: bytesobject.c:stringlib__two_way
Unexecuted instantiation: unicodeobject.c:asciilib__two_way
unicodeobject.c:ucs1lib__two_way
Line
Count
Source
372
23
{
373
    // Crochemore and Perrin's (1991) Two-Way algorithm.
374
    // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
375
23
    const Py_ssize_t len_needle = p->len_needle;
376
23
    const Py_ssize_t cut = p->cut;
377
23
    Py_ssize_t period = p->period;
378
23
    const STRINGLIB_CHAR *const needle = p->needle;
379
23
    const STRINGLIB_CHAR *window_last = haystack + len_needle - 1;
380
23
    const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack;
381
23
    SHIFT_TYPE *table = p->table;
382
23
    const STRINGLIB_CHAR *window;
383
23
    LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack);
384
385
23
    Py_ssize_t gap = p->gap;
386
23
    Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap);
387
23
    if (p->is_periodic) {
388
0
        LOG("Needle is periodic.\n");
389
0
        Py_ssize_t memory = 0;
390
0
      periodicwindowloop:
391
0
        while (window_last < haystack_end) {
392
0
            assert(memory == 0);
393
0
            for (;;) {
394
0
                LOG_LINEUP();
395
0
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
396
0
                window_last += shift;
397
0
                if (shift == 0) {
398
0
                    break;
399
0
                }
400
0
                if (window_last >= haystack_end) {
401
0
                    return -1;
402
0
                }
403
0
                LOG("Horspool skip\n");
404
0
            }
405
0
          no_shift:
406
0
            window = window_last - len_needle + 1;
407
0
            assert((window[len_needle - 1] & TABLE_MASK) ==
408
0
                   (needle[len_needle - 1] & TABLE_MASK));
409
0
            Py_ssize_t i = Py_MAX(cut, memory);
410
0
            for (; i < len_needle; i++) {
411
0
                if (needle[i] != window[i]) {
412
0
                    if (i < gap_jump_end) {
413
0
                        LOG("Early right half mismatch: jump by gap.\n");
414
0
                        assert(gap >= i - cut + 1);
415
0
                        window_last += gap;
416
0
                    }
417
0
                    else {
418
0
                        LOG("Late right half mismatch: jump by n (>gap)\n");
419
0
                        assert(i - cut + 1 > gap);
420
0
                        window_last += i - cut + 1;
421
0
                    }
422
0
                    memory = 0;
423
0
                    goto periodicwindowloop;
424
0
                }
425
0
            }
426
0
            for (i = memory; i < cut; i++) {
427
0
                if (needle[i] != window[i]) {
428
0
                    LOG("Left half does not match.\n");
429
0
                    window_last += period;
430
0
                    memory = len_needle - period;
431
0
                    if (window_last >= haystack_end) {
432
0
                        return -1;
433
0
                    }
434
0
                    Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
435
0
                    if (shift) {
436
                        // A mismatch has been identified to the right
437
                        // of where i will next start, so we can jump
438
                        // at least as far as if the mismatch occurred
439
                        // on the first comparison.
440
0
                        Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1;
441
0
                        LOG("Skip with Memory.\n");
442
0
                        memory = 0;
443
0
                        window_last += Py_MAX(shift, mem_jump);
444
0
                        goto periodicwindowloop;
445
0
                    }
446
0
                    goto no_shift;
447
0
                }
448
0
            }
449
0
            LOG("Found a match!\n");
450
0
            return window - haystack;
451
0
        }
452
0
    }
453
23
    else {
454
23
        period = Py_MAX(gap, period);
455
23
        LOG("Needle is not periodic.\n");
456
14.1k
      windowloop:
457
14.1k
        while (window_last < haystack_end) {
458
81.9k
            for (;;) {
459
81.9k
                LOG_LINEUP();
460
81.9k
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
461
81.9k
                window_last += shift;
462
81.9k
                if (shift == 0) {
463
14.0k
                    break;
464
14.0k
                }
465
67.9k
                if (window_last >= haystack_end) {
466
19
                    return -1;
467
19
                }
468
67.8k
                LOG("Horspool skip\n");
469
67.8k
            }
470
14.0k
            window = window_last - len_needle + 1;
471
14.0k
            assert((window[len_needle - 1] & TABLE_MASK) ==
472
14.0k
                   (needle[len_needle - 1] & TABLE_MASK));
473
14.0k
            Py_ssize_t i = cut;
474
14.3k
            for (; i < len_needle; i++) {
475
14.2k
                if (needle[i] != window[i]) {
476
13.9k
                    if (i < gap_jump_end) {
477
13.9k
                        LOG("Early right half mismatch: jump by gap.\n");
478
13.9k
                        assert(gap >= i - cut + 1);
479
13.9k
                        window_last += gap;
480
13.9k
                    }
481
0
                    else {
482
0
                        LOG("Late right half mismatch: jump by n (>gap)\n");
483
0
                        assert(i - cut + 1 > gap);
484
0
                        window_last += i - cut + 1;
485
0
                    }
486
13.9k
                    goto windowloop;
487
13.9k
                }
488
14.2k
            }
489
226
            for (Py_ssize_t i = 0; i < cut; i++) {
490
224
                if (needle[i] != window[i]) {
491
128
                    LOG("Left half does not match.\n");
492
128
                    window_last += period;
493
128
                    goto windowloop;
494
128
                }
495
224
            }
496
2
            LOG("Found a match!\n");
497
2
            return window - haystack;
498
130
        }
499
14.1k
    }
500
2
    LOG("Not found. Returning -1.\n");
501
2
    return -1;
502
23
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__two_way
Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way
Unexecuted instantiation: bytes_methods.c:stringlib__two_way
Unexecuted instantiation: bytearrayobject.c:stringlib__two_way
503
504
505
static Py_ssize_t
506
STRINGLIB(_two_way_find)(const STRINGLIB_CHAR *haystack,
507
                         Py_ssize_t len_haystack,
508
                         const STRINGLIB_CHAR *needle,
509
                         Py_ssize_t len_needle)
510
23
{
511
23
    LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack);
512
23
    STRINGLIB(prework) p;
513
23
    STRINGLIB(_preprocess)(needle, len_needle, &p);
514
23
    return STRINGLIB(_two_way)(haystack, len_haystack, &p);
515
23
}
Unexecuted instantiation: bytesobject.c:stringlib__two_way_find
Unexecuted instantiation: unicodeobject.c:asciilib__two_way_find
unicodeobject.c:ucs1lib__two_way_find
Line
Count
Source
510
23
{
511
23
    LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack);
512
23
    STRINGLIB(prework) p;
513
23
    STRINGLIB(_preprocess)(needle, len_needle, &p);
514
23
    return STRINGLIB(_two_way)(haystack, len_haystack, &p);
515
23
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__two_way_find
Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way_find
Unexecuted instantiation: bytes_methods.c:stringlib__two_way_find
Unexecuted instantiation: bytearrayobject.c:stringlib__two_way_find
516
517
518
static Py_ssize_t
519
STRINGLIB(_two_way_count)(const STRINGLIB_CHAR *haystack,
520
                          Py_ssize_t len_haystack,
521
                          const STRINGLIB_CHAR *needle,
522
                          Py_ssize_t len_needle,
523
                          Py_ssize_t maxcount)
524
0
{
525
0
    LOG("###### Counting \"%s\" in \"%s\".\n", needle, haystack);
526
0
    STRINGLIB(prework) p;
527
0
    STRINGLIB(_preprocess)(needle, len_needle, &p);
528
0
    Py_ssize_t index = 0, count = 0;
529
0
    while (1) {
530
0
        Py_ssize_t result;
531
0
        result = STRINGLIB(_two_way)(haystack + index,
532
0
                                     len_haystack - index, &p);
533
0
        if (result == -1) {
534
0
            return count;
535
0
        }
536
0
        count++;
537
0
        if (count == maxcount) {
538
0
            return maxcount;
539
0
        }
540
0
        index += result + len_needle;
541
0
    }
542
0
    return count;
543
0
}
Unexecuted instantiation: bytesobject.c:stringlib__two_way_count
Unexecuted instantiation: unicodeobject.c:asciilib__two_way_count
Unexecuted instantiation: unicodeobject.c:ucs1lib__two_way_count
Unexecuted instantiation: unicodeobject.c:ucs2lib__two_way_count
Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way_count
Unexecuted instantiation: bytes_methods.c:stringlib__two_way_count
Unexecuted instantiation: bytearrayobject.c:stringlib__two_way_count
544
545
#undef SHIFT_TYPE
546
#undef NOT_FOUND
547
#undef SHIFT_OVERFLOW
548
#undef TABLE_SIZE_BITS
549
#undef TABLE_SIZE
550
#undef TABLE_MASK
551
552
#undef LOG
553
#undef LOG_STRING
554
#undef LOG_LINEUP
555
556
static inline Py_ssize_t
557
STRINGLIB(default_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
558
                        const STRINGLIB_CHAR* p, Py_ssize_t m,
559
                        Py_ssize_t maxcount, int mode)
560
22.5M
{
561
22.5M
    const Py_ssize_t w = n - m;
562
22.5M
    Py_ssize_t mlast = m - 1, count = 0;
563
22.5M
    Py_ssize_t gap = mlast;
564
22.5M
    const STRINGLIB_CHAR last = p[mlast];
565
22.5M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
22.5M
    unsigned long mask = 0;
568
74.3M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
51.8M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
51.8M
        if (p[i] == last) {
571
922k
            gap = mlast - i - 1;
572
922k
        }
573
51.8M
    }
574
22.5M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
1.99G
    for (Py_ssize_t i = 0; i <= w; i++) {
577
1.98G
        if (ss[i] == last) {
578
            /* candidate match */
579
40.3M
            Py_ssize_t j;
580
64.3M
            for (j = 0; j < mlast; j++) {
581
41.4M
                if (s[i+j] != p[j]) {
582
17.5M
                    break;
583
17.5M
                }
584
41.4M
            }
585
40.3M
            if (j == mlast) {
586
                /* got a match! */
587
22.8M
                if (mode != FAST_COUNT) {
588
12.0M
                    return i;
589
12.0M
                }
590
10.8M
                count++;
591
10.8M
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
10.8M
                i = i + mlast;
595
10.8M
                continue;
596
10.8M
            }
597
            /* miss: check if next character is part of pattern */
598
17.5M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
3.96M
                i = i + m;
600
3.96M
            }
601
13.5M
            else {
602
13.5M
                i = i + gap;
603
13.5M
            }
604
17.5M
        }
605
1.94G
        else {
606
            /* skip: check if next character is part of pattern */
607
1.94G
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
1.89G
                i = i + m;
609
1.89G
            }
610
1.94G
        }
611
1.98G
    }
612
10.5M
    return mode == FAST_COUNT ? count : -1;
613
22.5M
}
Unexecuted instantiation: bytesobject.c:stringlib_default_find
unicodeobject.c:asciilib_default_find
Line
Count
Source
560
2.71M
{
561
2.71M
    const Py_ssize_t w = n - m;
562
2.71M
    Py_ssize_t mlast = m - 1, count = 0;
563
2.71M
    Py_ssize_t gap = mlast;
564
2.71M
    const STRINGLIB_CHAR last = p[mlast];
565
2.71M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
2.71M
    unsigned long mask = 0;
568
5.85M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
3.14M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
3.14M
        if (p[i] == last) {
571
21.0k
            gap = mlast - i - 1;
572
21.0k
        }
573
3.14M
    }
574
2.71M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
79.0M
    for (Py_ssize_t i = 0; i <= w; i++) {
577
78.9M
        if (ss[i] == last) {
578
            /* candidate match */
579
4.87M
            Py_ssize_t j;
580
8.00M
            for (j = 0; j < mlast; j++) {
581
5.32M
                if (s[i+j] != p[j]) {
582
2.18M
                    break;
583
2.18M
                }
584
5.32M
            }
585
4.87M
            if (j == mlast) {
586
                /* got a match! */
587
2.68M
                if (mode != FAST_COUNT) {
588
2.68M
                    return i;
589
2.68M
                }
590
0
                count++;
591
0
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
0
                i = i + mlast;
595
0
                continue;
596
0
            }
597
            /* miss: check if next character is part of pattern */
598
2.18M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
127k
                i = i + m;
600
127k
            }
601
2.06M
            else {
602
2.06M
                i = i + gap;
603
2.06M
            }
604
2.18M
        }
605
74.1M
        else {
606
            /* skip: check if next character is part of pattern */
607
74.1M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
69.8M
                i = i + m;
609
69.8M
            }
610
74.1M
        }
611
78.9M
    }
612
27.1k
    return mode == FAST_COUNT ? count : -1;
613
2.71M
}
unicodeobject.c:ucs1lib_default_find
Line
Count
Source
560
12.0M
{
561
12.0M
    const Py_ssize_t w = n - m;
562
12.0M
    Py_ssize_t mlast = m - 1, count = 0;
563
12.0M
    Py_ssize_t gap = mlast;
564
12.0M
    const STRINGLIB_CHAR last = p[mlast];
565
12.0M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
12.0M
    unsigned long mask = 0;
568
52.6M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
40.5M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
40.5M
        if (p[i] == last) {
571
836k
            gap = mlast - i - 1;
572
836k
        }
573
40.5M
    }
574
12.0M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
336M
    for (Py_ssize_t i = 0; i <= w; i++) {
577
325M
        if (ss[i] == last) {
578
            /* candidate match */
579
13.1M
            Py_ssize_t j;
580
18.8M
            for (j = 0; j < mlast; j++) {
581
13.3M
                if (s[i+j] != p[j]) {
582
7.72M
                    break;
583
7.72M
                }
584
13.3M
            }
585
13.1M
            if (j == mlast) {
586
                /* got a match! */
587
5.44M
                if (mode != FAST_COUNT) {
588
1.72M
                    return i;
589
1.72M
                }
590
3.72M
                count++;
591
3.72M
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
3.72M
                i = i + mlast;
595
3.72M
                continue;
596
3.72M
            }
597
            /* miss: check if next character is part of pattern */
598
7.72M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
1.78M
                i = i + m;
600
1.78M
            }
601
5.94M
            else {
602
5.94M
                i = i + gap;
603
5.94M
            }
604
7.72M
        }
605
312M
        else {
606
            /* skip: check if next character is part of pattern */
607
312M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
281M
                i = i + m;
609
281M
            }
610
312M
        }
611
325M
    }
612
10.3M
    return mode == FAST_COUNT ? count : -1;
613
12.0M
}
unicodeobject.c:ucs2lib_default_find
Line
Count
Source
560
3.90M
{
561
3.90M
    const Py_ssize_t w = n - m;
562
3.90M
    Py_ssize_t mlast = m - 1, count = 0;
563
3.90M
    Py_ssize_t gap = mlast;
564
3.90M
    const STRINGLIB_CHAR last = p[mlast];
565
3.90M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
3.90M
    unsigned long mask = 0;
568
8.17M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
4.27M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
4.27M
        if (p[i] == last) {
571
39.7k
            gap = mlast - i - 1;
572
39.7k
        }
573
4.27M
    }
574
3.90M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
643M
    for (Py_ssize_t i = 0; i <= w; i++) {
577
642M
        if (ss[i] == last) {
578
            /* candidate match */
579
11.4M
            Py_ssize_t j;
580
19.0M
            for (j = 0; j < mlast; j++) {
581
11.8M
                if (s[i+j] != p[j]) {
582
4.24M
                    break;
583
4.24M
                }
584
11.8M
            }
585
11.4M
            if (j == mlast) {
586
                /* got a match! */
587
7.21M
                if (mode != FAST_COUNT) {
588
3.81M
                    return i;
589
3.81M
                }
590
3.40M
                count++;
591
3.40M
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
3.40M
                i = i + mlast;
595
3.40M
                continue;
596
3.40M
            }
597
            /* miss: check if next character is part of pattern */
598
4.24M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
919k
                i = i + m;
600
919k
            }
601
3.32M
            else {
602
3.32M
                i = i + gap;
603
3.32M
            }
604
4.24M
        }
605
631M
        else {
606
            /* skip: check if next character is part of pattern */
607
631M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
625M
                i = i + m;
609
625M
            }
610
631M
        }
611
642M
    }
612
94.0k
    return mode == FAST_COUNT ? count : -1;
613
3.90M
}
unicodeobject.c:ucs4lib_default_find
Line
Count
Source
560
3.81M
{
561
3.81M
    const Py_ssize_t w = n - m;
562
3.81M
    Py_ssize_t mlast = m - 1, count = 0;
563
3.81M
    Py_ssize_t gap = mlast;
564
3.81M
    const STRINGLIB_CHAR last = p[mlast];
565
3.81M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
3.81M
    unsigned long mask = 0;
568
7.70M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
3.88M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
3.88M
        if (p[i] == last) {
571
23.0k
            gap = mlast - i - 1;
572
23.0k
        }
573
3.88M
    }
574
3.81M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
936M
    for (Py_ssize_t i = 0; i <= w; i++) {
577
936M
        if (ss[i] == last) {
578
            /* candidate match */
579
10.8M
            Py_ssize_t j;
580
18.4M
            for (j = 0; j < mlast; j++) {
581
10.9M
                if (s[i+j] != p[j]) {
582
3.35M
                    break;
583
3.35M
                }
584
10.9M
            }
585
10.8M
            if (j == mlast) {
586
                /* got a match! */
587
7.49M
                if (mode != FAST_COUNT) {
588
3.78M
                    return i;
589
3.78M
                }
590
3.70M
                count++;
591
3.70M
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
3.70M
                i = i + mlast;
595
3.70M
                continue;
596
3.70M
            }
597
            /* miss: check if next character is part of pattern */
598
3.35M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
1.13M
                i = i + m;
600
1.13M
            }
601
2.21M
            else {
602
2.21M
                i = i + gap;
603
2.21M
            }
604
3.35M
        }
605
926M
        else {
606
            /* skip: check if next character is part of pattern */
607
926M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
917M
                i = i + m;
609
917M
            }
610
926M
        }
611
936M
    }
612
26.8k
    return mode == FAST_COUNT ? count : -1;
613
3.81M
}
bytes_methods.c:stringlib_default_find
Line
Count
Source
560
2.61k
{
561
2.61k
    const Py_ssize_t w = n - m;
562
2.61k
    Py_ssize_t mlast = m - 1, count = 0;
563
2.61k
    Py_ssize_t gap = mlast;
564
2.61k
    const STRINGLIB_CHAR last = p[mlast];
565
2.61k
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
2.61k
    unsigned long mask = 0;
568
10.4k
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
7.83k
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
7.83k
        if (p[i] == last) {
571
2.61k
            gap = mlast - i - 1;
572
2.61k
        }
573
7.83k
    }
574
2.61k
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
384k
    for (Py_ssize_t i = 0; i <= w; i++) {
577
384k
        if (ss[i] == last) {
578
            /* candidate match */
579
5.70k
            Py_ssize_t j;
580
12.9k
            for (j = 0; j < mlast; j++) {
581
10.5k
                if (s[i+j] != p[j]) {
582
3.30k
                    break;
583
3.30k
                }
584
10.5k
            }
585
5.70k
            if (j == mlast) {
586
                /* got a match! */
587
2.40k
                if (mode != FAST_COUNT) {
588
2.40k
                    return i;
589
2.40k
                }
590
0
                count++;
591
0
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
0
                i = i + mlast;
595
0
                continue;
596
0
            }
597
            /* miss: check if next character is part of pattern */
598
3.30k
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
530
                i = i + m;
600
530
            }
601
2.77k
            else {
602
2.77k
                i = i + gap;
603
2.77k
            }
604
3.30k
        }
605
378k
        else {
606
            /* skip: check if next character is part of pattern */
607
378k
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
17.8k
                i = i + m;
609
17.8k
            }
610
378k
        }
611
384k
    }
612
206
    return mode == FAST_COUNT ? count : -1;
613
2.61k
}
Unexecuted instantiation: bytearrayobject.c:stringlib_default_find
614
615
616
static Py_ssize_t
617
STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
618
                         const STRINGLIB_CHAR* p, Py_ssize_t m,
619
                         Py_ssize_t maxcount, int mode)
620
0
{
621
0
    const Py_ssize_t w = n - m;
622
0
    Py_ssize_t mlast = m - 1, count = 0;
623
0
    Py_ssize_t gap = mlast;
624
0
    Py_ssize_t hits = 0, res;
625
0
    const STRINGLIB_CHAR last = p[mlast];
626
0
    const STRINGLIB_CHAR *const ss = &s[mlast];
627
628
0
    unsigned long mask = 0;
629
0
    for (Py_ssize_t i = 0; i < mlast; i++) {
630
0
        STRINGLIB_BLOOM_ADD(mask, p[i]);
631
0
        if (p[i] == last) {
632
0
            gap = mlast - i - 1;
633
0
        }
634
0
    }
635
0
    STRINGLIB_BLOOM_ADD(mask, last);
636
637
0
    for (Py_ssize_t i = 0; i <= w; i++) {
638
0
        if (ss[i] == last) {
639
            /* candidate match */
640
0
            Py_ssize_t j;
641
0
            for (j = 0; j < mlast; j++) {
642
0
                if (s[i+j] != p[j]) {
643
0
                    break;
644
0
                }
645
0
            }
646
0
            if (j == mlast) {
647
                /* got a match! */
648
0
                if (mode != FAST_COUNT) {
649
0
                    return i;
650
0
                }
651
0
                count++;
652
0
                if (count == maxcount) {
653
0
                    return maxcount;
654
0
                }
655
0
                i = i + mlast;
656
0
                continue;
657
0
            }
658
0
            hits += j + 1;
659
0
            if (hits > m / 4 && w - i > 2000) {
660
0
                if (mode == FAST_SEARCH) {
661
0
                    res = STRINGLIB(_two_way_find)(s + i, n - i, p, m);
662
0
                    return res == -1 ? -1 : res + i;
663
0
                }
664
0
                else {
665
0
                    res = STRINGLIB(_two_way_count)(s + i, n - i, p, m,
666
0
                                                    maxcount - count);
667
0
                    return res + count;
668
0
                }
669
0
            }
670
            /* miss: check if next character is part of pattern */
671
0
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
672
0
                i = i + m;
673
0
            }
674
0
            else {
675
0
                i = i + gap;
676
0
            }
677
0
        }
678
0
        else {
679
            /* skip: check if next character is part of pattern */
680
0
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
681
0
                i = i + m;
682
0
            }
683
0
        }
684
0
    }
685
0
    return mode == FAST_COUNT ? count : -1;
686
0
}
Unexecuted instantiation: bytesobject.c:stringlib_adaptive_find
Unexecuted instantiation: unicodeobject.c:asciilib_adaptive_find
Unexecuted instantiation: unicodeobject.c:ucs1lib_adaptive_find
Unexecuted instantiation: unicodeobject.c:ucs2lib_adaptive_find
Unexecuted instantiation: unicodeobject.c:ucs4lib_adaptive_find
Unexecuted instantiation: bytes_methods.c:stringlib_adaptive_find
Unexecuted instantiation: bytearrayobject.c:stringlib_adaptive_find
687
688
689
static Py_ssize_t
690
STRINGLIB(default_rfind)(const STRINGLIB_CHAR* s, Py_ssize_t n,
691
                         const STRINGLIB_CHAR* p, Py_ssize_t m,
692
                         Py_ssize_t maxcount, int mode)
693
6.59k
{
694
    /* create compressed boyer-moore delta 1 table */
695
6.59k
    unsigned long mask = 0;
696
6.59k
    Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m;
697
698
    /* process pattern[0] outside the loop */
699
6.59k
    STRINGLIB_BLOOM_ADD(mask, p[0]);
700
    /* process pattern[:0:-1] */
701
26.3k
    for (i = mlast; i > 0; i--) {
702
19.7k
        STRINGLIB_BLOOM_ADD(mask, p[i]);
703
19.7k
        if (p[i] == p[0]) {
704
0
            skip = i - 1;
705
0
        }
706
19.7k
    }
707
708
1.42M
    for (i = w; i >= 0; i--) {
709
1.42M
        if (s[i] == p[0]) {
710
            /* candidate match */
711
73.6k
            for (j = mlast; j > 0; j--) {
712
67.1k
                if (s[i+j] != p[j]) {
713
46.9k
                    break;
714
46.9k
                }
715
67.1k
            }
716
53.4k
            if (j == 0) {
717
                /* got a match! */
718
6.50k
                return i;
719
6.50k
            }
720
            /* miss: check if previous character is part of pattern */
721
46.9k
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
722
45.1k
                i = i - m;
723
45.1k
            }
724
1.84k
            else {
725
1.84k
                i = i - skip;
726
1.84k
            }
727
46.9k
        }
728
1.37M
        else {
729
            /* skip: check if previous character is part of pattern */
730
1.37M
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
731
1.25M
                i = i - m;
732
1.25M
            }
733
1.37M
        }
734
1.42M
    }
735
90
    return -1;
736
6.59k
}
Unexecuted instantiation: bytesobject.c:stringlib_default_rfind
Unexecuted instantiation: unicodeobject.c:asciilib_default_rfind
Unexecuted instantiation: unicodeobject.c:ucs1lib_default_rfind
Unexecuted instantiation: unicodeobject.c:ucs2lib_default_rfind
Unexecuted instantiation: unicodeobject.c:ucs4lib_default_rfind
bytes_methods.c:stringlib_default_rfind
Line
Count
Source
693
6.59k
{
694
    /* create compressed boyer-moore delta 1 table */
695
6.59k
    unsigned long mask = 0;
696
6.59k
    Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m;
697
698
    /* process pattern[0] outside the loop */
699
6.59k
    STRINGLIB_BLOOM_ADD(mask, p[0]);
700
    /* process pattern[:0:-1] */
701
26.3k
    for (i = mlast; i > 0; i--) {
702
19.7k
        STRINGLIB_BLOOM_ADD(mask, p[i]);
703
19.7k
        if (p[i] == p[0]) {
704
0
            skip = i - 1;
705
0
        }
706
19.7k
    }
707
708
1.42M
    for (i = w; i >= 0; i--) {
709
1.42M
        if (s[i] == p[0]) {
710
            /* candidate match */
711
73.6k
            for (j = mlast; j > 0; j--) {
712
67.1k
                if (s[i+j] != p[j]) {
713
46.9k
                    break;
714
46.9k
                }
715
67.1k
            }
716
53.4k
            if (j == 0) {
717
                /* got a match! */
718
6.50k
                return i;
719
6.50k
            }
720
            /* miss: check if previous character is part of pattern */
721
46.9k
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
722
45.1k
                i = i - m;
723
45.1k
            }
724
1.84k
            else {
725
1.84k
                i = i - skip;
726
1.84k
            }
727
46.9k
        }
728
1.37M
        else {
729
            /* skip: check if previous character is part of pattern */
730
1.37M
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
731
1.25M
                i = i - m;
732
1.25M
            }
733
1.37M
        }
734
1.42M
    }
735
90
    return -1;
736
6.59k
}
Unexecuted instantiation: bytearrayobject.c:stringlib_default_rfind
737
738
739
static inline Py_ssize_t
740
STRINGLIB(count_char)(const STRINGLIB_CHAR *s, Py_ssize_t n,
741
                      const STRINGLIB_CHAR p0, Py_ssize_t maxcount)
742
0
{
743
0
    Py_ssize_t i, count = 0;
744
0
    for (i = 0; i < n; i++) {
745
0
        if (s[i] == p0) {
746
0
            count++;
747
0
            if (count == maxcount) {
748
0
                return maxcount;
749
0
            }
750
0
        }
751
0
    }
752
0
    return count;
753
0
}
Unexecuted instantiation: bytesobject.c:stringlib_count_char
Unexecuted instantiation: unicodeobject.c:asciilib_count_char
Unexecuted instantiation: unicodeobject.c:ucs1lib_count_char
Unexecuted instantiation: unicodeobject.c:ucs2lib_count_char
Unexecuted instantiation: unicodeobject.c:ucs4lib_count_char
Unexecuted instantiation: bytes_methods.c:stringlib_count_char
Unexecuted instantiation: bytearrayobject.c:stringlib_count_char
754
755
756
static inline Py_ssize_t
757
STRINGLIB(count_char_no_maxcount)(const STRINGLIB_CHAR *s, Py_ssize_t n,
758
                                  const STRINGLIB_CHAR p0)
759
/* A specialized function of count_char that does not cut off at a maximum.
760
   As a result, the compiler is able to vectorize the loop. */
761
45.2M
{
762
45.2M
    Py_ssize_t count = 0;
763
6.18G
    for (Py_ssize_t i = 0; i < n; i++) {
764
6.13G
        if (s[i] == p0) {
765
894M
            count++;
766
894M
        }
767
6.13G
    }
768
45.2M
    return count;
769
45.2M
}
Unexecuted instantiation: bytesobject.c:stringlib_count_char_no_maxcount
Unexecuted instantiation: unicodeobject.c:asciilib_count_char_no_maxcount
unicodeobject.c:ucs1lib_count_char_no_maxcount
Line
Count
Source
761
30.7M
{
762
30.7M
    Py_ssize_t count = 0;
763
972M
    for (Py_ssize_t i = 0; i < n; i++) {
764
941M
        if (s[i] == p0) {
765
38.9M
            count++;
766
38.9M
        }
767
941M
    }
768
30.7M
    return count;
769
30.7M
}
unicodeobject.c:ucs2lib_count_char_no_maxcount
Line
Count
Source
761
7.51M
{
762
7.51M
    Py_ssize_t count = 0;
763
1.42G
    for (Py_ssize_t i = 0; i < n; i++) {
764
1.42G
        if (s[i] == p0) {
765
59.2M
            count++;
766
59.2M
        }
767
1.42G
    }
768
7.51M
    return count;
769
7.51M
}
unicodeobject.c:ucs4lib_count_char_no_maxcount
Line
Count
Source
761
2.25M
{
762
2.25M
    Py_ssize_t count = 0;
763
1.34G
    for (Py_ssize_t i = 0; i < n; i++) {
764
1.34G
        if (s[i] == p0) {
765
29.1M
            count++;
766
29.1M
        }
767
1.34G
    }
768
2.25M
    return count;
769
2.25M
}
bytes_methods.c:stringlib_count_char_no_maxcount
Line
Count
Source
761
4.74M
{
762
4.74M
    Py_ssize_t count = 0;
763
2.43G
    for (Py_ssize_t i = 0; i < n; i++) {
764
2.42G
        if (s[i] == p0) {
765
767M
            count++;
766
767M
        }
767
2.42G
    }
768
4.74M
    return count;
769
4.74M
}
Unexecuted instantiation: bytearrayobject.c:stringlib_count_char_no_maxcount
770
771
772
Py_LOCAL_INLINE(Py_ssize_t)
773
FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
774
           const STRINGLIB_CHAR* p, Py_ssize_t m,
775
           Py_ssize_t maxcount, int mode)
776
157M
{
777
157M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
10.7k
        return -1;
779
10.7k
    }
780
781
    /* look for special cases */
782
157M
    if (m <= 1) {
783
135M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
135M
        if (mode == FAST_SEARCH)
788
89.8M
            return STRINGLIB(find_char)(s, n, p[0]);
789
45.2M
        else if (mode == FAST_RSEARCH)
790
33.5k
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
45.2M
        else {
792
45.2M
            if (maxcount == PY_SSIZE_T_MAX) {
793
45.2M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
45.2M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
45.2M
        }
797
135M
    }
798
799
22.5M
    if (mode != FAST_RSEARCH) {
800
22.5M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
22.5M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
22.5M
        }
803
23
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
23
            if (mode == FAST_SEARCH) {
810
23
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
23
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
23
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
22.5M
    }
825
6.59k
    else {
826
        /* FAST_RSEARCH */
827
6.59k
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
6.59k
    }
829
22.5M
}
bytesobject.c:fastsearch
Line
Count
Source
776
441k
{
777
441k
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
0
        return -1;
779
0
    }
780
781
    /* look for special cases */
782
441k
    if (m <= 1) {
783
441k
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
441k
        if (mode == FAST_SEARCH)
788
441k
            return STRINGLIB(find_char)(s, n, p[0]);
789
0
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
0
        else {
792
0
            if (maxcount == PY_SSIZE_T_MAX) {
793
0
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
0
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
0
        }
797
441k
    }
798
799
0
    if (mode != FAST_RSEARCH) {
800
0
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
0
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
0
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
0
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
0
}
unicodeobject.c:asciilib_fastsearch
Line
Count
Source
776
18.5M
{
777
18.5M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
0
        return -1;
779
0
    }
780
781
    /* look for special cases */
782
18.5M
    if (m <= 1) {
783
15.8M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
15.8M
        if (mode == FAST_SEARCH)
788
15.8M
            return STRINGLIB(find_char)(s, n, p[0]);
789
33.5k
        else if (mode == FAST_RSEARCH)
790
33.5k
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
0
        else {
792
0
            if (maxcount == PY_SSIZE_T_MAX) {
793
0
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
0
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
0
        }
797
15.8M
    }
798
799
2.71M
    if (mode != FAST_RSEARCH) {
800
2.71M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
2.71M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
2.71M
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
2.71M
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
2.71M
}
unicodeobject.c:ucs1lib_fastsearch
Line
Count
Source
776
49.7M
{
777
49.7M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
0
        return -1;
779
0
    }
780
781
    /* look for special cases */
782
49.7M
    if (m <= 1) {
783
37.6M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
37.6M
        if (mode == FAST_SEARCH)
788
6.92M
            return STRINGLIB(find_char)(s, n, p[0]);
789
30.7M
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
30.7M
        else {
792
30.7M
            if (maxcount == PY_SSIZE_T_MAX) {
793
30.7M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
30.7M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
30.7M
        }
797
37.6M
    }
798
799
12.0M
    if (mode != FAST_RSEARCH) {
800
12.0M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
12.0M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
12.0M
        }
803
23
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
23
            if (mode == FAST_SEARCH) {
810
23
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
23
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
23
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
12.0M
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
12.0M
}
unicodeobject.c:ucs2lib_fastsearch
Line
Count
Source
776
54.9M
{
777
54.9M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
10.7k
        return -1;
779
10.7k
    }
780
781
    /* look for special cases */
782
54.9M
    if (m <= 1) {
783
51.0M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
51.0M
        if (mode == FAST_SEARCH)
788
43.5M
            return STRINGLIB(find_char)(s, n, p[0]);
789
7.51M
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
7.51M
        else {
792
7.51M
            if (maxcount == PY_SSIZE_T_MAX) {
793
7.51M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
7.51M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
7.51M
        }
797
51.0M
    }
798
799
3.90M
    if (mode != FAST_RSEARCH) {
800
3.90M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
3.90M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
3.90M
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
3.90M
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
3.90M
}
unicodeobject.c:ucs4lib_fastsearch
Line
Count
Source
776
29.1M
{
777
29.1M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
0
        return -1;
779
0
    }
780
781
    /* look for special cases */
782
29.1M
    if (m <= 1) {
783
25.3M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
25.3M
        if (mode == FAST_SEARCH)
788
23.0M
            return STRINGLIB(find_char)(s, n, p[0]);
789
2.25M
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
2.25M
        else {
792
2.25M
            if (maxcount == PY_SSIZE_T_MAX) {
793
2.25M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
2.25M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
2.25M
        }
797
25.3M
    }
798
799
3.81M
    if (mode != FAST_RSEARCH) {
800
3.81M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
3.81M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
3.81M
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
3.81M
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
3.81M
}
bytes_methods.c:fastsearch
Line
Count
Source
776
4.74M
{
777
4.74M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
9
        return -1;
779
9
    }
780
781
    /* look for special cases */
782
4.74M
    if (m <= 1) {
783
4.74M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
4.74M
        if (mode == FAST_SEARCH)
788
0
            return STRINGLIB(find_char)(s, n, p[0]);
789
4.74M
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
4.74M
        else {
792
4.74M
            if (maxcount == PY_SSIZE_T_MAX) {
793
4.74M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
4.74M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
4.74M
        }
797
4.74M
    }
798
799
9.20k
    if (mode != FAST_RSEARCH) {
800
2.61k
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
2.61k
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
2.61k
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
2.61k
    }
825
6.59k
    else {
826
        /* FAST_RSEARCH */
827
6.59k
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
6.59k
    }
829
9.20k
}
Unexecuted instantiation: bytearrayobject.c:fastsearch
830