Coverage Report

Created: 2025-11-30 06:38

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Objects/stringlib/fastsearch.h
Line
Count
Source
1
/* stringlib: fastsearch implementation */
2
3
#define STRINGLIB_FASTSEARCH_H
4
5
/* fast search/count implementation, based on a mix between boyer-
6
   moore and horspool, with a few more bells and whistles on the top.
7
   for some more background, see:
8
   https://web.archive.org/web/20201107074620/http://effbot.org/zone/stringlib.htm */
9
10
/* note: fastsearch may access s[n], which isn't a problem when using
11
   Python's ordinary string types, but may cause problems if you're
12
   using this code in other contexts.  also, the count mode returns -1
13
   if there cannot possibly be a match in the target string, and 0 if
14
   it has actually checked for matches, but didn't find any.  callers
15
   beware! */
16
17
/* If the strings are long enough, use Crochemore and Perrin's Two-Way
18
   algorithm, which has worst-case O(n) runtime and best-case O(n/k).
19
   Also compute a table of shifts to achieve O(n/k) in more cases,
20
   and often (data dependent) deduce larger shifts than pure C&P can
21
   deduce. See stringlib_find_two_way_notes.txt in this folder for a
22
   detailed explanation. */
23
24
586M
#define FAST_COUNT 0
25
387M
#define FAST_SEARCH 1
26
89.0M
#define FAST_RSEARCH 2
27
28
#if LONG_BIT >= 128
29
#define STRINGLIB_BLOOM_WIDTH 128
30
#elif LONG_BIT >= 64
31
3.36G
#define STRINGLIB_BLOOM_WIDTH 64
32
#elif LONG_BIT >= 32
33
#define STRINGLIB_BLOOM_WIDTH 32
34
#else
35
#error "LONG_BIT is smaller than 32"
36
#endif
37
38
#define STRINGLIB_BLOOM_ADD(mask, ch) \
39
86.9M
    ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
40
#define STRINGLIB_BLOOM(mask, ch)     \
41
3.27G
    ((mask &  (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
42
43
#ifdef STRINGLIB_FAST_MEMCHR
44
327M
#  define MEMCHR_CUT_OFF 15
45
#else
46
76.5M
#  define MEMCHR_CUT_OFF 40
47
#endif
48
49
Py_LOCAL_INLINE(Py_ssize_t)
50
STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
51
403M
{
52
403M
    const STRINGLIB_CHAR *p, *e;
53
54
403M
    p = s;
55
403M
    e = s + n;
56
403M
    if (n > MEMCHR_CUT_OFF) {
57
#ifdef STRINGLIB_FAST_MEMCHR
58
99.3M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
99.3M
        if (p != NULL)
60
96.5M
            return (p - s);
61
2.74M
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
65.9M
        if (needle != 0) {
71
65.5M
            do {
72
65.5M
                void *candidate = memchr(p, needle,
73
65.5M
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
65.5M
                if (candidate == NULL)
75
452k
                    return -1;
76
65.1M
                s1 = p;
77
65.1M
                p = (const STRINGLIB_CHAR *)
78
65.1M
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
65.1M
                if (*p == ch)
80
65.0M
                    return (p - s);
81
                /* False positive */
82
117k
                p++;
83
117k
                if (p - s1 > MEMCHR_CUT_OFF)
84
54.2k
                    continue;
85
62.9k
                if (e - p <= MEMCHR_CUT_OFF)
86
3.75k
                    break;
87
59.1k
                e1 = p + MEMCHR_CUT_OFF;
88
1.70M
                while (p != e1) {
89
1.67M
                    if (*p == ch)
90
27.8k
                        return (p - s);
91
1.64M
                    p++;
92
1.64M
                }
93
59.1k
            }
94
65.4M
            while (e - p > MEMCHR_CUT_OFF);
95
65.4M
        }
96
#endif
97
165M
    }
98
1.40G
    while (p < e) {
99
1.18G
        if (*p == ch)
100
20.5M
            return (p - s);
101
1.16G
        p++;
102
1.16G
    }
103
217M
    return -1;
104
238M
}
bytesobject.c:stringlib_find_char
Line
Count
Source
51
3.46k
{
52
3.46k
    const STRINGLIB_CHAR *p, *e;
53
54
3.46k
    p = s;
55
3.46k
    e = s + n;
56
3.46k
    if (n > MEMCHR_CUT_OFF) {
57
3.46k
#ifdef STRINGLIB_FAST_MEMCHR
58
3.46k
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
3.46k
        if (p != NULL)
60
3.46k
            return (p - s);
61
0
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                void *candidate = memchr(p, needle,
73
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
3.46k
    }
98
0
    while (p < e) {
99
0
        if (*p == ch)
100
0
            return (p - s);
101
0
        p++;
102
0
    }
103
0
    return -1;
104
0
}
unicodeobject.c:ucs1lib_find_char
Line
Count
Source
51
242M
{
52
242M
    const STRINGLIB_CHAR *p, *e;
53
54
242M
    p = s;
55
242M
    e = s + n;
56
242M
    if (n > MEMCHR_CUT_OFF) {
57
22.7M
#ifdef STRINGLIB_FAST_MEMCHR
58
22.7M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
22.7M
        if (p != NULL)
60
21.3M
            return (p - s);
61
1.36M
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                void *candidate = memchr(p, needle,
73
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
22.7M
    }
98
1.20G
    while (p < e) {
99
998M
        if (*p == ch)
100
8.61M
            return (p - s);
101
989M
        p++;
102
989M
    }
103
211M
    return -1;
104
220M
}
unicodeobject.c:ucs2lib_find_char
Line
Count
Source
51
76.1M
{
52
76.1M
    const STRINGLIB_CHAR *p, *e;
53
54
76.1M
    p = s;
55
76.1M
    e = s + n;
56
76.1M
    if (n > MEMCHR_CUT_OFF) {
57
#ifdef STRINGLIB_FAST_MEMCHR
58
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
        if (p != NULL)
60
            return (p - s);
61
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
65.9M
        const STRINGLIB_CHAR *s1, *e1;
66
65.9M
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
65.9M
        if (needle != 0) {
71
65.5M
            do {
72
65.5M
                void *candidate = memchr(p, needle,
73
65.5M
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
65.5M
                if (candidate == NULL)
75
452k
                    return -1;
76
65.1M
                s1 = p;
77
65.1M
                p = (const STRINGLIB_CHAR *)
78
65.1M
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
65.1M
                if (*p == ch)
80
65.0M
                    return (p - s);
81
                /* False positive */
82
117k
                p++;
83
117k
                if (p - s1 > MEMCHR_CUT_OFF)
84
54.2k
                    continue;
85
62.9k
                if (e - p <= MEMCHR_CUT_OFF)
86
3.75k
                    break;
87
59.1k
                e1 = p + MEMCHR_CUT_OFF;
88
1.70M
                while (p != e1) {
89
1.67M
                    if (*p == ch)
90
27.8k
                        return (p - s);
91
1.64M
                    p++;
92
1.64M
                }
93
59.1k
            }
94
65.4M
            while (e - p > MEMCHR_CUT_OFF);
95
65.4M
        }
96
65.9M
#endif
97
65.9M
    }
98
155M
    while (p < e) {
99
152M
        if (*p == ch)
100
7.61M
            return (p - s);
101
145M
        p++;
102
145M
    }
103
3.09M
    return -1;
104
10.7M
}
unicodeobject.c:ucs4lib_find_char
Line
Count
Source
51
57.3M
{
52
57.3M
    const STRINGLIB_CHAR *p, *e;
53
54
57.3M
    p = s;
55
57.3M
    e = s + n;
56
57.3M
    if (n > MEMCHR_CUT_OFF) {
57
57.2M
#ifdef STRINGLIB_FAST_MEMCHR
58
57.2M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
57.2M
        if (p != NULL)
60
57.2M
            return (p - s);
61
29.6k
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                void *candidate = memchr(p, needle,
73
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
57.2M
    }
98
307k
    while (p < e) {
99
274k
        if (*p == ch)
100
38.2k
            return (p - s);
101
236k
        p++;
102
236k
    }
103
33.3k
    return -1;
104
71.5k
}
unicodeobject.c:asciilib_find_char
Line
Count
Source
51
21.5M
{
52
21.5M
    const STRINGLIB_CHAR *p, *e;
53
54
21.5M
    p = s;
55
21.5M
    e = s + n;
56
21.5M
    if (n > MEMCHR_CUT_OFF) {
57
17.3M
#ifdef STRINGLIB_FAST_MEMCHR
58
17.3M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
17.3M
        if (p != NULL)
60
16.4M
            return (p - s);
61
929k
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                void *candidate = memchr(p, needle,
73
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
17.3M
    }
98
11.5M
    while (p < e) {
99
11.5M
        if (*p == ch)
100
4.14M
            return (p - s);
101
7.38M
        p++;
102
7.38M
    }
103
62.2k
    return -1;
104
4.20M
}
bytes_methods.c:stringlib_find_char
Line
Count
Source
51
5.26M
{
52
5.26M
    const STRINGLIB_CHAR *p, *e;
53
54
5.26M
    p = s;
55
5.26M
    e = s + n;
56
5.26M
    if (n > MEMCHR_CUT_OFF) {
57
1.95M
#ifdef STRINGLIB_FAST_MEMCHR
58
1.95M
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59
1.95M
        if (p != NULL)
60
1.52M
            return (p - s);
61
426k
        return -1;
62
#else
63
        /* use memchr if we can choose a needle without too many likely
64
           false positives */
65
        const STRINGLIB_CHAR *s1, *e1;
66
        unsigned char needle = ch & 0xff;
67
        /* If looking for a multiple of 256, we'd have too
68
           many false positives looking for the '\0' byte in UCS2
69
           and UCS4 representations. */
70
        if (needle != 0) {
71
            do {
72
                void *candidate = memchr(p, needle,
73
                                         (e - p) * sizeof(STRINGLIB_CHAR));
74
                if (candidate == NULL)
75
                    return -1;
76
                s1 = p;
77
                p = (const STRINGLIB_CHAR *)
78
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79
                if (*p == ch)
80
                    return (p - s);
81
                /* False positive */
82
                p++;
83
                if (p - s1 > MEMCHR_CUT_OFF)
84
                    continue;
85
                if (e - p <= MEMCHR_CUT_OFF)
86
                    break;
87
                e1 = p + MEMCHR_CUT_OFF;
88
                while (p != e1) {
89
                    if (*p == ch)
90
                        return (p - s);
91
                    p++;
92
                }
93
            }
94
            while (e - p > MEMCHR_CUT_OFF);
95
        }
96
#endif
97
1.95M
    }
98
29.1M
    while (p < e) {
99
25.9M
        if (*p == ch)
100
122k
            return (p - s);
101
25.8M
        p++;
102
25.8M
    }
103
3.18M
    return -1;
104
3.31M
}
Unexecuted instantiation: bytearrayobject.c:stringlib_find_char
105
106
#undef MEMCHR_CUT_OFF
107
108
#if STRINGLIB_SIZEOF_CHAR == 1
109
146k
#  define MEMRCHR_CUT_OFF 15
110
#else
111
467k
#  define MEMRCHR_CUT_OFF 40
112
#endif
113
114
115
Py_LOCAL_INLINE(Py_ssize_t)
116
STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
117
566k
{
118
566k
    const STRINGLIB_CHAR *p;
119
566k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
566k
    if (n > MEMRCHR_CUT_OFF) {
126
#if STRINGLIB_SIZEOF_CHAR == 1
127
        p = memrchr(s, ch, n);
128
75.0k
        if (p != NULL)
129
71.9k
            return (p - s);
130
3.09k
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
350k
        if (needle != 0) {
141
362k
            do {
142
362k
                void *candidate = memrchr(s, needle,
143
362k
                                          n * sizeof(STRINGLIB_CHAR));
144
362k
                if (candidate == NULL)
145
1.16k
                    return -1;
146
361k
                n1 = n;
147
361k
                p = (const STRINGLIB_CHAR *)
148
361k
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
361k
                n = p - s;
150
361k
                if (*p == ch)
151
346k
                    return n;
152
                /* False positive */
153
15.6k
                if (n1 - n > MEMRCHR_CUT_OFF)
154
6.36k
                    continue;
155
9.26k
                if (n <= MEMRCHR_CUT_OFF)
156
980
                    break;
157
8.28k
                s1 = p - MEMRCHR_CUT_OFF;
158
323k
                while (p > s1) {
159
315k
                    p--;
160
315k
                    if (*p == ch)
161
642
                        return (p - s);
162
315k
                }
163
7.63k
                n = p - s;
164
7.63k
            }
165
350k
            while (n > MEMRCHR_CUT_OFF);
166
350k
        }
167
#endif
168
425k
    }
169
143k
#endif  /* HAVE_MEMRCHR */
170
143k
    p = s + n;
171
924k
    while (p > s) {
172
873k
        p--;
173
873k
        if (*p == ch)
174
92.7k
            return (p - s);
175
873k
    }
176
50.8k
    return -1;
177
143k
}
Unexecuted instantiation: bytesobject.c:stringlib_rfind_char
unicodeobject.c:ucs1lib_rfind_char
Line
Count
Source
117
64.4k
{
118
64.4k
    const STRINGLIB_CHAR *p;
119
64.4k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
64.4k
    if (n > MEMRCHR_CUT_OFF) {
126
61.0k
#if STRINGLIB_SIZEOF_CHAR == 1
127
61.0k
        p = memrchr(s, ch, n);
128
61.0k
        if (p != NULL)
129
59.8k
            return (p - s);
130
1.11k
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
        if (needle != 0) {
141
            do {
142
                void *candidate = memrchr(s, needle,
143
                                          n * sizeof(STRINGLIB_CHAR));
144
                if (candidate == NULL)
145
                    return -1;
146
                n1 = n;
147
                p = (const STRINGLIB_CHAR *)
148
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
                n = p - s;
150
                if (*p == ch)
151
                    return n;
152
                /* False positive */
153
                if (n1 - n > MEMRCHR_CUT_OFF)
154
                    continue;
155
                if (n <= MEMRCHR_CUT_OFF)
156
                    break;
157
                s1 = p - MEMRCHR_CUT_OFF;
158
                while (p > s1) {
159
                    p--;
160
                    if (*p == ch)
161
                        return (p - s);
162
                }
163
                n = p - s;
164
            }
165
            while (n > MEMRCHR_CUT_OFF);
166
        }
167
#endif
168
61.0k
    }
169
3.40k
#endif  /* HAVE_MEMRCHR */
170
3.40k
    p = s + n;
171
10.7k
    while (p > s) {
172
9.80k
        p--;
173
9.80k
        if (*p == ch)
174
2.46k
            return (p - s);
175
9.80k
    }
176
934
    return -1;
177
3.40k
}
unicodeobject.c:ucs2lib_rfind_char
Line
Count
Source
117
296k
{
118
296k
    const STRINGLIB_CHAR *p;
119
296k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
296k
    if (n > MEMRCHR_CUT_OFF) {
126
#if STRINGLIB_SIZEOF_CHAR == 1
127
        p = memrchr(s, ch, n);
128
        if (p != NULL)
129
            return (p - s);
130
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
284k
        const STRINGLIB_CHAR *s1;
135
284k
        Py_ssize_t n1;
136
284k
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
284k
        if (needle != 0) {
141
288k
            do {
142
288k
                void *candidate = memrchr(s, needle,
143
288k
                                          n * sizeof(STRINGLIB_CHAR));
144
288k
                if (candidate == NULL)
145
684
                    return -1;
146
287k
                n1 = n;
147
287k
                p = (const STRINGLIB_CHAR *)
148
287k
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
287k
                n = p - s;
150
287k
                if (*p == ch)
151
282k
                    return n;
152
                /* False positive */
153
4.72k
                if (n1 - n > MEMRCHR_CUT_OFF)
154
1.37k
                    continue;
155
3.35k
                if (n <= MEMRCHR_CUT_OFF)
156
416
                    break;
157
2.93k
                s1 = p - MEMRCHR_CUT_OFF;
158
112k
                while (p > s1) {
159
110k
                    p--;
160
110k
                    if (*p == ch)
161
288
                        return (p - s);
162
110k
                }
163
2.64k
                n = p - s;
164
2.64k
            }
165
284k
            while (n > MEMRCHR_CUT_OFF);
166
284k
        }
167
284k
#endif
168
284k
    }
169
12.4k
#endif  /* HAVE_MEMRCHR */
170
12.4k
    p = s + n;
171
80.7k
    while (p > s) {
172
78.7k
        p--;
173
78.7k
        if (*p == ch)
174
10.4k
            return (p - s);
175
78.7k
    }
176
1.99k
    return -1;
177
12.4k
}
unicodeobject.c:ucs4lib_rfind_char
Line
Count
Source
117
123k
{
118
123k
    const STRINGLIB_CHAR *p;
119
123k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
123k
    if (n > MEMRCHR_CUT_OFF) {
126
#if STRINGLIB_SIZEOF_CHAR == 1
127
        p = memrchr(s, ch, n);
128
        if (p != NULL)
129
            return (p - s);
130
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
65.6k
        const STRINGLIB_CHAR *s1;
135
65.6k
        Py_ssize_t n1;
136
65.6k
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
65.6k
        if (needle != 0) {
141
74.7k
            do {
142
74.7k
                void *candidate = memrchr(s, needle,
143
74.7k
                                          n * sizeof(STRINGLIB_CHAR));
144
74.7k
                if (candidate == NULL)
145
480
                    return -1;
146
74.2k
                n1 = n;
147
74.2k
                p = (const STRINGLIB_CHAR *)
148
74.2k
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
74.2k
                n = p - s;
150
74.2k
                if (*p == ch)
151
63.3k
                    return n;
152
                /* False positive */
153
10.8k
                if (n1 - n > MEMRCHR_CUT_OFF)
154
4.99k
                    continue;
155
5.90k
                if (n <= MEMRCHR_CUT_OFF)
156
564
                    break;
157
5.34k
                s1 = p - MEMRCHR_CUT_OFF;
158
210k
                while (p > s1) {
159
205k
                    p--;
160
205k
                    if (*p == ch)
161
354
                        return (p - s);
162
205k
                }
163
4.99k
                n = p - s;
164
4.99k
            }
165
65.6k
            while (n > MEMRCHR_CUT_OFF);
166
65.6k
        }
167
65.6k
#endif
168
65.6k
    }
169
59.7k
#endif  /* HAVE_MEMRCHR */
170
59.7k
    p = s + n;
171
485k
    while (p > s) {
172
483k
        p--;
173
483k
        if (*p == ch)
174
58.1k
            return (p - s);
175
483k
    }
176
1.63k
    return -1;
177
59.7k
}
unicodeobject.c:asciilib_rfind_char
Line
Count
Source
117
61.7k
{
118
61.7k
    const STRINGLIB_CHAR *p;
119
61.7k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
61.7k
    if (n > MEMRCHR_CUT_OFF) {
126
11.4k
#if STRINGLIB_SIZEOF_CHAR == 1
127
11.4k
        p = memrchr(s, ch, n);
128
11.4k
        if (p != NULL)
129
11.2k
            return (p - s);
130
146
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
        if (needle != 0) {
141
            do {
142
                void *candidate = memrchr(s, needle,
143
                                          n * sizeof(STRINGLIB_CHAR));
144
                if (candidate == NULL)
145
                    return -1;
146
                n1 = n;
147
                p = (const STRINGLIB_CHAR *)
148
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
                n = p - s;
150
                if (*p == ch)
151
                    return n;
152
                /* False positive */
153
                if (n1 - n > MEMRCHR_CUT_OFF)
154
                    continue;
155
                if (n <= MEMRCHR_CUT_OFF)
156
                    break;
157
                s1 = p - MEMRCHR_CUT_OFF;
158
                while (p > s1) {
159
                    p--;
160
                    if (*p == ch)
161
                        return (p - s);
162
                }
163
                n = p - s;
164
            }
165
            while (n > MEMRCHR_CUT_OFF);
166
        }
167
#endif
168
11.4k
    }
169
50.3k
#endif  /* HAVE_MEMRCHR */
170
50.3k
    p = s + n;
171
259k
    while (p > s) {
172
225k
        p--;
173
225k
        if (*p == ch)
174
16.8k
            return (p - s);
175
225k
    }
176
33.5k
    return -1;
177
50.3k
}
bytes_methods.c:stringlib_rfind_char
Line
Count
Source
117
20.2k
{
118
20.2k
    const STRINGLIB_CHAR *p;
119
20.2k
#ifdef HAVE_MEMRCHR
120
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121
       doesn't seem as optimized as memchr(), but is still quite
122
       faster than our hand-written loop below. There is no wmemrchr
123
       for 4-byte chars. */
124
125
20.2k
    if (n > MEMRCHR_CUT_OFF) {
126
2.60k
#if STRINGLIB_SIZEOF_CHAR == 1
127
2.60k
        p = memrchr(s, ch, n);
128
2.60k
        if (p != NULL)
129
770
            return (p - s);
130
1.83k
        return -1;
131
#else
132
        /* use memrchr if we can choose a needle without too many likely
133
           false positives */
134
        const STRINGLIB_CHAR *s1;
135
        Py_ssize_t n1;
136
        unsigned char needle = ch & 0xff;
137
        /* If looking for a multiple of 256, we'd have too
138
           many false positives looking for the '\0' byte in UCS2
139
           and UCS4 representations. */
140
        if (needle != 0) {
141
            do {
142
                void *candidate = memrchr(s, needle,
143
                                          n * sizeof(STRINGLIB_CHAR));
144
                if (candidate == NULL)
145
                    return -1;
146
                n1 = n;
147
                p = (const STRINGLIB_CHAR *)
148
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149
                n = p - s;
150
                if (*p == ch)
151
                    return n;
152
                /* False positive */
153
                if (n1 - n > MEMRCHR_CUT_OFF)
154
                    continue;
155
                if (n <= MEMRCHR_CUT_OFF)
156
                    break;
157
                s1 = p - MEMRCHR_CUT_OFF;
158
                while (p > s1) {
159
                    p--;
160
                    if (*p == ch)
161
                        return (p - s);
162
                }
163
                n = p - s;
164
            }
165
            while (n > MEMRCHR_CUT_OFF);
166
        }
167
#endif
168
2.60k
    }
169
17.6k
#endif  /* HAVE_MEMRCHR */
170
17.6k
    p = s + n;
171
88.7k
    while (p > s) {
172
76.0k
        p--;
173
76.0k
        if (*p == ch)
174
4.94k
            return (p - s);
175
76.0k
    }
176
12.7k
    return -1;
177
17.6k
}
Unexecuted instantiation: bytearrayobject.c:stringlib_rfind_char
178
179
#undef MEMRCHR_CUT_OFF
180
181
/* Change to a 1 to see logging comments walk through the algorithm. */
182
#if 0 && STRINGLIB_SIZEOF_CHAR == 1
183
# define LOG(...) printf(__VA_ARGS__)
184
# define LOG_STRING(s, n) printf("\"%.*s\"", (int)(n), s)
185
# define LOG_LINEUP() do {                                         \
186
    LOG("> "); LOG_STRING(haystack, len_haystack); LOG("\n> ");    \
187
    LOG("%*s",(int)(window_last - haystack + 1 - len_needle), ""); \
188
    LOG_STRING(needle, len_needle); LOG("\n");                     \
189
} while(0)
190
#else
191
# define LOG(...)
192
# define LOG_STRING(s, n)
193
# define LOG_LINEUP()
194
#endif
195
196
Py_LOCAL_INLINE(Py_ssize_t)
197
STRINGLIB(_lex_search)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
198
                       Py_ssize_t *return_period, int invert_alphabet)
199
42
{
200
    /* Do a lexicographic search. Essentially this:
201
           >>> max(needle[i:] for i in range(len(needle)+1))
202
       Also find the period of the right half.   */
203
42
    Py_ssize_t max_suffix = 0;
204
42
    Py_ssize_t candidate = 1;
205
42
    Py_ssize_t k = 0;
206
    // The period of the right half.
207
42
    Py_ssize_t period = 1;
208
209
420
    while (candidate + k < len_needle) {
210
        // each loop increases candidate + k + max_suffix
211
378
        STRINGLIB_CHAR a = needle[candidate + k];
212
378
        STRINGLIB_CHAR b = needle[max_suffix + k];
213
        // check if the suffix at candidate is better than max_suffix
214
378
        if (invert_alphabet ? (b < a) : (a < b)) {
215
            // Fell short of max_suffix.
216
            // The next k + 1 characters are non-increasing
217
            // from candidate, so they won't start a maximal suffix.
218
273
            candidate += k + 1;
219
273
            k = 0;
220
            // We've ruled out any period smaller than what's
221
            // been scanned since max_suffix.
222
273
            period = candidate - max_suffix;
223
273
        }
224
105
        else if (a == b) {
225
21
            if (k + 1 != period) {
226
                // Keep scanning the equal strings
227
0
                k++;
228
0
            }
229
21
            else {
230
                // Matched a whole period.
231
                // Start matching the next period.
232
21
                candidate += period;
233
21
                k = 0;
234
21
            }
235
21
        }
236
84
        else {
237
            // Did better than max_suffix, so replace it.
238
84
            max_suffix = candidate;
239
84
            candidate++;
240
84
            k = 0;
241
84
            period = 1;
242
84
        }
243
378
    }
244
42
    *return_period = period;
245
42
    return max_suffix;
246
42
}
Unexecuted instantiation: bytesobject.c:stringlib__lex_search
Unexecuted instantiation: unicodeobject.c:asciilib__lex_search
unicodeobject.c:ucs1lib__lex_search
Line
Count
Source
199
42
{
200
    /* Do a lexicographic search. Essentially this:
201
           >>> max(needle[i:] for i in range(len(needle)+1))
202
       Also find the period of the right half.   */
203
42
    Py_ssize_t max_suffix = 0;
204
42
    Py_ssize_t candidate = 1;
205
42
    Py_ssize_t k = 0;
206
    // The period of the right half.
207
42
    Py_ssize_t period = 1;
208
209
420
    while (candidate + k < len_needle) {
210
        // each loop increases candidate + k + max_suffix
211
378
        STRINGLIB_CHAR a = needle[candidate + k];
212
378
        STRINGLIB_CHAR b = needle[max_suffix + k];
213
        // check if the suffix at candidate is better than max_suffix
214
378
        if (invert_alphabet ? (b < a) : (a < b)) {
215
            // Fell short of max_suffix.
216
            // The next k + 1 characters are non-increasing
217
            // from candidate, so they won't start a maximal suffix.
218
273
            candidate += k + 1;
219
273
            k = 0;
220
            // We've ruled out any period smaller than what's
221
            // been scanned since max_suffix.
222
273
            period = candidate - max_suffix;
223
273
        }
224
105
        else if (a == b) {
225
21
            if (k + 1 != period) {
226
                // Keep scanning the equal strings
227
0
                k++;
228
0
            }
229
21
            else {
230
                // Matched a whole period.
231
                // Start matching the next period.
232
21
                candidate += period;
233
21
                k = 0;
234
21
            }
235
21
        }
236
84
        else {
237
            // Did better than max_suffix, so replace it.
238
84
            max_suffix = candidate;
239
84
            candidate++;
240
84
            k = 0;
241
84
            period = 1;
242
84
        }
243
378
    }
244
42
    *return_period = period;
245
42
    return max_suffix;
246
42
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__lex_search
Unexecuted instantiation: unicodeobject.c:ucs4lib__lex_search
Unexecuted instantiation: bytes_methods.c:stringlib__lex_search
Unexecuted instantiation: bytearrayobject.c:stringlib__lex_search
247
248
Py_LOCAL_INLINE(Py_ssize_t)
249
STRINGLIB(_factorize)(const STRINGLIB_CHAR *needle,
250
                      Py_ssize_t len_needle,
251
                      Py_ssize_t *return_period)
252
21
{
253
    /* Do a "critical factorization", making it so that:
254
       >>> needle = (left := needle[:cut]) + (right := needle[cut:])
255
       where the "local period" of the cut is maximal.
256
257
       The local period of the cut is the minimal length of a string w
258
       such that (left endswith w or w endswith left)
259
       and (right startswith w or w startswith right).
260
261
       The Critical Factorization Theorem says that this maximal local
262
       period is the global period of the string.
263
264
       Crochemore and Perrin (1991) show that this cut can be computed
265
       as the later of two cuts: one that gives a lexicographically
266
       maximal right half, and one that gives the same with the
267
       with respect to a reversed alphabet-ordering.
268
269
       This is what we want to happen:
270
           >>> x = "GCAGAGAG"
271
           >>> cut, period = factorize(x)
272
           >>> x[:cut], (right := x[cut:])
273
           ('GC', 'AGAGAG')
274
           >>> period  # right half period
275
           2
276
           >>> right[period:] == right[:-period]
277
           True
278
279
       This is how the local period lines up in the above example:
280
                GC | AGAGAG
281
           AGAGAGC = AGAGAGC
282
       The length of this minimal repetition is 7, which is indeed the
283
       period of the original string. */
284
285
21
    Py_ssize_t cut1, period1, cut2, period2, cut, period;
286
21
    cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0);
287
21
    cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1);
288
289
    // Take the later cut.
290
21
    if (cut1 > cut2) {
291
21
        period = period1;
292
21
        cut = cut1;
293
21
    }
294
0
    else {
295
0
        period = period2;
296
0
        cut = cut2;
297
0
    }
298
299
21
    LOG("split: "); LOG_STRING(needle, cut);
300
21
    LOG(" + "); LOG_STRING(needle + cut, len_needle - cut);
301
21
    LOG("\n");
302
303
21
    *return_period = period;
304
21
    return cut;
305
21
}
Unexecuted instantiation: bytesobject.c:stringlib__factorize
Unexecuted instantiation: unicodeobject.c:asciilib__factorize
unicodeobject.c:ucs1lib__factorize
Line
Count
Source
252
21
{
253
    /* Do a "critical factorization", making it so that:
254
       >>> needle = (left := needle[:cut]) + (right := needle[cut:])
255
       where the "local period" of the cut is maximal.
256
257
       The local period of the cut is the minimal length of a string w
258
       such that (left endswith w or w endswith left)
259
       and (right startswith w or w startswith right).
260
261
       The Critical Factorization Theorem says that this maximal local
262
       period is the global period of the string.
263
264
       Crochemore and Perrin (1991) show that this cut can be computed
265
       as the later of two cuts: one that gives a lexicographically
266
       maximal right half, and one that gives the same with the
267
       with respect to a reversed alphabet-ordering.
268
269
       This is what we want to happen:
270
           >>> x = "GCAGAGAG"
271
           >>> cut, period = factorize(x)
272
           >>> x[:cut], (right := x[cut:])
273
           ('GC', 'AGAGAG')
274
           >>> period  # right half period
275
           2
276
           >>> right[period:] == right[:-period]
277
           True
278
279
       This is how the local period lines up in the above example:
280
                GC | AGAGAG
281
           AGAGAGC = AGAGAGC
282
       The length of this minimal repetition is 7, which is indeed the
283
       period of the original string. */
284
285
21
    Py_ssize_t cut1, period1, cut2, period2, cut, period;
286
21
    cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0);
287
21
    cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1);
288
289
    // Take the later cut.
290
21
    if (cut1 > cut2) {
291
21
        period = period1;
292
21
        cut = cut1;
293
21
    }
294
0
    else {
295
0
        period = period2;
296
0
        cut = cut2;
297
0
    }
298
299
21
    LOG("split: "); LOG_STRING(needle, cut);
300
21
    LOG(" + "); LOG_STRING(needle + cut, len_needle - cut);
301
21
    LOG("\n");
302
303
21
    *return_period = period;
304
21
    return cut;
305
21
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__factorize
Unexecuted instantiation: unicodeobject.c:ucs4lib__factorize
Unexecuted instantiation: bytes_methods.c:stringlib__factorize
Unexecuted instantiation: bytearrayobject.c:stringlib__factorize
306
307
308
231
#define SHIFT_TYPE uint8_t
309
#define MAX_SHIFT UINT8_MAX
310
311
72.1k
#define TABLE_SIZE_BITS 6u
312
72.1k
#define TABLE_SIZE (1U << TABLE_SIZE_BITS)
313
70.7k
#define TABLE_MASK (TABLE_SIZE - 1U)
314
315
typedef struct STRINGLIB(_pre) {
316
    const STRINGLIB_CHAR *needle;
317
    Py_ssize_t len_needle;
318
    Py_ssize_t cut;
319
    Py_ssize_t period;
320
    Py_ssize_t gap;
321
    int is_periodic;
322
    SHIFT_TYPE table[TABLE_SIZE];
323
} STRINGLIB(prework);
324
325
326
static void
327
STRINGLIB(_preprocess)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
328
                       STRINGLIB(prework) *p)
329
21
{
330
21
    p->needle = needle;
331
21
    p->len_needle = len_needle;
332
21
    p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period));
333
21
    assert(p->period + p->cut <= len_needle);
334
21
    p->is_periodic = (0 == memcmp(needle,
335
21
                                  needle + p->period,
336
21
                                  p->cut * STRINGLIB_SIZEOF_CHAR));
337
21
    if (p->is_periodic) {
338
0
        assert(p->cut <= len_needle/2);
339
0
        assert(p->cut < p->period);
340
0
    }
341
21
    else {
342
        // A lower bound on the period
343
21
        p->period = Py_MAX(p->cut, len_needle - p->cut) + 1;
344
21
    }
345
    // The gap between the last character and the previous
346
    // occurrence of an equivalent character (modulo TABLE_SIZE)
347
21
    p->gap = len_needle;
348
21
    STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK;
349
147
    for (Py_ssize_t i = len_needle - 2; i >= 0; i--) {
350
147
        STRINGLIB_CHAR x = needle[i] & TABLE_MASK;
351
147
        if (x == last) {
352
21
            p->gap = len_needle - 1 - i;
353
21
            break;
354
21
        }
355
147
    }
356
    // Fill up a compressed Boyer-Moore "Bad Character" table
357
21
    Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT);
358
1.36k
    for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; i++) {
359
1.34k
        p->table[i] = Py_SAFE_DOWNCAST(not_found_shift,
360
1.34k
                                       Py_ssize_t, SHIFT_TYPE);
361
1.34k
    }
362
231
    for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; i++) {
363
210
        SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i,
364
210
                                            Py_ssize_t, SHIFT_TYPE);
365
210
        p->table[needle[i] & TABLE_MASK] = shift;
366
210
    }
367
21
}
Unexecuted instantiation: bytesobject.c:stringlib__preprocess
Unexecuted instantiation: unicodeobject.c:asciilib__preprocess
unicodeobject.c:ucs1lib__preprocess
Line
Count
Source
329
21
{
330
21
    p->needle = needle;
331
21
    p->len_needle = len_needle;
332
21
    p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period));
333
21
    assert(p->period + p->cut <= len_needle);
334
21
    p->is_periodic = (0 == memcmp(needle,
335
21
                                  needle + p->period,
336
21
                                  p->cut * STRINGLIB_SIZEOF_CHAR));
337
21
    if (p->is_periodic) {
338
0
        assert(p->cut <= len_needle/2);
339
0
        assert(p->cut < p->period);
340
0
    }
341
21
    else {
342
        // A lower bound on the period
343
21
        p->period = Py_MAX(p->cut, len_needle - p->cut) + 1;
344
21
    }
345
    // The gap between the last character and the previous
346
    // occurrence of an equivalent character (modulo TABLE_SIZE)
347
21
    p->gap = len_needle;
348
21
    STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK;
349
147
    for (Py_ssize_t i = len_needle - 2; i >= 0; i--) {
350
147
        STRINGLIB_CHAR x = needle[i] & TABLE_MASK;
351
147
        if (x == last) {
352
21
            p->gap = len_needle - 1 - i;
353
21
            break;
354
21
        }
355
147
    }
356
    // Fill up a compressed Boyer-Moore "Bad Character" table
357
21
    Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT);
358
1.36k
    for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; i++) {
359
1.34k
        p->table[i] = Py_SAFE_DOWNCAST(not_found_shift,
360
1.34k
                                       Py_ssize_t, SHIFT_TYPE);
361
1.34k
    }
362
231
    for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; i++) {
363
210
        SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i,
364
210
                                            Py_ssize_t, SHIFT_TYPE);
365
210
        p->table[needle[i] & TABLE_MASK] = shift;
366
210
    }
367
21
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__preprocess
Unexecuted instantiation: unicodeobject.c:ucs4lib__preprocess
Unexecuted instantiation: bytes_methods.c:stringlib__preprocess
Unexecuted instantiation: bytearrayobject.c:stringlib__preprocess
368
369
static Py_ssize_t
370
STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
371
                    STRINGLIB(prework) *p)
372
21
{
373
    // Crochemore and Perrin's (1991) Two-Way algorithm.
374
    // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
375
21
    const Py_ssize_t len_needle = p->len_needle;
376
21
    const Py_ssize_t cut = p->cut;
377
21
    Py_ssize_t period = p->period;
378
21
    const STRINGLIB_CHAR *const needle = p->needle;
379
21
    const STRINGLIB_CHAR *window_last = haystack + len_needle - 1;
380
21
    const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack;
381
21
    SHIFT_TYPE *table = p->table;
382
21
    const STRINGLIB_CHAR *window;
383
21
    LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack);
384
385
21
    Py_ssize_t gap = p->gap;
386
21
    Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap);
387
21
    if (p->is_periodic) {
388
0
        LOG("Needle is periodic.\n");
389
0
        Py_ssize_t memory = 0;
390
0
      periodicwindowloop:
391
0
        while (window_last < haystack_end) {
392
0
            assert(memory == 0);
393
0
            for (;;) {
394
0
                LOG_LINEUP();
395
0
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
396
0
                window_last += shift;
397
0
                if (shift == 0) {
398
0
                    break;
399
0
                }
400
0
                if (window_last >= haystack_end) {
401
0
                    return -1;
402
0
                }
403
0
                LOG("Horspool skip\n");
404
0
            }
405
0
          no_shift:
406
0
            window = window_last - len_needle + 1;
407
0
            assert((window[len_needle - 1] & TABLE_MASK) ==
408
0
                   (needle[len_needle - 1] & TABLE_MASK));
409
0
            Py_ssize_t i = Py_MAX(cut, memory);
410
0
            for (; i < len_needle; i++) {
411
0
                if (needle[i] != window[i]) {
412
0
                    if (i < gap_jump_end) {
413
0
                        LOG("Early right half mismatch: jump by gap.\n");
414
0
                        assert(gap >= i - cut + 1);
415
0
                        window_last += gap;
416
0
                    }
417
0
                    else {
418
0
                        LOG("Late right half mismatch: jump by n (>gap)\n");
419
0
                        assert(i - cut + 1 > gap);
420
0
                        window_last += i - cut + 1;
421
0
                    }
422
0
                    memory = 0;
423
0
                    goto periodicwindowloop;
424
0
                }
425
0
            }
426
0
            for (i = memory; i < cut; i++) {
427
0
                if (needle[i] != window[i]) {
428
0
                    LOG("Left half does not match.\n");
429
0
                    window_last += period;
430
0
                    memory = len_needle - period;
431
0
                    if (window_last >= haystack_end) {
432
0
                        return -1;
433
0
                    }
434
0
                    Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
435
0
                    if (shift) {
436
                        // A mismatch has been identified to the right
437
                        // of where i will next start, so we can jump
438
                        // at least as far as if the mismatch occurred
439
                        // on the first comparison.
440
0
                        Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1;
441
0
                        LOG("Skip with Memory.\n");
442
0
                        memory = 0;
443
0
                        window_last += Py_MAX(shift, mem_jump);
444
0
                        goto periodicwindowloop;
445
0
                    }
446
0
                    goto no_shift;
447
0
                }
448
0
            }
449
0
            LOG("Found a match!\n");
450
0
            return window - haystack;
451
0
        }
452
0
    }
453
21
    else {
454
21
        period = Py_MAX(gap, period);
455
21
        LOG("Needle is not periodic.\n");
456
14.0k
      windowloop:
457
14.0k
        while (window_last < haystack_end) {
458
70.3k
            for (;;) {
459
70.3k
                LOG_LINEUP();
460
70.3k
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
461
70.3k
                window_last += shift;
462
70.3k
                if (shift == 0) {
463
14.0k
                    break;
464
14.0k
                }
465
56.3k
                if (window_last >= haystack_end) {
466
17
                    return -1;
467
17
                }
468
56.3k
                LOG("Horspool skip\n");
469
56.3k
            }
470
14.0k
            window = window_last - len_needle + 1;
471
14.0k
            assert((window[len_needle - 1] & TABLE_MASK) ==
472
14.0k
                   (needle[len_needle - 1] & TABLE_MASK));
473
14.0k
            Py_ssize_t i = cut;
474
14.2k
            for (; i < len_needle; i++) {
475
14.1k
                if (needle[i] != window[i]) {
476
13.9k
                    if (i < gap_jump_end) {
477
13.9k
                        LOG("Early right half mismatch: jump by gap.\n");
478
13.9k
                        assert(gap >= i - cut + 1);
479
13.9k
                        window_last += gap;
480
13.9k
                    }
481
0
                    else {
482
0
                        LOG("Late right half mismatch: jump by n (>gap)\n");
483
0
                        assert(i - cut + 1 > gap);
484
0
                        window_last += i - cut + 1;
485
0
                    }
486
13.9k
                    goto windowloop;
487
13.9k
                }
488
14.1k
            }
489
113
            for (Py_ssize_t i = 0; i < cut; i++) {
490
110
                if (needle[i] != window[i]) {
491
86
                    LOG("Left half does not match.\n");
492
86
                    window_last += period;
493
86
                    goto windowloop;
494
86
                }
495
110
            }
496
3
            LOG("Found a match!\n");
497
3
            return window - haystack;
498
89
        }
499
14.0k
    }
500
1
    LOG("Not found. Returning -1.\n");
501
1
    return -1;
502
21
}
Unexecuted instantiation: bytesobject.c:stringlib__two_way
Unexecuted instantiation: unicodeobject.c:asciilib__two_way
unicodeobject.c:ucs1lib__two_way
Line
Count
Source
372
21
{
373
    // Crochemore and Perrin's (1991) Two-Way algorithm.
374
    // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
375
21
    const Py_ssize_t len_needle = p->len_needle;
376
21
    const Py_ssize_t cut = p->cut;
377
21
    Py_ssize_t period = p->period;
378
21
    const STRINGLIB_CHAR *const needle = p->needle;
379
21
    const STRINGLIB_CHAR *window_last = haystack + len_needle - 1;
380
21
    const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack;
381
21
    SHIFT_TYPE *table = p->table;
382
21
    const STRINGLIB_CHAR *window;
383
21
    LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack);
384
385
21
    Py_ssize_t gap = p->gap;
386
21
    Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap);
387
21
    if (p->is_periodic) {
388
0
        LOG("Needle is periodic.\n");
389
0
        Py_ssize_t memory = 0;
390
0
      periodicwindowloop:
391
0
        while (window_last < haystack_end) {
392
0
            assert(memory == 0);
393
0
            for (;;) {
394
0
                LOG_LINEUP();
395
0
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
396
0
                window_last += shift;
397
0
                if (shift == 0) {
398
0
                    break;
399
0
                }
400
0
                if (window_last >= haystack_end) {
401
0
                    return -1;
402
0
                }
403
0
                LOG("Horspool skip\n");
404
0
            }
405
0
          no_shift:
406
0
            window = window_last - len_needle + 1;
407
0
            assert((window[len_needle - 1] & TABLE_MASK) ==
408
0
                   (needle[len_needle - 1] & TABLE_MASK));
409
0
            Py_ssize_t i = Py_MAX(cut, memory);
410
0
            for (; i < len_needle; i++) {
411
0
                if (needle[i] != window[i]) {
412
0
                    if (i < gap_jump_end) {
413
0
                        LOG("Early right half mismatch: jump by gap.\n");
414
0
                        assert(gap >= i - cut + 1);
415
0
                        window_last += gap;
416
0
                    }
417
0
                    else {
418
0
                        LOG("Late right half mismatch: jump by n (>gap)\n");
419
0
                        assert(i - cut + 1 > gap);
420
0
                        window_last += i - cut + 1;
421
0
                    }
422
0
                    memory = 0;
423
0
                    goto periodicwindowloop;
424
0
                }
425
0
            }
426
0
            for (i = memory; i < cut; i++) {
427
0
                if (needle[i] != window[i]) {
428
0
                    LOG("Left half does not match.\n");
429
0
                    window_last += period;
430
0
                    memory = len_needle - period;
431
0
                    if (window_last >= haystack_end) {
432
0
                        return -1;
433
0
                    }
434
0
                    Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
435
0
                    if (shift) {
436
                        // A mismatch has been identified to the right
437
                        // of where i will next start, so we can jump
438
                        // at least as far as if the mismatch occurred
439
                        // on the first comparison.
440
0
                        Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1;
441
0
                        LOG("Skip with Memory.\n");
442
0
                        memory = 0;
443
0
                        window_last += Py_MAX(shift, mem_jump);
444
0
                        goto periodicwindowloop;
445
0
                    }
446
0
                    goto no_shift;
447
0
                }
448
0
            }
449
0
            LOG("Found a match!\n");
450
0
            return window - haystack;
451
0
        }
452
0
    }
453
21
    else {
454
21
        period = Py_MAX(gap, period);
455
21
        LOG("Needle is not periodic.\n");
456
14.0k
      windowloop:
457
14.0k
        while (window_last < haystack_end) {
458
70.3k
            for (;;) {
459
70.3k
                LOG_LINEUP();
460
70.3k
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
461
70.3k
                window_last += shift;
462
70.3k
                if (shift == 0) {
463
14.0k
                    break;
464
14.0k
                }
465
56.3k
                if (window_last >= haystack_end) {
466
17
                    return -1;
467
17
                }
468
56.3k
                LOG("Horspool skip\n");
469
56.3k
            }
470
14.0k
            window = window_last - len_needle + 1;
471
14.0k
            assert((window[len_needle - 1] & TABLE_MASK) ==
472
14.0k
                   (needle[len_needle - 1] & TABLE_MASK));
473
14.0k
            Py_ssize_t i = cut;
474
14.2k
            for (; i < len_needle; i++) {
475
14.1k
                if (needle[i] != window[i]) {
476
13.9k
                    if (i < gap_jump_end) {
477
13.9k
                        LOG("Early right half mismatch: jump by gap.\n");
478
13.9k
                        assert(gap >= i - cut + 1);
479
13.9k
                        window_last += gap;
480
13.9k
                    }
481
0
                    else {
482
0
                        LOG("Late right half mismatch: jump by n (>gap)\n");
483
0
                        assert(i - cut + 1 > gap);
484
0
                        window_last += i - cut + 1;
485
0
                    }
486
13.9k
                    goto windowloop;
487
13.9k
                }
488
14.1k
            }
489
113
            for (Py_ssize_t i = 0; i < cut; i++) {
490
110
                if (needle[i] != window[i]) {
491
86
                    LOG("Left half does not match.\n");
492
86
                    window_last += period;
493
86
                    goto windowloop;
494
86
                }
495
110
            }
496
3
            LOG("Found a match!\n");
497
3
            return window - haystack;
498
89
        }
499
14.0k
    }
500
1
    LOG("Not found. Returning -1.\n");
501
1
    return -1;
502
21
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__two_way
Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way
Unexecuted instantiation: bytes_methods.c:stringlib__two_way
Unexecuted instantiation: bytearrayobject.c:stringlib__two_way
503
504
505
static Py_ssize_t
506
STRINGLIB(_two_way_find)(const STRINGLIB_CHAR *haystack,
507
                         Py_ssize_t len_haystack,
508
                         const STRINGLIB_CHAR *needle,
509
                         Py_ssize_t len_needle)
510
21
{
511
21
    LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack);
512
21
    STRINGLIB(prework) p;
513
21
    STRINGLIB(_preprocess)(needle, len_needle, &p);
514
21
    return STRINGLIB(_two_way)(haystack, len_haystack, &p);
515
21
}
Unexecuted instantiation: bytesobject.c:stringlib__two_way_find
Unexecuted instantiation: unicodeobject.c:asciilib__two_way_find
unicodeobject.c:ucs1lib__two_way_find
Line
Count
Source
510
21
{
511
21
    LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack);
512
21
    STRINGLIB(prework) p;
513
21
    STRINGLIB(_preprocess)(needle, len_needle, &p);
514
21
    return STRINGLIB(_two_way)(haystack, len_haystack, &p);
515
21
}
Unexecuted instantiation: unicodeobject.c:ucs2lib__two_way_find
Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way_find
Unexecuted instantiation: bytes_methods.c:stringlib__two_way_find
Unexecuted instantiation: bytearrayobject.c:stringlib__two_way_find
516
517
518
static Py_ssize_t
519
STRINGLIB(_two_way_count)(const STRINGLIB_CHAR *haystack,
520
                          Py_ssize_t len_haystack,
521
                          const STRINGLIB_CHAR *needle,
522
                          Py_ssize_t len_needle,
523
                          Py_ssize_t maxcount)
524
0
{
525
0
    LOG("###### Counting \"%s\" in \"%s\".\n", needle, haystack);
526
0
    STRINGLIB(prework) p;
527
0
    STRINGLIB(_preprocess)(needle, len_needle, &p);
528
0
    Py_ssize_t index = 0, count = 0;
529
0
    while (1) {
530
0
        Py_ssize_t result;
531
0
        result = STRINGLIB(_two_way)(haystack + index,
532
0
                                     len_haystack - index, &p);
533
0
        if (result == -1) {
534
0
            return count;
535
0
        }
536
0
        count++;
537
0
        if (count == maxcount) {
538
0
            return maxcount;
539
0
        }
540
0
        index += result + len_needle;
541
0
    }
542
0
    return count;
543
0
}
Unexecuted instantiation: bytesobject.c:stringlib__two_way_count
Unexecuted instantiation: unicodeobject.c:asciilib__two_way_count
Unexecuted instantiation: unicodeobject.c:ucs1lib__two_way_count
Unexecuted instantiation: unicodeobject.c:ucs2lib__two_way_count
Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way_count
Unexecuted instantiation: bytes_methods.c:stringlib__two_way_count
Unexecuted instantiation: bytearrayobject.c:stringlib__two_way_count
544
545
#undef SHIFT_TYPE
546
#undef NOT_FOUND
547
#undef SHIFT_OVERFLOW
548
#undef TABLE_SIZE_BITS
549
#undef TABLE_SIZE
550
#undef TABLE_MASK
551
552
#undef LOG
553
#undef LOG_STRING
554
#undef LOG_LINEUP
555
556
static inline Py_ssize_t
557
STRINGLIB(default_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
558
                        const STRINGLIB_CHAR* p, Py_ssize_t m,
559
                        Py_ssize_t maxcount, int mode)
560
23.3M
{
561
23.3M
    const Py_ssize_t w = n - m;
562
23.3M
    Py_ssize_t mlast = m - 1, count = 0;
563
23.3M
    Py_ssize_t gap = mlast;
564
23.3M
    const STRINGLIB_CHAR last = p[mlast];
565
23.3M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
23.3M
    unsigned long mask = 0;
568
86.9M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
63.5M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
63.5M
        if (p[i] == last) {
571
700k
            gap = mlast - i - 1;
572
700k
        }
573
63.5M
    }
574
23.3M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
3.31G
    for (Py_ssize_t i = 0; i <= w; i++) {
577
3.30G
        if (ss[i] == last) {
578
            /* candidate match */
579
47.9M
            Py_ssize_t j;
580
71.8M
            for (j = 0; j < mlast; j++) {
581
48.0M
                if (s[i+j] != p[j]) {
582
24.1M
                    break;
583
24.1M
                }
584
48.0M
            }
585
47.9M
            if (j == mlast) {
586
                /* got a match! */
587
23.8M
                if (mode != FAST_COUNT) {
588
12.0M
                    return i;
589
12.0M
                }
590
11.8M
                count++;
591
11.8M
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
11.8M
                i = i + mlast;
595
11.8M
                continue;
596
11.8M
            }
597
            /* miss: check if next character is part of pattern */
598
24.1M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
4.97M
                i = i + m;
600
4.97M
            }
601
19.1M
            else {
602
19.1M
                i = i + gap;
603
19.1M
            }
604
24.1M
        }
605
3.25G
        else {
606
            /* skip: check if next character is part of pattern */
607
3.25G
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
3.17G
                i = i + m;
609
3.17G
            }
610
3.25G
        }
611
3.30G
    }
612
11.3M
    return mode == FAST_COUNT ? count : -1;
613
23.3M
}
Unexecuted instantiation: bytesobject.c:stringlib_default_find
unicodeobject.c:asciilib_default_find
Line
Count
Source
560
1.84M
{
561
1.84M
    const Py_ssize_t w = n - m;
562
1.84M
    Py_ssize_t mlast = m - 1, count = 0;
563
1.84M
    Py_ssize_t gap = mlast;
564
1.84M
    const STRINGLIB_CHAR last = p[mlast];
565
1.84M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
1.84M
    unsigned long mask = 0;
568
3.69M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
1.85M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
1.85M
        if (p[i] == last) {
571
25.0k
            gap = mlast - i - 1;
572
25.0k
        }
573
1.85M
    }
574
1.84M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
172M
    for (Py_ssize_t i = 0; i <= w; i++) {
577
172M
        if (ss[i] == last) {
578
            /* candidate match */
579
4.43M
            Py_ssize_t j;
580
6.25M
            for (j = 0; j < mlast; j++) {
581
4.44M
                if (s[i+j] != p[j]) {
582
2.62M
                    break;
583
2.62M
                }
584
4.44M
            }
585
4.43M
            if (j == mlast) {
586
                /* got a match! */
587
1.81M
                if (mode != FAST_COUNT) {
588
1.81M
                    return i;
589
1.81M
                }
590
0
                count++;
591
0
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
0
                i = i + mlast;
595
0
                continue;
596
0
            }
597
            /* miss: check if next character is part of pattern */
598
2.62M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
84.5k
                i = i + m;
600
84.5k
            }
601
2.54M
            else {
602
2.54M
                i = i + gap;
603
2.54M
            }
604
2.62M
        }
605
168M
        else {
606
            /* skip: check if next character is part of pattern */
607
168M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
161M
                i = i + m;
609
161M
            }
610
168M
        }
611
172M
    }
612
35.7k
    return mode == FAST_COUNT ? count : -1;
613
1.84M
}
unicodeobject.c:ucs1lib_default_find
Line
Count
Source
560
13.1M
{
561
13.1M
    const Py_ssize_t w = n - m;
562
13.1M
    Py_ssize_t mlast = m - 1, count = 0;
563
13.1M
    Py_ssize_t gap = mlast;
564
13.1M
    const STRINGLIB_CHAR last = p[mlast];
565
13.1M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
13.1M
    unsigned long mask = 0;
568
66.3M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
53.1M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
53.1M
        if (p[i] == last) {
571
612k
            gap = mlast - i - 1;
572
612k
        }
573
53.1M
    }
574
13.1M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
607M
    for (Py_ssize_t i = 0; i <= w; i++) {
577
596M
        if (ss[i] == last) {
578
            /* candidate match */
579
13.6M
            Py_ssize_t j;
580
19.2M
            for (j = 0; j < mlast; j++) {
581
13.6M
                if (s[i+j] != p[j]) {
582
8.01M
                    break;
583
8.01M
                }
584
13.6M
            }
585
13.6M
            if (j == mlast) {
586
                /* got a match! */
587
5.59M
                if (mode != FAST_COUNT) {
588
1.93M
                    return i;
589
1.93M
                }
590
3.66M
                count++;
591
3.66M
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
3.66M
                i = i + mlast;
595
3.66M
                continue;
596
3.66M
            }
597
            /* miss: check if next character is part of pattern */
598
8.01M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
1.88M
                i = i + m;
600
1.88M
            }
601
6.13M
            else {
602
6.13M
                i = i + gap;
603
6.13M
            }
604
8.01M
        }
605
582M
        else {
606
            /* skip: check if next character is part of pattern */
607
582M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
542M
                i = i + m;
609
542M
            }
610
582M
        }
611
596M
    }
612
11.1M
    return mode == FAST_COUNT ? count : -1;
613
13.1M
}
unicodeobject.c:ucs2lib_default_find
Line
Count
Source
560
3.83M
{
561
3.83M
    const Py_ssize_t w = n - m;
562
3.83M
    Py_ssize_t mlast = m - 1, count = 0;
563
3.83M
    Py_ssize_t gap = mlast;
564
3.83M
    const STRINGLIB_CHAR last = p[mlast];
565
3.83M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
3.83M
    unsigned long mask = 0;
568
7.70M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
3.87M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
3.87M
        if (p[i] == last) {
571
37.1k
            gap = mlast - i - 1;
572
37.1k
        }
573
3.87M
    }
574
3.83M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
1.06G
    for (Py_ssize_t i = 0; i <= w; i++) {
577
1.06G
        if (ss[i] == last) {
578
            /* candidate match */
579
15.6M
            Py_ssize_t j;
580
23.0M
            for (j = 0; j < mlast; j++) {
581
15.7M
                if (s[i+j] != p[j]) {
582
8.34M
                    break;
583
8.34M
                }
584
15.7M
            }
585
15.6M
            if (j == mlast) {
586
                /* got a match! */
587
7.34M
                if (mode != FAST_COUNT) {
588
3.73M
                    return i;
589
3.73M
                }
590
3.61M
                count++;
591
3.61M
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
3.61M
                i = i + mlast;
595
3.61M
                continue;
596
3.61M
            }
597
            /* miss: check if next character is part of pattern */
598
8.34M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
1.15M
                i = i + m;
600
1.15M
            }
601
7.19M
            else {
602
7.19M
                i = i + gap;
603
7.19M
            }
604
8.34M
        }
605
1.04G
        else {
606
            /* skip: check if next character is part of pattern */
607
1.04G
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
1.02G
                i = i + m;
609
1.02G
            }
610
1.04G
        }
611
1.06G
    }
612
100k
    return mode == FAST_COUNT ? count : -1;
613
3.83M
}
unicodeobject.c:ucs4lib_default_find
Line
Count
Source
560
4.58M
{
561
4.58M
    const Py_ssize_t w = n - m;
562
4.58M
    Py_ssize_t mlast = m - 1, count = 0;
563
4.58M
    Py_ssize_t gap = mlast;
564
4.58M
    const STRINGLIB_CHAR last = p[mlast];
565
4.58M
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
4.58M
    unsigned long mask = 0;
568
9.19M
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
4.61M
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
4.61M
        if (p[i] == last) {
571
22.5k
            gap = mlast - i - 1;
572
22.5k
        }
573
4.61M
    }
574
4.58M
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
1.46G
    for (Py_ssize_t i = 0; i <= w; i++) {
577
1.46G
        if (ss[i] == last) {
578
            /* candidate match */
579
14.2M
            Py_ssize_t j;
580
23.3M
            for (j = 0; j < mlast; j++) {
581
14.2M
                if (s[i+j] != p[j]) {
582
5.13M
                    break;
583
5.13M
                }
584
14.2M
            }
585
14.2M
            if (j == mlast) {
586
                /* got a match! */
587
9.08M
                if (mode != FAST_COUNT) {
588
4.56M
                    return i;
589
4.56M
                }
590
4.52M
                count++;
591
4.52M
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
4.52M
                i = i + mlast;
595
4.52M
                continue;
596
4.52M
            }
597
            /* miss: check if next character is part of pattern */
598
5.13M
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
1.85M
                i = i + m;
600
1.85M
            }
601
3.27M
            else {
602
3.27M
                i = i + gap;
603
3.27M
            }
604
5.13M
        }
605
1.45G
        else {
606
            /* skip: check if next character is part of pattern */
607
1.45G
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
1.44G
                i = i + m;
609
1.44G
            }
610
1.45G
        }
611
1.46G
    }
612
27.1k
    return mode == FAST_COUNT ? count : -1;
613
4.58M
}
bytes_methods.c:stringlib_default_find
Line
Count
Source
560
2.76k
{
561
2.76k
    const Py_ssize_t w = n - m;
562
2.76k
    Py_ssize_t mlast = m - 1, count = 0;
563
2.76k
    Py_ssize_t gap = mlast;
564
2.76k
    const STRINGLIB_CHAR last = p[mlast];
565
2.76k
    const STRINGLIB_CHAR *const ss = &s[mlast];
566
567
2.76k
    unsigned long mask = 0;
568
11.0k
    for (Py_ssize_t i = 0; i < mlast; i++) {
569
8.29k
        STRINGLIB_BLOOM_ADD(mask, p[i]);
570
8.29k
        if (p[i] == last) {
571
2.76k
            gap = mlast - i - 1;
572
2.76k
        }
573
8.29k
    }
574
2.76k
    STRINGLIB_BLOOM_ADD(mask, last);
575
576
617k
    for (Py_ssize_t i = 0; i <= w; i++) {
577
617k
        if (ss[i] == last) {
578
            /* candidate match */
579
8.39k
            Py_ssize_t j;
580
16.2k
            for (j = 0; j < mlast; j++) {
581
13.7k
                if (s[i+j] != p[j]) {
582
5.86k
                    break;
583
5.86k
                }
584
13.7k
            }
585
8.39k
            if (j == mlast) {
586
                /* got a match! */
587
2.53k
                if (mode != FAST_COUNT) {
588
2.53k
                    return i;
589
2.53k
                }
590
0
                count++;
591
0
                if (count == maxcount) {
592
0
                    return maxcount;
593
0
                }
594
0
                i = i + mlast;
595
0
                continue;
596
0
            }
597
            /* miss: check if next character is part of pattern */
598
5.86k
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
599
568
                i = i + m;
600
568
            }
601
5.29k
            else {
602
5.29k
                i = i + gap;
603
5.29k
            }
604
5.86k
        }
605
609k
        else {
606
            /* skip: check if next character is part of pattern */
607
609k
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
608
31.4k
                i = i + m;
609
31.4k
            }
610
609k
        }
611
617k
    }
612
234
    return mode == FAST_COUNT ? count : -1;
613
2.76k
}
Unexecuted instantiation: bytearrayobject.c:stringlib_default_find
614
615
616
static Py_ssize_t
617
STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
618
                         const STRINGLIB_CHAR* p, Py_ssize_t m,
619
                         Py_ssize_t maxcount, int mode)
620
0
{
621
0
    const Py_ssize_t w = n - m;
622
0
    Py_ssize_t mlast = m - 1, count = 0;
623
0
    Py_ssize_t gap = mlast;
624
0
    Py_ssize_t hits = 0, res;
625
0
    const STRINGLIB_CHAR last = p[mlast];
626
0
    const STRINGLIB_CHAR *const ss = &s[mlast];
627
628
0
    unsigned long mask = 0;
629
0
    for (Py_ssize_t i = 0; i < mlast; i++) {
630
0
        STRINGLIB_BLOOM_ADD(mask, p[i]);
631
0
        if (p[i] == last) {
632
0
            gap = mlast - i - 1;
633
0
        }
634
0
    }
635
0
    STRINGLIB_BLOOM_ADD(mask, last);
636
637
0
    for (Py_ssize_t i = 0; i <= w; i++) {
638
0
        if (ss[i] == last) {
639
            /* candidate match */
640
0
            Py_ssize_t j;
641
0
            for (j = 0; j < mlast; j++) {
642
0
                if (s[i+j] != p[j]) {
643
0
                    break;
644
0
                }
645
0
            }
646
0
            if (j == mlast) {
647
                /* got a match! */
648
0
                if (mode != FAST_COUNT) {
649
0
                    return i;
650
0
                }
651
0
                count++;
652
0
                if (count == maxcount) {
653
0
                    return maxcount;
654
0
                }
655
0
                i = i + mlast;
656
0
                continue;
657
0
            }
658
0
            hits += j + 1;
659
0
            if (hits > m / 4 && w - i > 2000) {
660
0
                if (mode == FAST_SEARCH) {
661
0
                    res = STRINGLIB(_two_way_find)(s + i, n - i, p, m);
662
0
                    return res == -1 ? -1 : res + i;
663
0
                }
664
0
                else {
665
0
                    res = STRINGLIB(_two_way_count)(s + i, n - i, p, m,
666
0
                                                    maxcount - count);
667
0
                    return res + count;
668
0
                }
669
0
            }
670
            /* miss: check if next character is part of pattern */
671
0
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
672
0
                i = i + m;
673
0
            }
674
0
            else {
675
0
                i = i + gap;
676
0
            }
677
0
        }
678
0
        else {
679
            /* skip: check if next character is part of pattern */
680
0
            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
681
0
                i = i + m;
682
0
            }
683
0
        }
684
0
    }
685
0
    return mode == FAST_COUNT ? count : -1;
686
0
}
Unexecuted instantiation: bytesobject.c:stringlib_adaptive_find
Unexecuted instantiation: unicodeobject.c:asciilib_adaptive_find
Unexecuted instantiation: unicodeobject.c:ucs1lib_adaptive_find
Unexecuted instantiation: unicodeobject.c:ucs2lib_adaptive_find
Unexecuted instantiation: unicodeobject.c:ucs4lib_adaptive_find
Unexecuted instantiation: bytes_methods.c:stringlib_adaptive_find
Unexecuted instantiation: bytearrayobject.c:stringlib_adaptive_find
687
688
689
static Py_ssize_t
690
STRINGLIB(default_rfind)(const STRINGLIB_CHAR* s, Py_ssize_t n,
691
                         const STRINGLIB_CHAR* p, Py_ssize_t m,
692
                         Py_ssize_t maxcount, int mode)
693
6.55k
{
694
    /* create compressed boyer-moore delta 1 table */
695
6.55k
    unsigned long mask = 0;
696
6.55k
    Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m;
697
698
    /* process pattern[0] outside the loop */
699
6.55k
    STRINGLIB_BLOOM_ADD(mask, p[0]);
700
    /* process pattern[:0:-1] */
701
26.2k
    for (i = mlast; i > 0; i--) {
702
19.6k
        STRINGLIB_BLOOM_ADD(mask, p[i]);
703
19.6k
        if (p[i] == p[0]) {
704
0
            skip = i - 1;
705
0
        }
706
19.6k
    }
707
708
1.64M
    for (i = w; i >= 0; i--) {
709
1.64M
        if (s[i] == p[0]) {
710
            /* candidate match */
711
52.4k
            for (j = mlast; j > 0; j--) {
712
46.0k
                if (s[i+j] != p[j]) {
713
26.2k
                    break;
714
26.2k
                }
715
46.0k
            }
716
32.6k
            if (j == 0) {
717
                /* got a match! */
718
6.46k
                return i;
719
6.46k
            }
720
            /* miss: check if previous character is part of pattern */
721
26.2k
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
722
24.3k
                i = i - m;
723
24.3k
            }
724
1.85k
            else {
725
1.85k
                i = i - skip;
726
1.85k
            }
727
26.2k
        }
728
1.61M
        else {
729
            /* skip: check if previous character is part of pattern */
730
1.61M
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
731
1.51M
                i = i - m;
732
1.51M
            }
733
1.61M
        }
734
1.64M
    }
735
97
    return -1;
736
6.55k
}
Unexecuted instantiation: bytesobject.c:stringlib_default_rfind
Unexecuted instantiation: unicodeobject.c:asciilib_default_rfind
Unexecuted instantiation: unicodeobject.c:ucs1lib_default_rfind
Unexecuted instantiation: unicodeobject.c:ucs2lib_default_rfind
Unexecuted instantiation: unicodeobject.c:ucs4lib_default_rfind
bytes_methods.c:stringlib_default_rfind
Line
Count
Source
693
6.55k
{
694
    /* create compressed boyer-moore delta 1 table */
695
6.55k
    unsigned long mask = 0;
696
6.55k
    Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m;
697
698
    /* process pattern[0] outside the loop */
699
6.55k
    STRINGLIB_BLOOM_ADD(mask, p[0]);
700
    /* process pattern[:0:-1] */
701
26.2k
    for (i = mlast; i > 0; i--) {
702
19.6k
        STRINGLIB_BLOOM_ADD(mask, p[i]);
703
19.6k
        if (p[i] == p[0]) {
704
0
            skip = i - 1;
705
0
        }
706
19.6k
    }
707
708
1.64M
    for (i = w; i >= 0; i--) {
709
1.64M
        if (s[i] == p[0]) {
710
            /* candidate match */
711
52.4k
            for (j = mlast; j > 0; j--) {
712
46.0k
                if (s[i+j] != p[j]) {
713
26.2k
                    break;
714
26.2k
                }
715
46.0k
            }
716
32.6k
            if (j == 0) {
717
                /* got a match! */
718
6.46k
                return i;
719
6.46k
            }
720
            /* miss: check if previous character is part of pattern */
721
26.2k
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
722
24.3k
                i = i - m;
723
24.3k
            }
724
1.85k
            else {
725
1.85k
                i = i - skip;
726
1.85k
            }
727
26.2k
        }
728
1.61M
        else {
729
            /* skip: check if previous character is part of pattern */
730
1.61M
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
731
1.51M
                i = i - m;
732
1.51M
            }
733
1.61M
        }
734
1.64M
    }
735
97
    return -1;
736
6.55k
}
Unexecuted instantiation: bytearrayobject.c:stringlib_default_rfind
737
738
739
static inline Py_ssize_t
740
STRINGLIB(count_char)(const STRINGLIB_CHAR *s, Py_ssize_t n,
741
                      const STRINGLIB_CHAR p0, Py_ssize_t maxcount)
742
0
{
743
0
    Py_ssize_t i, count = 0;
744
0
    for (i = 0; i < n; i++) {
745
0
        if (s[i] == p0) {
746
0
            count++;
747
0
            if (count == maxcount) {
748
0
                return maxcount;
749
0
            }
750
0
        }
751
0
    }
752
0
    return count;
753
0
}
Unexecuted instantiation: bytesobject.c:stringlib_count_char
Unexecuted instantiation: unicodeobject.c:asciilib_count_char
Unexecuted instantiation: unicodeobject.c:ucs1lib_count_char
Unexecuted instantiation: unicodeobject.c:ucs2lib_count_char
Unexecuted instantiation: unicodeobject.c:ucs4lib_count_char
Unexecuted instantiation: bytes_methods.c:stringlib_count_char
Unexecuted instantiation: bytearrayobject.c:stringlib_count_char
754
755
756
static inline Py_ssize_t
757
STRINGLIB(count_char_no_maxcount)(const STRINGLIB_CHAR *s, Py_ssize_t n,
758
                                  const STRINGLIB_CHAR p0)
759
/* A specialized function of count_char that does not cut off at a maximum.
760
   As a result, the compiler is able to vectorize the loop. */
761
65.5M
{
762
65.5M
    Py_ssize_t count = 0;
763
8.05G
    for (Py_ssize_t i = 0; i < n; i++) {
764
7.99G
        if (s[i] == p0) {
765
225M
            count++;
766
225M
        }
767
7.99G
    }
768
65.5M
    return count;
769
65.5M
}
Unexecuted instantiation: bytesobject.c:stringlib_count_char_no_maxcount
Unexecuted instantiation: unicodeobject.c:asciilib_count_char_no_maxcount
unicodeobject.c:ucs1lib_count_char_no_maxcount
Line
Count
Source
761
48.7M
{
762
48.7M
    Py_ssize_t count = 0;
763
1.64G
    for (Py_ssize_t i = 0; i < n; i++) {
764
1.59G
        if (s[i] == p0) {
765
45.7M
            count++;
766
45.7M
        }
767
1.59G
    }
768
48.7M
    return count;
769
48.7M
}
unicodeobject.c:ucs2lib_count_char_no_maxcount
Line
Count
Source
761
10.5M
{
762
10.5M
    Py_ssize_t count = 0;
763
2.10G
    for (Py_ssize_t i = 0; i < n; i++) {
764
2.09G
        if (s[i] == p0) {
765
77.7M
            count++;
766
77.7M
        }
767
2.09G
    }
768
10.5M
    return count;
769
10.5M
}
unicodeobject.c:ucs4lib_count_char_no_maxcount
Line
Count
Source
761
1.96M
{
762
1.96M
    Py_ssize_t count = 0;
763
2.12G
    for (Py_ssize_t i = 0; i < n; i++) {
764
2.12G
        if (s[i] == p0) {
765
64.7M
            count++;
766
64.7M
        }
767
2.12G
    }
768
1.96M
    return count;
769
1.96M
}
bytes_methods.c:stringlib_count_char_no_maxcount
Line
Count
Source
761
4.24M
{
762
4.24M
    Py_ssize_t count = 0;
763
2.17G
    for (Py_ssize_t i = 0; i < n; i++) {
764
2.17G
        if (s[i] == p0) {
765
36.8M
            count++;
766
36.8M
        }
767
2.17G
    }
768
4.24M
    return count;
769
4.24M
}
Unexecuted instantiation: bytearrayobject.c:stringlib_count_char_no_maxcount
770
771
772
Py_LOCAL_INLINE(Py_ssize_t)
773
FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
774
           const STRINGLIB_CHAR* p, Py_ssize_t m,
775
           Py_ssize_t maxcount, int mode)
776
240M
{
777
240M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
12.6k
        return -1;
779
12.6k
    }
780
781
    /* look for special cases */
782
240M
    if (m <= 1) {
783
217M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
217M
        if (mode == FAST_SEARCH)
788
151M
            return STRINGLIB(find_char)(s, n, p[0]);
789
65.6M
        else if (mode == FAST_RSEARCH)
790
61.7k
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
65.5M
        else {
792
65.5M
            if (maxcount == PY_SSIZE_T_MAX) {
793
65.5M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
65.5M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
65.5M
        }
797
217M
    }
798
799
23.3M
    if (mode != FAST_RSEARCH) {
800
23.3M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
23.3M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
23.3M
        }
803
21
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
21
            if (mode == FAST_SEARCH) {
810
21
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
21
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
21
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
23.3M
    }
825
6.55k
    else {
826
        /* FAST_RSEARCH */
827
6.55k
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
6.55k
    }
829
23.3M
}
bytesobject.c:fastsearch
Line
Count
Source
776
3.46k
{
777
3.46k
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
0
        return -1;
779
0
    }
780
781
    /* look for special cases */
782
3.46k
    if (m <= 1) {
783
3.46k
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
3.46k
        if (mode == FAST_SEARCH)
788
3.46k
            return STRINGLIB(find_char)(s, n, p[0]);
789
0
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
0
        else {
792
0
            if (maxcount == PY_SSIZE_T_MAX) {
793
0
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
0
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
0
        }
797
3.46k
    }
798
799
0
    if (mode != FAST_RSEARCH) {
800
0
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
0
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
0
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
0
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
0
}
unicodeobject.c:asciilib_fastsearch
Line
Count
Source
776
23.4M
{
777
23.4M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
0
        return -1;
779
0
    }
780
781
    /* look for special cases */
782
23.4M
    if (m <= 1) {
783
21.6M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
21.6M
        if (mode == FAST_SEARCH)
788
21.5M
            return STRINGLIB(find_char)(s, n, p[0]);
789
61.7k
        else if (mode == FAST_RSEARCH)
790
61.7k
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
0
        else {
792
0
            if (maxcount == PY_SSIZE_T_MAX) {
793
0
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
0
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
0
        }
797
21.6M
    }
798
799
1.84M
    if (mode != FAST_RSEARCH) {
800
1.84M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
1.84M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
1.84M
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
1.84M
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
1.84M
}
unicodeobject.c:ucs1lib_fastsearch
Line
Count
Source
776
69.8M
{
777
69.8M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
0
        return -1;
779
0
    }
780
781
    /* look for special cases */
782
69.8M
    if (m <= 1) {
783
56.7M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
56.7M
        if (mode == FAST_SEARCH)
788
7.97M
            return STRINGLIB(find_char)(s, n, p[0]);
789
48.7M
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
48.7M
        else {
792
48.7M
            if (maxcount == PY_SSIZE_T_MAX) {
793
48.7M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
48.7M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
48.7M
        }
797
56.7M
    }
798
799
13.1M
    if (mode != FAST_RSEARCH) {
800
13.1M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
13.1M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
13.1M
        }
803
21
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
21
            if (mode == FAST_SEARCH) {
810
21
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
21
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
21
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
13.1M
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
13.1M
}
unicodeobject.c:ucs2lib_fastsearch
Line
Count
Source
776
81.1M
{
777
81.1M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
12.6k
        return -1;
779
12.6k
    }
780
781
    /* look for special cases */
782
81.1M
    if (m <= 1) {
783
77.3M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
77.3M
        if (mode == FAST_SEARCH)
788
66.7M
            return STRINGLIB(find_char)(s, n, p[0]);
789
10.5M
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
10.5M
        else {
792
10.5M
            if (maxcount == PY_SSIZE_T_MAX) {
793
10.5M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
10.5M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
10.5M
        }
797
77.3M
    }
798
799
3.83M
    if (mode != FAST_RSEARCH) {
800
3.83M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
3.83M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
3.83M
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
3.83M
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
3.83M
}
unicodeobject.c:ucs4lib_fastsearch
Line
Count
Source
776
61.8M
{
777
61.8M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
0
        return -1;
779
0
    }
780
781
    /* look for special cases */
782
61.8M
    if (m <= 1) {
783
57.3M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
57.3M
        if (mode == FAST_SEARCH)
788
55.3M
            return STRINGLIB(find_char)(s, n, p[0]);
789
1.96M
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
1.96M
        else {
792
1.96M
            if (maxcount == PY_SSIZE_T_MAX) {
793
1.96M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
1.96M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
1.96M
        }
797
57.3M
    }
798
799
4.58M
    if (mode != FAST_RSEARCH) {
800
4.58M
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
4.58M
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
4.58M
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
4.58M
    }
825
0
    else {
826
        /* FAST_RSEARCH */
827
0
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
0
    }
829
4.58M
}
bytes_methods.c:fastsearch
Line
Count
Source
776
4.25M
{
777
4.25M
    if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
778
8
        return -1;
779
8
    }
780
781
    /* look for special cases */
782
4.25M
    if (m <= 1) {
783
4.24M
        if (m <= 0) {
784
0
            return -1;
785
0
        }
786
        /* use special case for 1-character strings */
787
4.24M
        if (mode == FAST_SEARCH)
788
0
            return STRINGLIB(find_char)(s, n, p[0]);
789
4.24M
        else if (mode == FAST_RSEARCH)
790
0
            return STRINGLIB(rfind_char)(s, n, p[0]);
791
4.24M
        else {
792
4.24M
            if (maxcount == PY_SSIZE_T_MAX) {
793
4.24M
                return STRINGLIB(count_char_no_maxcount)(s, n, p[0]);
794
4.24M
            }
795
0
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
796
4.24M
        }
797
4.24M
    }
798
799
9.32k
    if (mode != FAST_RSEARCH) {
800
2.76k
        if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
801
2.76k
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
802
2.76k
        }
803
0
        else if ((m >> 2) * 3 < (n >> 2)) {
804
            /* 33% threshold, but don't overflow. */
805
            /* For larger problems where the needle isn't a huge
806
               percentage of the size of the haystack, the relatively
807
               expensive O(m) startup cost of the two-way algorithm
808
               will surely pay off. */
809
0
            if (mode == FAST_SEARCH) {
810
0
                return STRINGLIB(_two_way_find)(s, n, p, m);
811
0
            }
812
0
            else {
813
0
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
814
0
            }
815
0
        }
816
0
        else {
817
            /* To ensure that we have good worst-case behavior,
818
               here's an adaptive version of the algorithm, where if
819
               we match O(m) characters without any matches of the
820
               entire needle, then we predict that the startup cost of
821
               the two-way algorithm will probably be worth it. */
822
0
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
823
0
        }
824
2.76k
    }
825
6.55k
    else {
826
        /* FAST_RSEARCH */
827
6.55k
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
828
6.55k
    }
829
9.32k
}
Unexecuted instantiation: bytearrayobject.c:fastsearch
830