/src/cpython/Objects/stringlib/fastsearch.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* stringlib: fastsearch implementation */ |
2 | | |
3 | | #define STRINGLIB_FASTSEARCH_H |
4 | | |
5 | | /* fast search/count implementation, based on a mix between boyer- |
6 | | moore and horspool, with a few more bells and whistles on the top. |
7 | | for some more background, see: |
8 | | https://web.archive.org/web/20201107074620/http://effbot.org/zone/stringlib.htm */ |
9 | | |
10 | | /* note: fastsearch may access s[n], which isn't a problem when using |
11 | | Python's ordinary string types, but may cause problems if you're |
12 | | using this code in other contexts. also, the count mode returns -1 |
13 | | if there cannot possibly be a match in the target string, and 0 if |
14 | | it has actually checked for matches, but didn't find any. callers |
15 | | beware! */ |
16 | | |
17 | | /* If the strings are long enough, use Crochemore and Perrin's Two-Way |
18 | | algorithm, which has worst-case O(n) runtime and best-case O(n/k). |
19 | | Also compute a table of shifts to achieve O(n/k) in more cases, |
20 | | and often (data dependent) deduce larger shifts than pure C&P can |
21 | | deduce. See stringlib_find_two_way_notes.txt in this folder for a |
22 | | detailed explanation. */ |
23 | | |
24 | 513M | #define FAST_COUNT 0 |
25 | 352M | #define FAST_SEARCH 1 |
26 | 70.7M | #define FAST_RSEARCH 2 |
27 | | |
28 | | #if LONG_BIT >= 128 |
29 | | #define STRINGLIB_BLOOM_WIDTH 128 |
30 | | #elif LONG_BIT >= 64 |
31 | 5.38G | #define STRINGLIB_BLOOM_WIDTH 64 |
32 | | #elif LONG_BIT >= 32 |
33 | | #define STRINGLIB_BLOOM_WIDTH 32 |
34 | | #else |
35 | | #error "LONG_BIT is smaller than 32" |
36 | | #endif |
37 | | |
38 | | #define STRINGLIB_BLOOM_ADD(mask, ch) \ |
39 | 29.6M | ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1))))) |
40 | | #define STRINGLIB_BLOOM(mask, ch) \ |
41 | 5.35G | ((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1))))) |
42 | | |
43 | | #ifdef STRINGLIB_FAST_MEMCHR |
44 | 183M | # define MEMCHR_CUT_OFF 15 |
45 | | #else |
46 | 66.7M | # define MEMCHR_CUT_OFF 40 |
47 | | #endif |
48 | | |
49 | | Py_LOCAL_INLINE(Py_ssize_t) |
50 | | STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch) |
51 | 249M | { |
52 | 249M | const STRINGLIB_CHAR *p, *e; |
53 | | |
54 | 249M | p = s; |
55 | 249M | e = s + n; |
56 | 249M | if (n > MEMCHR_CUT_OFF) { |
57 | | #ifdef STRINGLIB_FAST_MEMCHR |
58 | 93.5M | p = STRINGLIB_FAST_MEMCHR(s, ch, n); |
59 | 93.5M | if (p != NULL) |
60 | 91.9M | return (p - s); |
61 | 1.58M | return -1; |
62 | | #else |
63 | | /* use memchr if we can choose a needle without too many likely |
64 | | false positives */ |
65 | | const STRINGLIB_CHAR *s1, *e1; |
66 | | unsigned char needle = ch & 0xff; |
67 | | /* If looking for a multiple of 256, we'd have too |
68 | | many false positives looking for the '\0' byte in UCS2 |
69 | | and UCS4 representations. */ |
70 | 58.2M | if (needle != 0) { |
71 | 57.8M | do { |
72 | 57.8M | void *candidate = memchr(p, needle, |
73 | 57.8M | (e - p) * sizeof(STRINGLIB_CHAR)); |
74 | 57.8M | if (candidate == NULL) |
75 | 505k | return -1; |
76 | 57.3M | s1 = p; |
77 | 57.3M | p = (const STRINGLIB_CHAR *) |
78 | 57.3M | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); |
79 | 57.3M | if (*p == ch) |
80 | 57.2M | return (p - s); |
81 | | /* False positive */ |
82 | 124k | p++; |
83 | 124k | if (p - s1 > MEMCHR_CUT_OFF) |
84 | 58.0k | continue; |
85 | 66.4k | if (e - p <= MEMCHR_CUT_OFF) |
86 | 4.16k | break; |
87 | 62.2k | e1 = p + MEMCHR_CUT_OFF; |
88 | 1.84M | while (p != e1) { |
89 | 1.81M | if (*p == ch) |
90 | 27.6k | return (p - s); |
91 | 1.78M | p++; |
92 | 1.78M | } |
93 | 62.2k | } |
94 | 57.7M | while (e - p > MEMCHR_CUT_OFF); |
95 | 57.7M | } |
96 | | #endif |
97 | 151M | } |
98 | 388M | while (p < e) { |
99 | 305M | if (*p == ch) |
100 | 14.8M | return (p - s); |
101 | 290M | p++; |
102 | 290M | } |
103 | 83.3M | return -1; |
104 | 98.1M | } Unexecuted instantiation: bytesobject.c:stringlib_find_char unicodeobject.c:ucs1lib_find_char Line | Count | Source | 51 | 103M | { | 52 | 103M | const STRINGLIB_CHAR *p, *e; | 53 | | | 54 | 103M | p = s; | 55 | 103M | e = s + n; | 56 | 103M | if (n > MEMCHR_CUT_OFF) { | 57 | 18.4M | #ifdef STRINGLIB_FAST_MEMCHR | 58 | 18.4M | p = STRINGLIB_FAST_MEMCHR(s, ch, n); | 59 | 18.4M | if (p != NULL) | 60 | 17.5M | return (p - s); | 61 | 895k | return -1; | 62 | | #else | 63 | | /* use memchr if we can choose a needle without too many likely | 64 | | false positives */ | 65 | | const STRINGLIB_CHAR *s1, *e1; | 66 | | unsigned char needle = ch & 0xff; | 67 | | /* If looking for a multiple of 256, we'd have too | 68 | | many false positives looking for the '\0' byte in UCS2 | 69 | | and UCS4 representations. */ | 70 | | if (needle != 0) { | 71 | | do { | 72 | | void *candidate = memchr(p, needle, | 73 | | (e - p) * sizeof(STRINGLIB_CHAR)); | 74 | | if (candidate == NULL) | 75 | | return -1; | 76 | | s1 = p; | 77 | | p = (const STRINGLIB_CHAR *) | 78 | | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 79 | | if (*p == ch) | 80 | | return (p - s); | 81 | | /* False positive */ | 82 | | p++; | 83 | | if (p - s1 > MEMCHR_CUT_OFF) | 84 | | continue; | 85 | | if (e - p <= MEMCHR_CUT_OFF) | 86 | | break; | 87 | | e1 = p + MEMCHR_CUT_OFF; | 88 | | while (p != e1) { | 89 | | if (*p == ch) | 90 | | return (p - s); | 91 | | p++; | 92 | | } | 93 | | } | 94 | | while (e - p > MEMCHR_CUT_OFF); | 95 | | } | 96 | | #endif | 97 | 18.4M | } | 98 | 240M | while (p < e) { | 99 | 159M | if (*p == ch) | 100 | 4.52M | return (p - s); | 101 | 155M | p++; | 102 | 155M | } | 103 | 80.5M | return -1; | 104 | 85.0M | } |
unicodeobject.c:ucs2lib_find_char Line | Count | Source | 51 | 66.4M | { | 52 | 66.4M | const STRINGLIB_CHAR *p, *e; | 53 | | | 54 | 66.4M | p = s; | 55 | 66.4M | e = s + n; | 56 | 66.4M | if (n > MEMCHR_CUT_OFF) { | 57 | | #ifdef STRINGLIB_FAST_MEMCHR | 58 | | p = STRINGLIB_FAST_MEMCHR(s, ch, n); | 59 | | if (p != NULL) | 60 | | return (p - s); | 61 | | return -1; | 62 | | #else | 63 | | /* use memchr if we can choose a needle without too many likely | 64 | | false positives */ | 65 | 58.2M | const STRINGLIB_CHAR *s1, *e1; | 66 | 58.2M | unsigned char needle = ch & 0xff; | 67 | | /* If looking for a multiple of 256, we'd have too | 68 | | many false positives looking for the '\0' byte in UCS2 | 69 | | and UCS4 representations. */ | 70 | 58.2M | if (needle != 0) { | 71 | 57.8M | do { | 72 | 57.8M | void *candidate = memchr(p, needle, | 73 | 57.8M | (e - p) * sizeof(STRINGLIB_CHAR)); | 74 | 57.8M | if (candidate == NULL) | 75 | 505k | return -1; | 76 | 57.3M | s1 = p; | 77 | 57.3M | p = (const STRINGLIB_CHAR *) | 78 | 57.3M | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 79 | 57.3M | if (*p == ch) | 80 | 57.2M | return (p - s); | 81 | | /* False positive */ | 82 | 124k | p++; | 83 | 124k | if (p - s1 > MEMCHR_CUT_OFF) | 84 | 58.0k | continue; | 85 | 66.4k | if (e - p <= MEMCHR_CUT_OFF) | 86 | 4.16k | break; | 87 | 62.2k | e1 = p + MEMCHR_CUT_OFF; | 88 | 1.84M | while (p != e1) { | 89 | 1.81M | if (*p == ch) | 90 | 27.6k | return (p - s); | 91 | 1.78M | p++; | 92 | 1.78M | } | 93 | 62.2k | } | 94 | 57.7M | while (e - p > MEMCHR_CUT_OFF); | 95 | 57.7M | } | 96 | 58.2M | #endif | 97 | 58.2M | } | 98 | 135M | while (p < e) { | 99 | 133M | if (*p == ch) | 100 | 5.95M | return (p - s); | 101 | 127M | p++; | 102 | 127M | } | 103 | 2.70M | return -1; | 104 | 8.66M | } |
unicodeobject.c:ucs4lib_find_char Line | Count | Source | 51 | 60.9M | { | 52 | 60.9M | const STRINGLIB_CHAR *p, *e; | 53 | | | 54 | 60.9M | p = s; | 55 | 60.9M | e = s + n; | 56 | 60.9M | if (n > MEMCHR_CUT_OFF) { | 57 | 60.8M | #ifdef STRINGLIB_FAST_MEMCHR | 58 | 60.8M | p = STRINGLIB_FAST_MEMCHR(s, ch, n); | 59 | 60.8M | if (p != NULL) | 60 | 60.8M | return (p - s); | 61 | 28.4k | return -1; | 62 | | #else | 63 | | /* use memchr if we can choose a needle without too many likely | 64 | | false positives */ | 65 | | const STRINGLIB_CHAR *s1, *e1; | 66 | | unsigned char needle = ch & 0xff; | 67 | | /* If looking for a multiple of 256, we'd have too | 68 | | many false positives looking for the '\0' byte in UCS2 | 69 | | and UCS4 representations. */ | 70 | | if (needle != 0) { | 71 | | do { | 72 | | void *candidate = memchr(p, needle, | 73 | | (e - p) * sizeof(STRINGLIB_CHAR)); | 74 | | if (candidate == NULL) | 75 | | return -1; | 76 | | s1 = p; | 77 | | p = (const STRINGLIB_CHAR *) | 78 | | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 79 | | if (*p == ch) | 80 | | return (p - s); | 81 | | /* False positive */ | 82 | | p++; | 83 | | if (p - s1 > MEMCHR_CUT_OFF) | 84 | | continue; | 85 | | if (e - p <= MEMCHR_CUT_OFF) | 86 | | break; | 87 | | e1 = p + MEMCHR_CUT_OFF; | 88 | | while (p != e1) { | 89 | | if (*p == ch) | 90 | | return (p - s); | 91 | | p++; | 92 | | } | 93 | | } | 94 | | while (e - p > MEMCHR_CUT_OFF); | 95 | | } | 96 | | #endif | 97 | 60.8M | } | 98 | 292k | while (p < e) { | 99 | 246k | if (*p == ch) | 100 | 31.6k | return (p - s); | 101 | 214k | p++; | 102 | 214k | } | 103 | 46.3k | return -1; | 104 | 77.9k | } |
unicodeobject.c:asciilib_find_char Line | Count | Source | 51 | 18.6M | { | 52 | 18.6M | const STRINGLIB_CHAR *p, *e; | 53 | | | 54 | 18.6M | p = s; | 55 | 18.6M | e = s + n; | 56 | 18.6M | if (n > MEMCHR_CUT_OFF) { | 57 | 14.2M | #ifdef STRINGLIB_FAST_MEMCHR | 58 | 14.2M | p = STRINGLIB_FAST_MEMCHR(s, ch, n); | 59 | 14.2M | if (p != NULL) | 60 | 13.5M | return (p - s); | 61 | 656k | return -1; | 62 | | #else | 63 | | /* use memchr if we can choose a needle without too many likely | 64 | | false positives */ | 65 | | const STRINGLIB_CHAR *s1, *e1; | 66 | | unsigned char needle = ch & 0xff; | 67 | | /* If looking for a multiple of 256, we'd have too | 68 | | many false positives looking for the '\0' byte in UCS2 | 69 | | and UCS4 representations. */ | 70 | | if (needle != 0) { | 71 | | do { | 72 | | void *candidate = memchr(p, needle, | 73 | | (e - p) * sizeof(STRINGLIB_CHAR)); | 74 | | if (candidate == NULL) | 75 | | return -1; | 76 | | s1 = p; | 77 | | p = (const STRINGLIB_CHAR *) | 78 | | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 79 | | if (*p == ch) | 80 | | return (p - s); | 81 | | /* False positive */ | 82 | | p++; | 83 | | if (p - s1 > MEMCHR_CUT_OFF) | 84 | | continue; | 85 | | if (e - p <= MEMCHR_CUT_OFF) | 86 | | break; | 87 | | e1 = p + MEMCHR_CUT_OFF; | 88 | | while (p != e1) { | 89 | | if (*p == ch) | 90 | | return (p - s); | 91 | | p++; | 92 | | } | 93 | | } | 94 | | while (e - p > MEMCHR_CUT_OFF); | 95 | | } | 96 | | #endif | 97 | 14.2M | } | 98 | 12.0M | while (p < e) { | 99 | 12.0M | if (*p == ch) | 100 | 4.32M | return (p - s); | 101 | 7.71M | p++; | 102 | 7.71M | } | 103 | 45.8k | return -1; | 104 | 4.37M | } |
bytes_methods.c:stringlib_find_char Line | Count | Source | 51 | 1.71k | { | 52 | 1.71k | const STRINGLIB_CHAR *p, *e; | 53 | | | 54 | 1.71k | p = s; | 55 | 1.71k | e = s + n; | 56 | 1.71k | if (n > MEMCHR_CUT_OFF) { | 57 | 1.71k | #ifdef STRINGLIB_FAST_MEMCHR | 58 | 1.71k | p = STRINGLIB_FAST_MEMCHR(s, ch, n); | 59 | 1.71k | if (p != NULL) | 60 | 1.45k | return (p - s); | 61 | 258 | return -1; | 62 | | #else | 63 | | /* use memchr if we can choose a needle without too many likely | 64 | | false positives */ | 65 | | const STRINGLIB_CHAR *s1, *e1; | 66 | | unsigned char needle = ch & 0xff; | 67 | | /* If looking for a multiple of 256, we'd have too | 68 | | many false positives looking for the '\0' byte in UCS2 | 69 | | and UCS4 representations. */ | 70 | | if (needle != 0) { | 71 | | do { | 72 | | void *candidate = memchr(p, needle, | 73 | | (e - p) * sizeof(STRINGLIB_CHAR)); | 74 | | if (candidate == NULL) | 75 | | return -1; | 76 | | s1 = p; | 77 | | p = (const STRINGLIB_CHAR *) | 78 | | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 79 | | if (*p == ch) | 80 | | return (p - s); | 81 | | /* False positive */ | 82 | | p++; | 83 | | if (p - s1 > MEMCHR_CUT_OFF) | 84 | | continue; | 85 | | if (e - p <= MEMCHR_CUT_OFF) | 86 | | break; | 87 | | e1 = p + MEMCHR_CUT_OFF; | 88 | | while (p != e1) { | 89 | | if (*p == ch) | 90 | | return (p - s); | 91 | | p++; | 92 | | } | 93 | | } | 94 | | while (e - p > MEMCHR_CUT_OFF); | 95 | | } | 96 | | #endif | 97 | 1.71k | } | 98 | 0 | while (p < e) { | 99 | 0 | if (*p == ch) | 100 | 0 | return (p - s); | 101 | 0 | p++; | 102 | 0 | } | 103 | 0 | return -1; | 104 | 0 | } |
Unexecuted instantiation: bytearrayobject.c:stringlib_find_char |
105 | | |
106 | | #undef MEMCHR_CUT_OFF |
107 | | |
108 | | #if STRINGLIB_SIZEOF_CHAR == 1 |
109 | 37.1k | # define MEMRCHR_CUT_OFF 15 |
110 | | #else |
111 | 163k | # define MEMRCHR_CUT_OFF 40 |
112 | | #endif |
113 | | |
114 | | |
115 | | Py_LOCAL_INLINE(Py_ssize_t) |
116 | | STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch) |
117 | 181k | { |
118 | 181k | const STRINGLIB_CHAR *p; |
119 | 181k | #ifdef HAVE_MEMRCHR |
120 | | /* memrchr() is a GNU extension, available since glibc 2.1.91. it |
121 | | doesn't seem as optimized as memchr(), but is still quite |
122 | | faster than our hand-written loop below. There is no wmemrchr |
123 | | for 4-byte chars. */ |
124 | | |
125 | 181k | if (n > MEMRCHR_CUT_OFF) { |
126 | | #if STRINGLIB_SIZEOF_CHAR == 1 |
127 | | p = memrchr(s, ch, n); |
128 | 10.7k | if (p != NULL) |
129 | 7.23k | return (p - s); |
130 | 3.53k | return -1; |
131 | | #else |
132 | | /* use memrchr if we can choose a needle without too many likely |
133 | | false positives */ |
134 | | const STRINGLIB_CHAR *s1; |
135 | | Py_ssize_t n1; |
136 | | unsigned char needle = ch & 0xff; |
137 | | /* If looking for a multiple of 256, we'd have too |
138 | | many false positives looking for the '\0' byte in UCS2 |
139 | | and UCS4 representations. */ |
140 | 83.2k | if (needle != 0) { |
141 | 87.8k | do { |
142 | 87.8k | void *candidate = memrchr(s, needle, |
143 | 87.8k | n * sizeof(STRINGLIB_CHAR)); |
144 | 87.8k | if (candidate == NULL) |
145 | 644 | return -1; |
146 | 87.1k | n1 = n; |
147 | 87.1k | p = (const STRINGLIB_CHAR *) |
148 | 87.1k | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); |
149 | 87.1k | n = p - s; |
150 | 87.1k | if (*p == ch) |
151 | 79.9k | return n; |
152 | | /* False positive */ |
153 | 7.22k | if (n1 - n > MEMRCHR_CUT_OFF) |
154 | 3.53k | continue; |
155 | 3.68k | if (n <= MEMRCHR_CUT_OFF) |
156 | 833 | break; |
157 | 2.85k | s1 = p - MEMRCHR_CUT_OFF; |
158 | 104k | while (p > s1) { |
159 | 102k | p--; |
160 | 102k | if (*p == ch) |
161 | 518 | return (p - s); |
162 | 102k | } |
163 | 2.33k | n = p - s; |
164 | 2.33k | } |
165 | 83.2k | while (n > MEMRCHR_CUT_OFF); |
166 | 83.2k | } |
167 | | #endif |
168 | 93.9k | } |
169 | 89.5k | #endif /* HAVE_MEMRCHR */ |
170 | 89.5k | p = s + n; |
171 | 697k | while (p > s) { |
172 | 679k | p--; |
173 | 679k | if (*p == ch) |
174 | 72.0k | return (p - s); |
175 | 679k | } |
176 | 17.4k | return -1; |
177 | 89.5k | } Unexecuted instantiation: bytesobject.c:stringlib_rfind_char unicodeobject.c:ucs1lib_rfind_char Line | Count | Source | 117 | 7.95k | { | 118 | 7.95k | const STRINGLIB_CHAR *p; | 119 | 7.95k | #ifdef HAVE_MEMRCHR | 120 | | /* memrchr() is a GNU extension, available since glibc 2.1.91. it | 121 | | doesn't seem as optimized as memchr(), but is still quite | 122 | | faster than our hand-written loop below. There is no wmemrchr | 123 | | for 4-byte chars. */ | 124 | | | 125 | 7.95k | if (n > MEMRCHR_CUT_OFF) { | 126 | 4.39k | #if STRINGLIB_SIZEOF_CHAR == 1 | 127 | 4.39k | p = memrchr(s, ch, n); | 128 | 4.39k | if (p != NULL) | 129 | 3.39k | return (p - s); | 130 | 1.00k | return -1; | 131 | | #else | 132 | | /* use memrchr if we can choose a needle without too many likely | 133 | | false positives */ | 134 | | const STRINGLIB_CHAR *s1; | 135 | | Py_ssize_t n1; | 136 | | unsigned char needle = ch & 0xff; | 137 | | /* If looking for a multiple of 256, we'd have too | 138 | | many false positives looking for the '\0' byte in UCS2 | 139 | | and UCS4 representations. */ | 140 | | if (needle != 0) { | 141 | | do { | 142 | | void *candidate = memrchr(s, needle, | 143 | | n * sizeof(STRINGLIB_CHAR)); | 144 | | if (candidate == NULL) | 145 | | return -1; | 146 | | n1 = n; | 147 | | p = (const STRINGLIB_CHAR *) | 148 | | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 149 | | n = p - s; | 150 | | if (*p == ch) | 151 | | return n; | 152 | | /* False positive */ | 153 | | if (n1 - n > MEMRCHR_CUT_OFF) | 154 | | continue; | 155 | | if (n <= MEMRCHR_CUT_OFF) | 156 | | break; | 157 | | s1 = p - MEMRCHR_CUT_OFF; | 158 | | while (p > s1) { | 159 | | p--; | 160 | | if (*p == ch) | 161 | | return (p - s); | 162 | | } | 163 | | n = p - s; | 164 | | } | 165 | | while (n > MEMRCHR_CUT_OFF); | 166 | | } | 167 | | #endif | 168 | 4.39k | } | 169 | 3.55k | #endif /* HAVE_MEMRCHR */ | 170 | 3.55k | p = s + n; | 171 | 10.3k | while (p > s) { | 172 | 9.55k | p--; | 173 | 9.55k | if (*p == ch) | 174 | 2.80k | return (p - s); | 175 | 9.55k | } | 176 | 755 | return -1; | 177 | 3.55k | } |
unicodeobject.c:ucs2lib_rfind_char Line | Count | Source | 117 | 33.6k | { | 118 | 33.6k | const STRINGLIB_CHAR *p; | 119 | 33.6k | #ifdef HAVE_MEMRCHR | 120 | | /* memrchr() is a GNU extension, available since glibc 2.1.91. it | 121 | | doesn't seem as optimized as memchr(), but is still quite | 122 | | faster than our hand-written loop below. There is no wmemrchr | 123 | | for 4-byte chars. */ | 124 | | | 125 | 33.6k | if (n > MEMRCHR_CUT_OFF) { | 126 | | #if STRINGLIB_SIZEOF_CHAR == 1 | 127 | | p = memrchr(s, ch, n); | 128 | | if (p != NULL) | 129 | | return (p - s); | 130 | | return -1; | 131 | | #else | 132 | | /* use memrchr if we can choose a needle without too many likely | 133 | | false positives */ | 134 | 15.0k | const STRINGLIB_CHAR *s1; | 135 | 15.0k | Py_ssize_t n1; | 136 | 15.0k | unsigned char needle = ch & 0xff; | 137 | | /* If looking for a multiple of 256, we'd have too | 138 | | many false positives looking for the '\0' byte in UCS2 | 139 | | and UCS4 representations. */ | 140 | 15.0k | if (needle != 0) { | 141 | 17.7k | do { | 142 | 17.7k | void *candidate = memrchr(s, needle, | 143 | 17.7k | n * sizeof(STRINGLIB_CHAR)); | 144 | 17.7k | if (candidate == NULL) | 145 | 390 | return -1; | 146 | 17.3k | n1 = n; | 147 | 17.3k | p = (const STRINGLIB_CHAR *) | 148 | 17.3k | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 149 | 17.3k | n = p - s; | 150 | 17.3k | if (*p == ch) | 151 | 13.7k | return n; | 152 | | /* False positive */ | 153 | 3.64k | if (n1 - n > MEMRCHR_CUT_OFF) | 154 | 1.41k | continue; | 155 | 2.22k | if (n <= MEMRCHR_CUT_OFF) | 156 | 418 | break; | 157 | 1.80k | s1 = p - MEMRCHR_CUT_OFF; | 158 | 68.7k | while (p > s1) { | 159 | 67.1k | p--; | 160 | 67.1k | if (*p == ch) | 161 | 217 | return (p - s); | 162 | 67.1k | } | 163 | 1.58k | n = p - s; | 164 | 1.58k | } | 165 | 15.0k | while (n > MEMRCHR_CUT_OFF); | 166 | 15.0k | } | 167 | 15.0k | #endif | 168 | 15.0k | } | 169 | 19.3k | #endif /* HAVE_MEMRCHR */ | 170 | 19.3k | p = s + n; | 171 | 173k | while (p > s) { | 172 | 171k | p--; | 173 | 171k | if (*p == ch) | 174 | 17.5k | return (p - s); | 175 | 171k | } | 176 | 1.73k | return -1; | 177 | 19.3k | } |
unicodeobject.c:ucs4lib_rfind_char Line | Count | Source | 117 | 110k | { | 118 | 110k | const STRINGLIB_CHAR *p; | 119 | 110k | #ifdef HAVE_MEMRCHR | 120 | | /* memrchr() is a GNU extension, available since glibc 2.1.91. it | 121 | | doesn't seem as optimized as memchr(), but is still quite | 122 | | faster than our hand-written loop below. There is no wmemrchr | 123 | | for 4-byte chars. */ | 124 | | | 125 | 110k | if (n > MEMRCHR_CUT_OFF) { | 126 | | #if STRINGLIB_SIZEOF_CHAR == 1 | 127 | | p = memrchr(s, ch, n); | 128 | | if (p != NULL) | 129 | | return (p - s); | 130 | | return -1; | 131 | | #else | 132 | | /* use memrchr if we can choose a needle without too many likely | 133 | | false positives */ | 134 | 68.1k | const STRINGLIB_CHAR *s1; | 135 | 68.1k | Py_ssize_t n1; | 136 | 68.1k | unsigned char needle = ch & 0xff; | 137 | | /* If looking for a multiple of 256, we'd have too | 138 | | many false positives looking for the '\0' byte in UCS2 | 139 | | and UCS4 representations. */ | 140 | 68.1k | if (needle != 0) { | 141 | 70.0k | do { | 142 | 70.0k | void *candidate = memrchr(s, needle, | 143 | 70.0k | n * sizeof(STRINGLIB_CHAR)); | 144 | 70.0k | if (candidate == NULL) | 145 | 254 | return -1; | 146 | 69.8k | n1 = n; | 147 | 69.8k | p = (const STRINGLIB_CHAR *) | 148 | 69.8k | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 149 | 69.8k | n = p - s; | 150 | 69.8k | if (*p == ch) | 151 | 66.2k | return n; | 152 | | /* False positive */ | 153 | 3.58k | if (n1 - n > MEMRCHR_CUT_OFF) | 154 | 2.11k | continue; | 155 | 1.46k | if (n <= MEMRCHR_CUT_OFF) | 156 | 415 | break; | 157 | 1.04k | s1 = p - MEMRCHR_CUT_OFF; | 158 | 35.9k | while (p > s1) { | 159 | 35.1k | p--; | 160 | 35.1k | if (*p == ch) | 161 | 301 | return (p - s); | 162 | 35.1k | } | 163 | 746 | n = p - s; | 164 | 746 | } | 165 | 68.1k | while (n > MEMRCHR_CUT_OFF); | 166 | 68.1k | } | 167 | 68.1k | #endif | 168 | 68.1k | } | 169 | 43.9k | #endif /* HAVE_MEMRCHR */ | 170 | 43.9k | p = s + n; | 171 | 392k | while (p > s) { | 172 | 390k | p--; | 173 | 390k | if (*p == ch) | 174 | 42.5k | return (p - s); | 175 | 390k | } | 176 | 1.35k | return -1; | 177 | 43.9k | } |
unicodeobject.c:asciilib_rfind_char Line | Count | Source | 117 | 9.41k | { | 118 | 9.41k | const STRINGLIB_CHAR *p; | 119 | 9.41k | #ifdef HAVE_MEMRCHR | 120 | | /* memrchr() is a GNU extension, available since glibc 2.1.91. it | 121 | | doesn't seem as optimized as memchr(), but is still quite | 122 | | faster than our hand-written loop below. There is no wmemrchr | 123 | | for 4-byte chars. */ | 124 | | | 125 | 9.41k | if (n > MEMRCHR_CUT_OFF) { | 126 | 3.11k | #if STRINGLIB_SIZEOF_CHAR == 1 | 127 | 3.11k | p = memrchr(s, ch, n); | 128 | 3.11k | if (p != NULL) | 129 | 3.02k | return (p - s); | 130 | 86 | return -1; | 131 | | #else | 132 | | /* use memrchr if we can choose a needle without too many likely | 133 | | false positives */ | 134 | | const STRINGLIB_CHAR *s1; | 135 | | Py_ssize_t n1; | 136 | | unsigned char needle = ch & 0xff; | 137 | | /* If looking for a multiple of 256, we'd have too | 138 | | many false positives looking for the '\0' byte in UCS2 | 139 | | and UCS4 representations. */ | 140 | | if (needle != 0) { | 141 | | do { | 142 | | void *candidate = memrchr(s, needle, | 143 | | n * sizeof(STRINGLIB_CHAR)); | 144 | | if (candidate == NULL) | 145 | | return -1; | 146 | | n1 = n; | 147 | | p = (const STRINGLIB_CHAR *) | 148 | | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 149 | | n = p - s; | 150 | | if (*p == ch) | 151 | | return n; | 152 | | /* False positive */ | 153 | | if (n1 - n > MEMRCHR_CUT_OFF) | 154 | | continue; | 155 | | if (n <= MEMRCHR_CUT_OFF) | 156 | | break; | 157 | | s1 = p - MEMRCHR_CUT_OFF; | 158 | | while (p > s1) { | 159 | | p--; | 160 | | if (*p == ch) | 161 | | return (p - s); | 162 | | } | 163 | | n = p - s; | 164 | | } | 165 | | while (n > MEMRCHR_CUT_OFF); | 166 | | } | 167 | | #endif | 168 | 3.11k | } | 169 | 6.30k | #endif /* HAVE_MEMRCHR */ | 170 | 6.30k | p = s + n; | 171 | 35.6k | while (p > s) { | 172 | 33.5k | p--; | 173 | 33.5k | if (*p == ch) | 174 | 4.20k | return (p - s); | 175 | 33.5k | } | 176 | 2.10k | return -1; | 177 | 6.30k | } |
bytes_methods.c:stringlib_rfind_char Line | Count | Source | 117 | 19.7k | { | 118 | 19.7k | const STRINGLIB_CHAR *p; | 119 | 19.7k | #ifdef HAVE_MEMRCHR | 120 | | /* memrchr() is a GNU extension, available since glibc 2.1.91. it | 121 | | doesn't seem as optimized as memchr(), but is still quite | 122 | | faster than our hand-written loop below. There is no wmemrchr | 123 | | for 4-byte chars. */ | 124 | | | 125 | 19.7k | if (n > MEMRCHR_CUT_OFF) { | 126 | 3.26k | #if STRINGLIB_SIZEOF_CHAR == 1 | 127 | 3.26k | p = memrchr(s, ch, n); | 128 | 3.26k | if (p != NULL) | 129 | 817 | return (p - s); | 130 | 2.44k | return -1; | 131 | | #else | 132 | | /* use memrchr if we can choose a needle without too many likely | 133 | | false positives */ | 134 | | const STRINGLIB_CHAR *s1; | 135 | | Py_ssize_t n1; | 136 | | unsigned char needle = ch & 0xff; | 137 | | /* If looking for a multiple of 256, we'd have too | 138 | | many false positives looking for the '\0' byte in UCS2 | 139 | | and UCS4 representations. */ | 140 | | if (needle != 0) { | 141 | | do { | 142 | | void *candidate = memrchr(s, needle, | 143 | | n * sizeof(STRINGLIB_CHAR)); | 144 | | if (candidate == NULL) | 145 | | return -1; | 146 | | n1 = n; | 147 | | p = (const STRINGLIB_CHAR *) | 148 | | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 149 | | n = p - s; | 150 | | if (*p == ch) | 151 | | return n; | 152 | | /* False positive */ | 153 | | if (n1 - n > MEMRCHR_CUT_OFF) | 154 | | continue; | 155 | | if (n <= MEMRCHR_CUT_OFF) | 156 | | break; | 157 | | s1 = p - MEMRCHR_CUT_OFF; | 158 | | while (p > s1) { | 159 | | p--; | 160 | | if (*p == ch) | 161 | | return (p - s); | 162 | | } | 163 | | n = p - s; | 164 | | } | 165 | | while (n > MEMRCHR_CUT_OFF); | 166 | | } | 167 | | #endif | 168 | 3.26k | } | 169 | 16.4k | #endif /* HAVE_MEMRCHR */ | 170 | 16.4k | p = s + n; | 171 | 85.7k | while (p > s) { | 172 | 74.2k | p--; | 173 | 74.2k | if (*p == ch) | 174 | 4.94k | return (p - s); | 175 | 74.2k | } | 176 | 11.5k | return -1; | 177 | 16.4k | } |
Unexecuted instantiation: bytearrayobject.c:stringlib_rfind_char |
178 | | |
179 | | #undef MEMRCHR_CUT_OFF |
180 | | |
181 | | /* Change to a 1 to see logging comments walk through the algorithm. */ |
182 | | #if 0 && STRINGLIB_SIZEOF_CHAR == 1 |
183 | | # define LOG(...) printf(__VA_ARGS__) |
184 | | # define LOG_STRING(s, n) printf("\"%.*s\"", (int)(n), s) |
185 | | # define LOG_LINEUP() do { \ |
186 | | LOG("> "); LOG_STRING(haystack, len_haystack); LOG("\n> "); \ |
187 | | LOG("%*s",(int)(window_last - haystack + 1 - len_needle), ""); \ |
188 | | LOG_STRING(needle, len_needle); LOG("\n"); \ |
189 | | } while(0) |
190 | | #else |
191 | | # define LOG(...) |
192 | | # define LOG_STRING(s, n) |
193 | | # define LOG_LINEUP() |
194 | | #endif |
195 | | |
196 | | Py_LOCAL_INLINE(Py_ssize_t) |
197 | | STRINGLIB(_lex_search)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle, |
198 | | Py_ssize_t *return_period, int invert_alphabet) |
199 | 42 | { |
200 | | /* Do a lexicographic search. Essentially this: |
201 | | >>> max(needle[i:] for i in range(len(needle)+1)) |
202 | | Also find the period of the right half. */ |
203 | 42 | Py_ssize_t max_suffix = 0; |
204 | 42 | Py_ssize_t candidate = 1; |
205 | 42 | Py_ssize_t k = 0; |
206 | | // The period of the right half. |
207 | 42 | Py_ssize_t period = 1; |
208 | | |
209 | 420 | while (candidate + k < len_needle) { |
210 | | // each loop increases candidate + k + max_suffix |
211 | 378 | STRINGLIB_CHAR a = needle[candidate + k]; |
212 | 378 | STRINGLIB_CHAR b = needle[max_suffix + k]; |
213 | | // check if the suffix at candidate is better than max_suffix |
214 | 378 | if (invert_alphabet ? (b < a) : (a < b)) { |
215 | | // Fell short of max_suffix. |
216 | | // The next k + 1 characters are non-increasing |
217 | | // from candidate, so they won't start a maximal suffix. |
218 | 273 | candidate += k + 1; |
219 | 273 | k = 0; |
220 | | // We've ruled out any period smaller than what's |
221 | | // been scanned since max_suffix. |
222 | 273 | period = candidate - max_suffix; |
223 | 273 | } |
224 | 105 | else if (a == b) { |
225 | 21 | if (k + 1 != period) { |
226 | | // Keep scanning the equal strings |
227 | 0 | k++; |
228 | 0 | } |
229 | 21 | else { |
230 | | // Matched a whole period. |
231 | | // Start matching the next period. |
232 | 21 | candidate += period; |
233 | 21 | k = 0; |
234 | 21 | } |
235 | 21 | } |
236 | 84 | else { |
237 | | // Did better than max_suffix, so replace it. |
238 | 84 | max_suffix = candidate; |
239 | 84 | candidate++; |
240 | 84 | k = 0; |
241 | 84 | period = 1; |
242 | 84 | } |
243 | 378 | } |
244 | 42 | *return_period = period; |
245 | 42 | return max_suffix; |
246 | 42 | } Unexecuted instantiation: bytesobject.c:stringlib__lex_search Unexecuted instantiation: unicodeobject.c:asciilib__lex_search unicodeobject.c:ucs1lib__lex_search Line | Count | Source | 199 | 42 | { | 200 | | /* Do a lexicographic search. Essentially this: | 201 | | >>> max(needle[i:] for i in range(len(needle)+1)) | 202 | | Also find the period of the right half. */ | 203 | 42 | Py_ssize_t max_suffix = 0; | 204 | 42 | Py_ssize_t candidate = 1; | 205 | 42 | Py_ssize_t k = 0; | 206 | | // The period of the right half. | 207 | 42 | Py_ssize_t period = 1; | 208 | | | 209 | 420 | while (candidate + k < len_needle) { | 210 | | // each loop increases candidate + k + max_suffix | 211 | 378 | STRINGLIB_CHAR a = needle[candidate + k]; | 212 | 378 | STRINGLIB_CHAR b = needle[max_suffix + k]; | 213 | | // check if the suffix at candidate is better than max_suffix | 214 | 378 | if (invert_alphabet ? (b < a) : (a < b)) { | 215 | | // Fell short of max_suffix. | 216 | | // The next k + 1 characters are non-increasing | 217 | | // from candidate, so they won't start a maximal suffix. | 218 | 273 | candidate += k + 1; | 219 | 273 | k = 0; | 220 | | // We've ruled out any period smaller than what's | 221 | | // been scanned since max_suffix. | 222 | 273 | period = candidate - max_suffix; | 223 | 273 | } | 224 | 105 | else if (a == b) { | 225 | 21 | if (k + 1 != period) { | 226 | | // Keep scanning the equal strings | 227 | 0 | k++; | 228 | 0 | } | 229 | 21 | else { | 230 | | // Matched a whole period. | 231 | | // Start matching the next period. | 232 | 21 | candidate += period; | 233 | 21 | k = 0; | 234 | 21 | } | 235 | 21 | } | 236 | 84 | else { | 237 | | // Did better than max_suffix, so replace it. | 238 | 84 | max_suffix = candidate; | 239 | 84 | candidate++; | 240 | 84 | k = 0; | 241 | 84 | period = 1; | 242 | 84 | } | 243 | 378 | } | 244 | 42 | *return_period = period; | 245 | 42 | return max_suffix; | 246 | 42 | } |
Unexecuted instantiation: unicodeobject.c:ucs2lib__lex_search Unexecuted instantiation: unicodeobject.c:ucs4lib__lex_search Unexecuted instantiation: bytes_methods.c:stringlib__lex_search Unexecuted instantiation: bytearrayobject.c:stringlib__lex_search |
247 | | |
248 | | Py_LOCAL_INLINE(Py_ssize_t) |
249 | | STRINGLIB(_factorize)(const STRINGLIB_CHAR *needle, |
250 | | Py_ssize_t len_needle, |
251 | | Py_ssize_t *return_period) |
252 | 21 | { |
253 | | /* Do a "critical factorization", making it so that: |
254 | | >>> needle = (left := needle[:cut]) + (right := needle[cut:]) |
255 | | where the "local period" of the cut is maximal. |
256 | | |
257 | | The local period of the cut is the minimal length of a string w |
258 | | such that (left endswith w or w endswith left) |
259 | | and (right startswith w or w startswith right). |
260 | | |
261 | | The Critical Factorization Theorem says that this maximal local |
262 | | period is the global period of the string. |
263 | | |
264 | | Crochemore and Perrin (1991) show that this cut can be computed |
265 | | as the later of two cuts: one that gives a lexicographically |
266 | | maximal right half, and one that gives the same with the |
267 | | with respect to a reversed alphabet-ordering. |
268 | | |
269 | | This is what we want to happen: |
270 | | >>> x = "GCAGAGAG" |
271 | | >>> cut, period = factorize(x) |
272 | | >>> x[:cut], (right := x[cut:]) |
273 | | ('GC', 'AGAGAG') |
274 | | >>> period # right half period |
275 | | 2 |
276 | | >>> right[period:] == right[:-period] |
277 | | True |
278 | | |
279 | | This is how the local period lines up in the above example: |
280 | | GC | AGAGAG |
281 | | AGAGAGC = AGAGAGC |
282 | | The length of this minimal repetition is 7, which is indeed the |
283 | | period of the original string. */ |
284 | | |
285 | 21 | Py_ssize_t cut1, period1, cut2, period2, cut, period; |
286 | 21 | cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0); |
287 | 21 | cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1); |
288 | | |
289 | | // Take the later cut. |
290 | 21 | if (cut1 > cut2) { |
291 | 21 | period = period1; |
292 | 21 | cut = cut1; |
293 | 21 | } |
294 | 0 | else { |
295 | 0 | period = period2; |
296 | 0 | cut = cut2; |
297 | 0 | } |
298 | | |
299 | 21 | LOG("split: "); LOG_STRING(needle, cut); |
300 | 21 | LOG(" + "); LOG_STRING(needle + cut, len_needle - cut); |
301 | 21 | LOG("\n"); |
302 | | |
303 | 21 | *return_period = period; |
304 | 21 | return cut; |
305 | 21 | } Unexecuted instantiation: bytesobject.c:stringlib__factorize Unexecuted instantiation: unicodeobject.c:asciilib__factorize unicodeobject.c:ucs1lib__factorize Line | Count | Source | 252 | 21 | { | 253 | | /* Do a "critical factorization", making it so that: | 254 | | >>> needle = (left := needle[:cut]) + (right := needle[cut:]) | 255 | | where the "local period" of the cut is maximal. | 256 | | | 257 | | The local period of the cut is the minimal length of a string w | 258 | | such that (left endswith w or w endswith left) | 259 | | and (right startswith w or w startswith right). | 260 | | | 261 | | The Critical Factorization Theorem says that this maximal local | 262 | | period is the global period of the string. | 263 | | | 264 | | Crochemore and Perrin (1991) show that this cut can be computed | 265 | | as the later of two cuts: one that gives a lexicographically | 266 | | maximal right half, and one that gives the same with the | 267 | | with respect to a reversed alphabet-ordering. | 268 | | | 269 | | This is what we want to happen: | 270 | | >>> x = "GCAGAGAG" | 271 | | >>> cut, period = factorize(x) | 272 | | >>> x[:cut], (right := x[cut:]) | 273 | | ('GC', 'AGAGAG') | 274 | | >>> period # right half period | 275 | | 2 | 276 | | >>> right[period:] == right[:-period] | 277 | | True | 278 | | | 279 | | This is how the local period lines up in the above example: | 280 | | GC | AGAGAG | 281 | | AGAGAGC = AGAGAGC | 282 | | The length of this minimal repetition is 7, which is indeed the | 283 | | period of the original string. */ | 284 | | | 285 | 21 | Py_ssize_t cut1, period1, cut2, period2, cut, period; | 286 | 21 | cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0); | 287 | 21 | cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1); | 288 | | | 289 | | // Take the later cut. | 290 | 21 | if (cut1 > cut2) { | 291 | 21 | period = period1; | 292 | 21 | cut = cut1; | 293 | 21 | } | 294 | 0 | else { | 295 | 0 | period = period2; | 296 | 0 | cut = cut2; | 297 | 0 | } | 298 | | | 299 | 21 | LOG("split: "); LOG_STRING(needle, cut); | 300 | 21 | LOG(" + "); LOG_STRING(needle + cut, len_needle - cut); | 301 | 21 | LOG("\n"); | 302 | | | 303 | 21 | *return_period = period; | 304 | 21 | return cut; | 305 | 21 | } |
Unexecuted instantiation: unicodeobject.c:ucs2lib__factorize Unexecuted instantiation: unicodeobject.c:ucs4lib__factorize Unexecuted instantiation: bytes_methods.c:stringlib__factorize Unexecuted instantiation: bytearrayobject.c:stringlib__factorize |
306 | | |
307 | | |
308 | 231 | #define SHIFT_TYPE uint8_t |
309 | | #define MAX_SHIFT UINT8_MAX |
310 | | |
311 | 73.2k | #define TABLE_SIZE_BITS 6u |
312 | 73.2k | #define TABLE_SIZE (1U << TABLE_SIZE_BITS) |
313 | 71.8k | #define TABLE_MASK (TABLE_SIZE - 1U) |
314 | | |
315 | | typedef struct STRINGLIB(_pre) { |
316 | | const STRINGLIB_CHAR *needle; |
317 | | Py_ssize_t len_needle; |
318 | | Py_ssize_t cut; |
319 | | Py_ssize_t period; |
320 | | Py_ssize_t gap; |
321 | | int is_periodic; |
322 | | SHIFT_TYPE table[TABLE_SIZE]; |
323 | | } STRINGLIB(prework); |
324 | | |
325 | | |
326 | | static void |
327 | | STRINGLIB(_preprocess)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle, |
328 | | STRINGLIB(prework) *p) |
329 | 21 | { |
330 | 21 | p->needle = needle; |
331 | 21 | p->len_needle = len_needle; |
332 | 21 | p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period)); |
333 | 21 | assert(p->period + p->cut <= len_needle); |
334 | 21 | p->is_periodic = (0 == memcmp(needle, |
335 | 21 | needle + p->period, |
336 | 21 | p->cut * STRINGLIB_SIZEOF_CHAR)); |
337 | 21 | if (p->is_periodic) { |
338 | 0 | assert(p->cut <= len_needle/2); |
339 | 0 | assert(p->cut < p->period); |
340 | 0 | } |
341 | 21 | else { |
342 | | // A lower bound on the period |
343 | 21 | p->period = Py_MAX(p->cut, len_needle - p->cut) + 1; |
344 | 21 | } |
345 | | // The gap between the last character and the previous |
346 | | // occurrence of an equivalent character (modulo TABLE_SIZE) |
347 | 21 | p->gap = len_needle; |
348 | 21 | STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK; |
349 | 147 | for (Py_ssize_t i = len_needle - 2; i >= 0; i--) { |
350 | 147 | STRINGLIB_CHAR x = needle[i] & TABLE_MASK; |
351 | 147 | if (x == last) { |
352 | 21 | p->gap = len_needle - 1 - i; |
353 | 21 | break; |
354 | 21 | } |
355 | 147 | } |
356 | | // Fill up a compressed Boyer-Moore "Bad Character" table |
357 | 21 | Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT); |
358 | 1.36k | for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; i++) { |
359 | 1.34k | p->table[i] = Py_SAFE_DOWNCAST(not_found_shift, |
360 | 1.34k | Py_ssize_t, SHIFT_TYPE); |
361 | 1.34k | } |
362 | 231 | for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; i++) { |
363 | 210 | SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i, |
364 | 210 | Py_ssize_t, SHIFT_TYPE); |
365 | 210 | p->table[needle[i] & TABLE_MASK] = shift; |
366 | 210 | } |
367 | 21 | } Unexecuted instantiation: bytesobject.c:stringlib__preprocess Unexecuted instantiation: unicodeobject.c:asciilib__preprocess unicodeobject.c:ucs1lib__preprocess Line | Count | Source | 329 | 21 | { | 330 | 21 | p->needle = needle; | 331 | 21 | p->len_needle = len_needle; | 332 | 21 | p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period)); | 333 | 21 | assert(p->period + p->cut <= len_needle); | 334 | 21 | p->is_periodic = (0 == memcmp(needle, | 335 | 21 | needle + p->period, | 336 | 21 | p->cut * STRINGLIB_SIZEOF_CHAR)); | 337 | 21 | if (p->is_periodic) { | 338 | 0 | assert(p->cut <= len_needle/2); | 339 | 0 | assert(p->cut < p->period); | 340 | 0 | } | 341 | 21 | else { | 342 | | // A lower bound on the period | 343 | 21 | p->period = Py_MAX(p->cut, len_needle - p->cut) + 1; | 344 | 21 | } | 345 | | // The gap between the last character and the previous | 346 | | // occurrence of an equivalent character (modulo TABLE_SIZE) | 347 | 21 | p->gap = len_needle; | 348 | 21 | STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK; | 349 | 147 | for (Py_ssize_t i = len_needle - 2; i >= 0; i--) { | 350 | 147 | STRINGLIB_CHAR x = needle[i] & TABLE_MASK; | 351 | 147 | if (x == last) { | 352 | 21 | p->gap = len_needle - 1 - i; | 353 | 21 | break; | 354 | 21 | } | 355 | 147 | } | 356 | | // Fill up a compressed Boyer-Moore "Bad Character" table | 357 | 21 | Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT); | 358 | 1.36k | for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; i++) { | 359 | 1.34k | p->table[i] = Py_SAFE_DOWNCAST(not_found_shift, | 360 | 1.34k | Py_ssize_t, SHIFT_TYPE); | 361 | 1.34k | } | 362 | 231 | for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; i++) { | 363 | 210 | SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i, | 364 | 210 | Py_ssize_t, SHIFT_TYPE); | 365 | 210 | p->table[needle[i] & TABLE_MASK] = shift; | 366 | 210 | } | 367 | 21 | } |
Unexecuted instantiation: unicodeobject.c:ucs2lib__preprocess Unexecuted instantiation: unicodeobject.c:ucs4lib__preprocess Unexecuted instantiation: bytes_methods.c:stringlib__preprocess Unexecuted instantiation: bytearrayobject.c:stringlib__preprocess |
368 | | |
369 | | static Py_ssize_t |
370 | | STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack, |
371 | | STRINGLIB(prework) *p) |
372 | 21 | { |
373 | | // Crochemore and Perrin's (1991) Two-Way algorithm. |
374 | | // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260 |
375 | 21 | const Py_ssize_t len_needle = p->len_needle; |
376 | 21 | const Py_ssize_t cut = p->cut; |
377 | 21 | Py_ssize_t period = p->period; |
378 | 21 | const STRINGLIB_CHAR *const needle = p->needle; |
379 | 21 | const STRINGLIB_CHAR *window_last = haystack + len_needle - 1; |
380 | 21 | const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack; |
381 | 21 | SHIFT_TYPE *table = p->table; |
382 | 21 | const STRINGLIB_CHAR *window; |
383 | 21 | LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack); |
384 | | |
385 | 21 | Py_ssize_t gap = p->gap; |
386 | 21 | Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap); |
387 | 21 | if (p->is_periodic) { |
388 | 0 | LOG("Needle is periodic.\n"); |
389 | 0 | Py_ssize_t memory = 0; |
390 | 0 | periodicwindowloop: |
391 | 0 | while (window_last < haystack_end) { |
392 | 0 | assert(memory == 0); |
393 | 0 | for (;;) { |
394 | 0 | LOG_LINEUP(); |
395 | 0 | Py_ssize_t shift = table[(*window_last) & TABLE_MASK]; |
396 | 0 | window_last += shift; |
397 | 0 | if (shift == 0) { |
398 | 0 | break; |
399 | 0 | } |
400 | 0 | if (window_last >= haystack_end) { |
401 | 0 | return -1; |
402 | 0 | } |
403 | 0 | LOG("Horspool skip\n"); |
404 | 0 | } |
405 | 0 | no_shift: |
406 | 0 | window = window_last - len_needle + 1; |
407 | 0 | assert((window[len_needle - 1] & TABLE_MASK) == |
408 | 0 | (needle[len_needle - 1] & TABLE_MASK)); |
409 | 0 | Py_ssize_t i = Py_MAX(cut, memory); |
410 | 0 | for (; i < len_needle; i++) { |
411 | 0 | if (needle[i] != window[i]) { |
412 | 0 | if (i < gap_jump_end) { |
413 | 0 | LOG("Early right half mismatch: jump by gap.\n"); |
414 | 0 | assert(gap >= i - cut + 1); |
415 | 0 | window_last += gap; |
416 | 0 | } |
417 | 0 | else { |
418 | 0 | LOG("Late right half mismatch: jump by n (>gap)\n"); |
419 | 0 | assert(i - cut + 1 > gap); |
420 | 0 | window_last += i - cut + 1; |
421 | 0 | } |
422 | 0 | memory = 0; |
423 | 0 | goto periodicwindowloop; |
424 | 0 | } |
425 | 0 | } |
426 | 0 | for (i = memory; i < cut; i++) { |
427 | 0 | if (needle[i] != window[i]) { |
428 | 0 | LOG("Left half does not match.\n"); |
429 | 0 | window_last += period; |
430 | 0 | memory = len_needle - period; |
431 | 0 | if (window_last >= haystack_end) { |
432 | 0 | return -1; |
433 | 0 | } |
434 | 0 | Py_ssize_t shift = table[(*window_last) & TABLE_MASK]; |
435 | 0 | if (shift) { |
436 | | // A mismatch has been identified to the right |
437 | | // of where i will next start, so we can jump |
438 | | // at least as far as if the mismatch occurred |
439 | | // on the first comparison. |
440 | 0 | Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1; |
441 | 0 | LOG("Skip with Memory.\n"); |
442 | 0 | memory = 0; |
443 | 0 | window_last += Py_MAX(shift, mem_jump); |
444 | 0 | goto periodicwindowloop; |
445 | 0 | } |
446 | 0 | goto no_shift; |
447 | 0 | } |
448 | 0 | } |
449 | 0 | LOG("Found a match!\n"); |
450 | 0 | return window - haystack; |
451 | 0 | } |
452 | 0 | } |
453 | 21 | else { |
454 | 21 | period = Py_MAX(gap, period); |
455 | 21 | LOG("Needle is not periodic.\n"); |
456 | 14.0k | windowloop: |
457 | 14.0k | while (window_last < haystack_end) { |
458 | 71.4k | for (;;) { |
459 | 71.4k | LOG_LINEUP(); |
460 | 71.4k | Py_ssize_t shift = table[(*window_last) & TABLE_MASK]; |
461 | 71.4k | window_last += shift; |
462 | 71.4k | if (shift == 0) { |
463 | 14.0k | break; |
464 | 14.0k | } |
465 | 57.4k | if (window_last >= haystack_end) { |
466 | 18 | return -1; |
467 | 18 | } |
468 | 57.4k | LOG("Horspool skip\n"); |
469 | 57.4k | } |
470 | 14.0k | window = window_last - len_needle + 1; |
471 | 14.0k | assert((window[len_needle - 1] & TABLE_MASK) == |
472 | 14.0k | (needle[len_needle - 1] & TABLE_MASK)); |
473 | 14.0k | Py_ssize_t i = cut; |
474 | 14.2k | for (; i < len_needle; i++) { |
475 | 14.1k | if (needle[i] != window[i]) { |
476 | 13.9k | if (i < gap_jump_end) { |
477 | 13.9k | LOG("Early right half mismatch: jump by gap.\n"); |
478 | 13.9k | assert(gap >= i - cut + 1); |
479 | 13.9k | window_last += gap; |
480 | 13.9k | } |
481 | 0 | else { |
482 | 0 | LOG("Late right half mismatch: jump by n (>gap)\n"); |
483 | 0 | assert(i - cut + 1 > gap); |
484 | 0 | window_last += i - cut + 1; |
485 | 0 | } |
486 | 13.9k | goto windowloop; |
487 | 13.9k | } |
488 | 14.1k | } |
489 | 104 | for (Py_ssize_t i = 0; i < cut; i++) { |
490 | 102 | if (needle[i] != window[i]) { |
491 | 86 | LOG("Left half does not match.\n"); |
492 | 86 | window_last += period; |
493 | 86 | goto windowloop; |
494 | 86 | } |
495 | 102 | } |
496 | 2 | LOG("Found a match!\n"); |
497 | 2 | return window - haystack; |
498 | 88 | } |
499 | 14.0k | } |
500 | 1 | LOG("Not found. Returning -1.\n"); |
501 | 1 | return -1; |
502 | 21 | } Unexecuted instantiation: bytesobject.c:stringlib__two_way Unexecuted instantiation: unicodeobject.c:asciilib__two_way unicodeobject.c:ucs1lib__two_way Line | Count | Source | 372 | 21 | { | 373 | | // Crochemore and Perrin's (1991) Two-Way algorithm. | 374 | | // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260 | 375 | 21 | const Py_ssize_t len_needle = p->len_needle; | 376 | 21 | const Py_ssize_t cut = p->cut; | 377 | 21 | Py_ssize_t period = p->period; | 378 | 21 | const STRINGLIB_CHAR *const needle = p->needle; | 379 | 21 | const STRINGLIB_CHAR *window_last = haystack + len_needle - 1; | 380 | 21 | const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack; | 381 | 21 | SHIFT_TYPE *table = p->table; | 382 | 21 | const STRINGLIB_CHAR *window; | 383 | 21 | LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack); | 384 | | | 385 | 21 | Py_ssize_t gap = p->gap; | 386 | 21 | Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap); | 387 | 21 | if (p->is_periodic) { | 388 | 0 | LOG("Needle is periodic.\n"); | 389 | 0 | Py_ssize_t memory = 0; | 390 | 0 | periodicwindowloop: | 391 | 0 | while (window_last < haystack_end) { | 392 | 0 | assert(memory == 0); | 393 | 0 | for (;;) { | 394 | 0 | LOG_LINEUP(); | 395 | 0 | Py_ssize_t shift = table[(*window_last) & TABLE_MASK]; | 396 | 0 | window_last += shift; | 397 | 0 | if (shift == 0) { | 398 | 0 | break; | 399 | 0 | } | 400 | 0 | if (window_last >= haystack_end) { | 401 | 0 | return -1; | 402 | 0 | } | 403 | 0 | LOG("Horspool skip\n"); | 404 | 0 | } | 405 | 0 | no_shift: | 406 | 0 | window = window_last - len_needle + 1; | 407 | 0 | assert((window[len_needle - 1] & TABLE_MASK) == | 408 | 0 | (needle[len_needle - 1] & TABLE_MASK)); | 409 | 0 | Py_ssize_t i = Py_MAX(cut, memory); | 410 | 0 | for (; i < len_needle; i++) { | 411 | 0 | if (needle[i] != window[i]) { | 412 | 0 | if (i < gap_jump_end) { | 413 | 0 | LOG("Early right half mismatch: jump by gap.\n"); | 414 | 0 | assert(gap >= i - cut + 1); | 415 | 0 | window_last += gap; | 416 | 0 | } | 417 | 0 | else { | 418 | 0 | LOG("Late right half mismatch: jump by n (>gap)\n"); | 419 | 0 | assert(i - cut + 1 > gap); | 420 | 0 | window_last += i - cut + 1; | 421 | 0 | } | 422 | 0 | memory = 0; | 423 | 0 | goto periodicwindowloop; | 424 | 0 | } | 425 | 0 | } | 426 | 0 | for (i = memory; i < cut; i++) { | 427 | 0 | if (needle[i] != window[i]) { | 428 | 0 | LOG("Left half does not match.\n"); | 429 | 0 | window_last += period; | 430 | 0 | memory = len_needle - period; | 431 | 0 | if (window_last >= haystack_end) { | 432 | 0 | return -1; | 433 | 0 | } | 434 | 0 | Py_ssize_t shift = table[(*window_last) & TABLE_MASK]; | 435 | 0 | if (shift) { | 436 | | // A mismatch has been identified to the right | 437 | | // of where i will next start, so we can jump | 438 | | // at least as far as if the mismatch occurred | 439 | | // on the first comparison. | 440 | 0 | Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1; | 441 | 0 | LOG("Skip with Memory.\n"); | 442 | 0 | memory = 0; | 443 | 0 | window_last += Py_MAX(shift, mem_jump); | 444 | 0 | goto periodicwindowloop; | 445 | 0 | } | 446 | 0 | goto no_shift; | 447 | 0 | } | 448 | 0 | } | 449 | 0 | LOG("Found a match!\n"); | 450 | 0 | return window - haystack; | 451 | 0 | } | 452 | 0 | } | 453 | 21 | else { | 454 | 21 | period = Py_MAX(gap, period); | 455 | 21 | LOG("Needle is not periodic.\n"); | 456 | 14.0k | windowloop: | 457 | 14.0k | while (window_last < haystack_end) { | 458 | 71.4k | for (;;) { | 459 | 71.4k | LOG_LINEUP(); | 460 | 71.4k | Py_ssize_t shift = table[(*window_last) & TABLE_MASK]; | 461 | 71.4k | window_last += shift; | 462 | 71.4k | if (shift == 0) { | 463 | 14.0k | break; | 464 | 14.0k | } | 465 | 57.4k | if (window_last >= haystack_end) { | 466 | 18 | return -1; | 467 | 18 | } | 468 | 57.4k | LOG("Horspool skip\n"); | 469 | 57.4k | } | 470 | 14.0k | window = window_last - len_needle + 1; | 471 | 14.0k | assert((window[len_needle - 1] & TABLE_MASK) == | 472 | 14.0k | (needle[len_needle - 1] & TABLE_MASK)); | 473 | 14.0k | Py_ssize_t i = cut; | 474 | 14.2k | for (; i < len_needle; i++) { | 475 | 14.1k | if (needle[i] != window[i]) { | 476 | 13.9k | if (i < gap_jump_end) { | 477 | 13.9k | LOG("Early right half mismatch: jump by gap.\n"); | 478 | 13.9k | assert(gap >= i - cut + 1); | 479 | 13.9k | window_last += gap; | 480 | 13.9k | } | 481 | 0 | else { | 482 | 0 | LOG("Late right half mismatch: jump by n (>gap)\n"); | 483 | 0 | assert(i - cut + 1 > gap); | 484 | 0 | window_last += i - cut + 1; | 485 | 0 | } | 486 | 13.9k | goto windowloop; | 487 | 13.9k | } | 488 | 14.1k | } | 489 | 104 | for (Py_ssize_t i = 0; i < cut; i++) { | 490 | 102 | if (needle[i] != window[i]) { | 491 | 86 | LOG("Left half does not match.\n"); | 492 | 86 | window_last += period; | 493 | 86 | goto windowloop; | 494 | 86 | } | 495 | 102 | } | 496 | 2 | LOG("Found a match!\n"); | 497 | 2 | return window - haystack; | 498 | 88 | } | 499 | 14.0k | } | 500 | 1 | LOG("Not found. Returning -1.\n"); | 501 | 1 | return -1; | 502 | 21 | } |
Unexecuted instantiation: unicodeobject.c:ucs2lib__two_way Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way Unexecuted instantiation: bytes_methods.c:stringlib__two_way Unexecuted instantiation: bytearrayobject.c:stringlib__two_way |
503 | | |
504 | | |
505 | | static Py_ssize_t |
506 | | STRINGLIB(_two_way_find)(const STRINGLIB_CHAR *haystack, |
507 | | Py_ssize_t len_haystack, |
508 | | const STRINGLIB_CHAR *needle, |
509 | | Py_ssize_t len_needle) |
510 | 21 | { |
511 | 21 | LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack); |
512 | 21 | STRINGLIB(prework) p; |
513 | 21 | STRINGLIB(_preprocess)(needle, len_needle, &p); |
514 | 21 | return STRINGLIB(_two_way)(haystack, len_haystack, &p); |
515 | 21 | } Unexecuted instantiation: bytesobject.c:stringlib__two_way_find Unexecuted instantiation: unicodeobject.c:asciilib__two_way_find unicodeobject.c:ucs1lib__two_way_find Line | Count | Source | 510 | 21 | { | 511 | 21 | LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack); | 512 | 21 | STRINGLIB(prework) p; | 513 | 21 | STRINGLIB(_preprocess)(needle, len_needle, &p); | 514 | 21 | return STRINGLIB(_two_way)(haystack, len_haystack, &p); | 515 | 21 | } |
Unexecuted instantiation: unicodeobject.c:ucs2lib__two_way_find Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way_find Unexecuted instantiation: bytes_methods.c:stringlib__two_way_find Unexecuted instantiation: bytearrayobject.c:stringlib__two_way_find |
516 | | |
517 | | |
518 | | static Py_ssize_t |
519 | | STRINGLIB(_two_way_count)(const STRINGLIB_CHAR *haystack, |
520 | | Py_ssize_t len_haystack, |
521 | | const STRINGLIB_CHAR *needle, |
522 | | Py_ssize_t len_needle, |
523 | | Py_ssize_t maxcount) |
524 | 0 | { |
525 | 0 | LOG("###### Counting \"%s\" in \"%s\".\n", needle, haystack); |
526 | 0 | STRINGLIB(prework) p; |
527 | 0 | STRINGLIB(_preprocess)(needle, len_needle, &p); |
528 | 0 | Py_ssize_t index = 0, count = 0; |
529 | 0 | while (1) { |
530 | 0 | Py_ssize_t result; |
531 | 0 | result = STRINGLIB(_two_way)(haystack + index, |
532 | 0 | len_haystack - index, &p); |
533 | 0 | if (result == -1) { |
534 | 0 | return count; |
535 | 0 | } |
536 | 0 | count++; |
537 | 0 | if (count == maxcount) { |
538 | 0 | return maxcount; |
539 | 0 | } |
540 | 0 | index += result + len_needle; |
541 | 0 | } |
542 | 0 | return count; |
543 | 0 | } Unexecuted instantiation: bytesobject.c:stringlib__two_way_count Unexecuted instantiation: unicodeobject.c:asciilib__two_way_count Unexecuted instantiation: unicodeobject.c:ucs1lib__two_way_count Unexecuted instantiation: unicodeobject.c:ucs2lib__two_way_count Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way_count Unexecuted instantiation: bytes_methods.c:stringlib__two_way_count Unexecuted instantiation: bytearrayobject.c:stringlib__two_way_count |
544 | | |
545 | | #undef SHIFT_TYPE |
546 | | #undef NOT_FOUND |
547 | | #undef SHIFT_OVERFLOW |
548 | | #undef TABLE_SIZE_BITS |
549 | | #undef TABLE_SIZE |
550 | | #undef TABLE_MASK |
551 | | |
552 | | #undef LOG |
553 | | #undef LOG_STRING |
554 | | #undef LOG_LINEUP |
555 | | |
556 | | static inline Py_ssize_t |
557 | | STRINGLIB(default_find)(const STRINGLIB_CHAR* s, Py_ssize_t n, |
558 | | const STRINGLIB_CHAR* p, Py_ssize_t m, |
559 | | Py_ssize_t maxcount, int mode) |
560 | 14.7M | { |
561 | 14.7M | const Py_ssize_t w = n - m; |
562 | 14.7M | Py_ssize_t mlast = m - 1, count = 0; |
563 | 14.7M | Py_ssize_t gap = mlast; |
564 | 14.7M | const STRINGLIB_CHAR last = p[mlast]; |
565 | 14.7M | const STRINGLIB_CHAR *const ss = &s[mlast]; |
566 | | |
567 | 14.7M | unsigned long mask = 0; |
568 | 29.6M | for (Py_ssize_t i = 0; i < mlast; i++) { |
569 | 14.8M | STRINGLIB_BLOOM_ADD(mask, p[i]); |
570 | 14.8M | if (p[i] == last) { |
571 | 367k | gap = mlast - i - 1; |
572 | 367k | } |
573 | 14.8M | } |
574 | 14.7M | STRINGLIB_BLOOM_ADD(mask, last); |
575 | | |
576 | 5.38G | for (Py_ssize_t i = 0; i <= w; i++) { |
577 | 5.37G | if (ss[i] == last) { |
578 | | /* candidate match */ |
579 | 53.5M | Py_ssize_t j; |
580 | 72.7M | for (j = 0; j < mlast; j++) { |
581 | 53.6M | if (s[i+j] != p[j]) { |
582 | 34.4M | break; |
583 | 34.4M | } |
584 | 53.6M | } |
585 | 53.5M | if (j == mlast) { |
586 | | /* got a match! */ |
587 | 19.1M | if (mode != FAST_COUNT) { |
588 | 9.63M | return i; |
589 | 9.63M | } |
590 | 9.51M | count++; |
591 | 9.51M | if (count == maxcount) { |
592 | 0 | return maxcount; |
593 | 0 | } |
594 | 9.51M | i = i + mlast; |
595 | 9.51M | continue; |
596 | 9.51M | } |
597 | | /* miss: check if next character is part of pattern */ |
598 | 34.4M | if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) { |
599 | 7.79M | i = i + m; |
600 | 7.79M | } |
601 | 26.6M | else { |
602 | 26.6M | i = i + gap; |
603 | 26.6M | } |
604 | 34.4M | } |
605 | 5.32G | else { |
606 | | /* skip: check if next character is part of pattern */ |
607 | 5.32G | if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) { |
608 | 5.27G | i = i + m; |
609 | 5.27G | } |
610 | 5.32G | } |
611 | 5.37G | } |
612 | 5.15M | return mode == FAST_COUNT ? count : -1; |
613 | 14.7M | } Unexecuted instantiation: bytesobject.c:stringlib_default_find unicodeobject.c:asciilib_default_find Line | Count | Source | 560 | 1.59M | { | 561 | 1.59M | const Py_ssize_t w = n - m; | 562 | 1.59M | Py_ssize_t mlast = m - 1, count = 0; | 563 | 1.59M | Py_ssize_t gap = mlast; | 564 | 1.59M | const STRINGLIB_CHAR last = p[mlast]; | 565 | 1.59M | const STRINGLIB_CHAR *const ss = &s[mlast]; | 566 | | | 567 | 1.59M | unsigned long mask = 0; | 568 | 3.20M | for (Py_ssize_t i = 0; i < mlast; i++) { | 569 | 1.60M | STRINGLIB_BLOOM_ADD(mask, p[i]); | 570 | 1.60M | if (p[i] == last) { | 571 | 25.3k | gap = mlast - i - 1; | 572 | 25.3k | } | 573 | 1.60M | } | 574 | 1.59M | STRINGLIB_BLOOM_ADD(mask, last); | 575 | | | 576 | 151M | for (Py_ssize_t i = 0; i <= w; i++) { | 577 | 151M | if (ss[i] == last) { | 578 | | /* candidate match */ | 579 | 3.23M | Py_ssize_t j; | 580 | 4.81M | for (j = 0; j < mlast; j++) { | 581 | 3.23M | if (s[i+j] != p[j]) { | 582 | 1.65M | break; | 583 | 1.65M | } | 584 | 3.23M | } | 585 | 3.23M | if (j == mlast) { | 586 | | /* got a match! */ | 587 | 1.57M | if (mode != FAST_COUNT) { | 588 | 1.57M | return i; | 589 | 1.57M | } | 590 | 0 | count++; | 591 | 0 | if (count == maxcount) { | 592 | 0 | return maxcount; | 593 | 0 | } | 594 | 0 | i = i + mlast; | 595 | 0 | continue; | 596 | 0 | } | 597 | | /* miss: check if next character is part of pattern */ | 598 | 1.65M | if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) { | 599 | 28.2k | i = i + m; | 600 | 28.2k | } | 601 | 1.62M | else { | 602 | 1.62M | i = i + gap; | 603 | 1.62M | } | 604 | 1.65M | } | 605 | 148M | else { | 606 | | /* skip: check if next character is part of pattern */ | 607 | 148M | if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) { | 608 | 143M | i = i + m; | 609 | 143M | } | 610 | 148M | } | 611 | 151M | } | 612 | 23.4k | return mode == FAST_COUNT ? count : -1; | 613 | 1.59M | } |
unicodeobject.c:ucs1lib_default_find Line | Count | Source | 560 | 6.57M | { | 561 | 6.57M | const Py_ssize_t w = n - m; | 562 | 6.57M | Py_ssize_t mlast = m - 1, count = 0; | 563 | 6.57M | Py_ssize_t gap = mlast; | 564 | 6.57M | const STRINGLIB_CHAR last = p[mlast]; | 565 | 6.57M | const STRINGLIB_CHAR *const ss = &s[mlast]; | 566 | | | 567 | 6.57M | unsigned long mask = 0; | 568 | 13.2M | for (Py_ssize_t i = 0; i < mlast; i++) { | 569 | 6.63M | STRINGLIB_BLOOM_ADD(mask, p[i]); | 570 | 6.63M | if (p[i] == last) { | 571 | 280k | gap = mlast - i - 1; | 572 | 280k | } | 573 | 6.63M | } | 574 | 6.57M | STRINGLIB_BLOOM_ADD(mask, last); | 575 | | | 576 | 3.05G | for (Py_ssize_t i = 0; i <= w; i++) { | 577 | 3.04G | if (ss[i] == last) { | 578 | | /* candidate match */ | 579 | 23.3M | Py_ssize_t j; | 580 | 28.2M | for (j = 0; j < mlast; j++) { | 581 | 23.3M | if (s[i+j] != p[j]) { | 582 | 18.5M | break; | 583 | 18.5M | } | 584 | 23.3M | } | 585 | 23.3M | if (j == mlast) { | 586 | | /* got a match! */ | 587 | 4.82M | if (mode != FAST_COUNT) { | 588 | 1.62M | return i; | 589 | 1.62M | } | 590 | 3.19M | count++; | 591 | 3.19M | if (count == maxcount) { | 592 | 0 | return maxcount; | 593 | 0 | } | 594 | 3.19M | i = i + mlast; | 595 | 3.19M | continue; | 596 | 3.19M | } | 597 | | /* miss: check if next character is part of pattern */ | 598 | 18.5M | if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) { | 599 | 5.02M | i = i + m; | 600 | 5.02M | } | 601 | 13.5M | else { | 602 | 13.5M | i = i + gap; | 603 | 13.5M | } | 604 | 18.5M | } | 605 | 3.02G | else { | 606 | | /* skip: check if next character is part of pattern */ | 607 | 3.02G | if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) { | 608 | 3.00G | i = i + m; | 609 | 3.00G | } | 610 | 3.02G | } | 611 | 3.04G | } | 612 | 4.94M | return mode == FAST_COUNT ? count : -1; | 613 | 6.57M | } |
unicodeobject.c:ucs2lib_default_find Line | Count | Source | 560 | 2.94M | { | 561 | 2.94M | const Py_ssize_t w = n - m; | 562 | 2.94M | Py_ssize_t mlast = m - 1, count = 0; | 563 | 2.94M | Py_ssize_t gap = mlast; | 564 | 2.94M | const STRINGLIB_CHAR last = p[mlast]; | 565 | 2.94M | const STRINGLIB_CHAR *const ss = &s[mlast]; | 566 | | | 567 | 2.94M | unsigned long mask = 0; | 568 | 5.91M | for (Py_ssize_t i = 0; i < mlast; i++) { | 569 | 2.97M | STRINGLIB_BLOOM_ADD(mask, p[i]); | 570 | 2.97M | if (p[i] == last) { | 571 | 35.5k | gap = mlast - i - 1; | 572 | 35.5k | } | 573 | 2.97M | } | 574 | 2.94M | STRINGLIB_BLOOM_ADD(mask, last); | 575 | | | 576 | 930M | for (Py_ssize_t i = 0; i <= w; i++) { | 577 | 930M | if (ss[i] == last) { | 578 | | /* candidate match */ | 579 | 11.0M | Py_ssize_t j; | 580 | 16.6M | for (j = 0; j < mlast; j++) { | 581 | 11.0M | if (s[i+j] != p[j]) { | 582 | 5.41M | break; | 583 | 5.41M | } | 584 | 11.0M | } | 585 | 11.0M | if (j == mlast) { | 586 | | /* got a match! */ | 587 | 5.58M | if (mode != FAST_COUNT) { | 588 | 2.83M | return i; | 589 | 2.83M | } | 590 | 2.75M | count++; | 591 | 2.75M | if (count == maxcount) { | 592 | 0 | return maxcount; | 593 | 0 | } | 594 | 2.75M | i = i + mlast; | 595 | 2.75M | continue; | 596 | 2.75M | } | 597 | | /* miss: check if next character is part of pattern */ | 598 | 5.41M | if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) { | 599 | 1.28M | i = i + m; | 600 | 1.28M | } | 601 | 4.13M | else { | 602 | 4.13M | i = i + gap; | 603 | 4.13M | } | 604 | 5.41M | } | 605 | 919M | else { | 606 | | /* skip: check if next character is part of pattern */ | 607 | 919M | if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) { | 608 | 909M | i = i + m; | 609 | 909M | } | 610 | 919M | } | 611 | 930M | } | 612 | 107k | return mode == FAST_COUNT ? count : -1; | 613 | 2.94M | } |
unicodeobject.c:ucs4lib_default_find Line | Count | Source | 560 | 3.66M | { | 561 | 3.66M | const Py_ssize_t w = n - m; | 562 | 3.66M | Py_ssize_t mlast = m - 1, count = 0; | 563 | 3.66M | Py_ssize_t gap = mlast; | 564 | 3.66M | const STRINGLIB_CHAR last = p[mlast]; | 565 | 3.66M | const STRINGLIB_CHAR *const ss = &s[mlast]; | 566 | | | 567 | 3.66M | unsigned long mask = 0; | 568 | 7.34M | for (Py_ssize_t i = 0; i < mlast; i++) { | 569 | 3.67M | STRINGLIB_BLOOM_ADD(mask, p[i]); | 570 | 3.67M | if (p[i] == last) { | 571 | 22.3k | gap = mlast - i - 1; | 572 | 22.3k | } | 573 | 3.67M | } | 574 | 3.66M | STRINGLIB_BLOOM_ADD(mask, last); | 575 | | | 576 | 1.24G | for (Py_ssize_t i = 0; i <= w; i++) { | 577 | 1.24G | if (ss[i] == last) { | 578 | | /* candidate match */ | 579 | 15.9M | Py_ssize_t j; | 580 | 23.1M | for (j = 0; j < mlast; j++) { | 581 | 15.9M | if (s[i+j] != p[j]) { | 582 | 8.79M | break; | 583 | 8.79M | } | 584 | 15.9M | } | 585 | 15.9M | if (j == mlast) { | 586 | | /* got a match! */ | 587 | 7.15M | if (mode != FAST_COUNT) { | 588 | 3.58M | return i; | 589 | 3.58M | } | 590 | 3.56M | count++; | 591 | 3.56M | if (count == maxcount) { | 592 | 0 | return maxcount; | 593 | 0 | } | 594 | 3.56M | i = i + mlast; | 595 | 3.56M | continue; | 596 | 3.56M | } | 597 | | /* miss: check if next character is part of pattern */ | 598 | 8.79M | if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) { | 599 | 1.45M | i = i + m; | 600 | 1.45M | } | 601 | 7.33M | else { | 602 | 7.33M | i = i + gap; | 603 | 7.33M | } | 604 | 8.79M | } | 605 | 1.22G | else { | 606 | | /* skip: check if next character is part of pattern */ | 607 | 1.22G | if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) { | 608 | 1.21G | i = i + m; | 609 | 1.21G | } | 610 | 1.22G | } | 611 | 1.24G | } | 612 | 77.0k | return mode == FAST_COUNT ? count : -1; | 613 | 3.66M | } |
bytes_methods.c:stringlib_default_find Line | Count | Source | 560 | 3.01k | { | 561 | 3.01k | const Py_ssize_t w = n - m; | 562 | 3.01k | Py_ssize_t mlast = m - 1, count = 0; | 563 | 3.01k | Py_ssize_t gap = mlast; | 564 | 3.01k | const STRINGLIB_CHAR last = p[mlast]; | 565 | 3.01k | const STRINGLIB_CHAR *const ss = &s[mlast]; | 566 | | | 567 | 3.01k | unsigned long mask = 0; | 568 | 12.0k | for (Py_ssize_t i = 0; i < mlast; i++) { | 569 | 9.05k | STRINGLIB_BLOOM_ADD(mask, p[i]); | 570 | 9.05k | if (p[i] == last) { | 571 | 3.01k | gap = mlast - i - 1; | 572 | 3.01k | } | 573 | 9.05k | } | 574 | 3.01k | STRINGLIB_BLOOM_ADD(mask, last); | 575 | | | 576 | 1.62M | for (Py_ssize_t i = 0; i <= w; i++) { | 577 | 1.62M | if (ss[i] == last) { | 578 | | /* candidate match */ | 579 | 9.43k | Py_ssize_t j; | 580 | 18.0k | for (j = 0; j < mlast; j++) { | 581 | 15.2k | if (s[i+j] != p[j]) { | 582 | 6.65k | break; | 583 | 6.65k | } | 584 | 15.2k | } | 585 | 9.43k | if (j == mlast) { | 586 | | /* got a match! */ | 587 | 2.77k | if (mode != FAST_COUNT) { | 588 | 2.77k | return i; | 589 | 2.77k | } | 590 | 0 | count++; | 591 | 0 | if (count == maxcount) { | 592 | 0 | return maxcount; | 593 | 0 | } | 594 | 0 | i = i + mlast; | 595 | 0 | continue; | 596 | 0 | } | 597 | | /* miss: check if next character is part of pattern */ | 598 | 6.65k | if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) { | 599 | 626 | i = i + m; | 600 | 626 | } | 601 | 6.03k | else { | 602 | 6.03k | i = i + gap; | 603 | 6.03k | } | 604 | 6.65k | } | 605 | 1.61M | else { | 606 | | /* skip: check if next character is part of pattern */ | 607 | 1.61M | if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) { | 608 | 27.8k | i = i + m; | 609 | 27.8k | } | 610 | 1.61M | } | 611 | 1.62M | } | 612 | 242 | return mode == FAST_COUNT ? count : -1; | 613 | 3.01k | } |
Unexecuted instantiation: bytearrayobject.c:stringlib_default_find |
614 | | |
615 | | |
616 | | static Py_ssize_t |
617 | | STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n, |
618 | | const STRINGLIB_CHAR* p, Py_ssize_t m, |
619 | | Py_ssize_t maxcount, int mode) |
620 | 0 | { |
621 | 0 | const Py_ssize_t w = n - m; |
622 | 0 | Py_ssize_t mlast = m - 1, count = 0; |
623 | 0 | Py_ssize_t gap = mlast; |
624 | 0 | Py_ssize_t hits = 0, res; |
625 | 0 | const STRINGLIB_CHAR last = p[mlast]; |
626 | 0 | const STRINGLIB_CHAR *const ss = &s[mlast]; |
627 | |
|
628 | 0 | unsigned long mask = 0; |
629 | 0 | for (Py_ssize_t i = 0; i < mlast; i++) { |
630 | 0 | STRINGLIB_BLOOM_ADD(mask, p[i]); |
631 | 0 | if (p[i] == last) { |
632 | 0 | gap = mlast - i - 1; |
633 | 0 | } |
634 | 0 | } |
635 | 0 | STRINGLIB_BLOOM_ADD(mask, last); |
636 | |
|
637 | 0 | for (Py_ssize_t i = 0; i <= w; i++) { |
638 | 0 | if (ss[i] == last) { |
639 | | /* candidate match */ |
640 | 0 | Py_ssize_t j; |
641 | 0 | for (j = 0; j < mlast; j++) { |
642 | 0 | if (s[i+j] != p[j]) { |
643 | 0 | break; |
644 | 0 | } |
645 | 0 | } |
646 | 0 | if (j == mlast) { |
647 | | /* got a match! */ |
648 | 0 | if (mode != FAST_COUNT) { |
649 | 0 | return i; |
650 | 0 | } |
651 | 0 | count++; |
652 | 0 | if (count == maxcount) { |
653 | 0 | return maxcount; |
654 | 0 | } |
655 | 0 | i = i + mlast; |
656 | 0 | continue; |
657 | 0 | } |
658 | 0 | hits += j + 1; |
659 | 0 | if (hits > m / 4 && w - i > 2000) { |
660 | 0 | if (mode == FAST_SEARCH) { |
661 | 0 | res = STRINGLIB(_two_way_find)(s + i, n - i, p, m); |
662 | 0 | return res == -1 ? -1 : res + i; |
663 | 0 | } |
664 | 0 | else { |
665 | 0 | res = STRINGLIB(_two_way_count)(s + i, n - i, p, m, |
666 | 0 | maxcount - count); |
667 | 0 | return res + count; |
668 | 0 | } |
669 | 0 | } |
670 | | /* miss: check if next character is part of pattern */ |
671 | 0 | if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) { |
672 | 0 | i = i + m; |
673 | 0 | } |
674 | 0 | else { |
675 | 0 | i = i + gap; |
676 | 0 | } |
677 | 0 | } |
678 | 0 | else { |
679 | | /* skip: check if next character is part of pattern */ |
680 | 0 | if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) { |
681 | 0 | i = i + m; |
682 | 0 | } |
683 | 0 | } |
684 | 0 | } |
685 | 0 | return mode == FAST_COUNT ? count : -1; |
686 | 0 | } Unexecuted instantiation: bytesobject.c:stringlib_adaptive_find Unexecuted instantiation: unicodeobject.c:asciilib_adaptive_find Unexecuted instantiation: unicodeobject.c:ucs1lib_adaptive_find Unexecuted instantiation: unicodeobject.c:ucs2lib_adaptive_find Unexecuted instantiation: unicodeobject.c:ucs4lib_adaptive_find Unexecuted instantiation: bytes_methods.c:stringlib_adaptive_find Unexecuted instantiation: bytearrayobject.c:stringlib_adaptive_find |
687 | | |
688 | | |
689 | | static Py_ssize_t |
690 | | STRINGLIB(default_rfind)(const STRINGLIB_CHAR* s, Py_ssize_t n, |
691 | | const STRINGLIB_CHAR* p, Py_ssize_t m, |
692 | | Py_ssize_t maxcount, int mode) |
693 | 4 | { |
694 | | /* create compressed boyer-moore delta 1 table */ |
695 | 4 | unsigned long mask = 0; |
696 | 4 | Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m; |
697 | | |
698 | | /* process pattern[0] outside the loop */ |
699 | 4 | STRINGLIB_BLOOM_ADD(mask, p[0]); |
700 | | /* process pattern[:0:-1] */ |
701 | 16 | for (i = mlast; i > 0; i--) { |
702 | 12 | STRINGLIB_BLOOM_ADD(mask, p[i]); |
703 | 12 | if (p[i] == p[0]) { |
704 | 0 | skip = i - 1; |
705 | 0 | } |
706 | 12 | } |
707 | | |
708 | 356 | for (i = w; i >= 0; i--) { |
709 | 352 | if (s[i] == p[0]) { |
710 | | /* candidate match */ |
711 | 8 | for (j = mlast; j > 0; j--) { |
712 | 8 | if (s[i+j] != p[j]) { |
713 | 8 | break; |
714 | 8 | } |
715 | 8 | } |
716 | 8 | if (j == 0) { |
717 | | /* got a match! */ |
718 | 0 | return i; |
719 | 0 | } |
720 | | /* miss: check if previous character is part of pattern */ |
721 | 8 | if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) { |
722 | 8 | i = i - m; |
723 | 8 | } |
724 | 0 | else { |
725 | 0 | i = i - skip; |
726 | 0 | } |
727 | 8 | } |
728 | 344 | else { |
729 | | /* skip: check if previous character is part of pattern */ |
730 | 344 | if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) { |
731 | 336 | i = i - m; |
732 | 336 | } |
733 | 344 | } |
734 | 352 | } |
735 | 4 | return -1; |
736 | 4 | } Unexecuted instantiation: bytesobject.c:stringlib_default_rfind Unexecuted instantiation: unicodeobject.c:asciilib_default_rfind Unexecuted instantiation: unicodeobject.c:ucs1lib_default_rfind Unexecuted instantiation: unicodeobject.c:ucs2lib_default_rfind Unexecuted instantiation: unicodeobject.c:ucs4lib_default_rfind bytes_methods.c:stringlib_default_rfind Line | Count | Source | 693 | 4 | { | 694 | | /* create compressed boyer-moore delta 1 table */ | 695 | 4 | unsigned long mask = 0; | 696 | 4 | Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m; | 697 | | | 698 | | /* process pattern[0] outside the loop */ | 699 | 4 | STRINGLIB_BLOOM_ADD(mask, p[0]); | 700 | | /* process pattern[:0:-1] */ | 701 | 16 | for (i = mlast; i > 0; i--) { | 702 | 12 | STRINGLIB_BLOOM_ADD(mask, p[i]); | 703 | 12 | if (p[i] == p[0]) { | 704 | 0 | skip = i - 1; | 705 | 0 | } | 706 | 12 | } | 707 | | | 708 | 356 | for (i = w; i >= 0; i--) { | 709 | 352 | if (s[i] == p[0]) { | 710 | | /* candidate match */ | 711 | 8 | for (j = mlast; j > 0; j--) { | 712 | 8 | if (s[i+j] != p[j]) { | 713 | 8 | break; | 714 | 8 | } | 715 | 8 | } | 716 | 8 | if (j == 0) { | 717 | | /* got a match! */ | 718 | 0 | return i; | 719 | 0 | } | 720 | | /* miss: check if previous character is part of pattern */ | 721 | 8 | if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) { | 722 | 8 | i = i - m; | 723 | 8 | } | 724 | 0 | else { | 725 | 0 | i = i - skip; | 726 | 0 | } | 727 | 8 | } | 728 | 344 | else { | 729 | | /* skip: check if previous character is part of pattern */ | 730 | 344 | if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) { | 731 | 336 | i = i - m; | 732 | 336 | } | 733 | 344 | } | 734 | 352 | } | 735 | 4 | return -1; | 736 | 4 | } |
Unexecuted instantiation: bytearrayobject.c:stringlib_default_rfind |
737 | | |
738 | | |
739 | | static inline Py_ssize_t |
740 | | STRINGLIB(count_char)(const STRINGLIB_CHAR *s, Py_ssize_t n, |
741 | | const STRINGLIB_CHAR p0, Py_ssize_t maxcount) |
742 | 0 | { |
743 | 0 | Py_ssize_t i, count = 0; |
744 | 0 | for (i = 0; i < n; i++) { |
745 | 0 | if (s[i] == p0) { |
746 | 0 | count++; |
747 | 0 | if (count == maxcount) { |
748 | 0 | return maxcount; |
749 | 0 | } |
750 | 0 | } |
751 | 0 | } |
752 | 0 | return count; |
753 | 0 | } Unexecuted instantiation: bytesobject.c:stringlib_count_char Unexecuted instantiation: unicodeobject.c:asciilib_count_char Unexecuted instantiation: unicodeobject.c:ucs1lib_count_char Unexecuted instantiation: unicodeobject.c:ucs2lib_count_char Unexecuted instantiation: unicodeobject.c:ucs4lib_count_char Unexecuted instantiation: bytes_methods.c:stringlib_count_char Unexecuted instantiation: bytearrayobject.c:stringlib_count_char |
754 | | |
755 | | |
756 | | static inline Py_ssize_t |
757 | | STRINGLIB(count_char_no_maxcount)(const STRINGLIB_CHAR *s, Py_ssize_t n, |
758 | | const STRINGLIB_CHAR p0) |
759 | | /* A specialized function of count_char that does not cut off at a maximum. |
760 | | As a result, the compiler is able to vectorize the loop. */ |
761 | 55.9M | { |
762 | 55.9M | Py_ssize_t count = 0; |
763 | 13.3G | for (Py_ssize_t i = 0; i < n; i++) { |
764 | 13.2G | if (s[i] == p0) { |
765 | 231M | count++; |
766 | 231M | } |
767 | 13.2G | } |
768 | 55.9M | return count; |
769 | 55.9M | } Unexecuted instantiation: bytesobject.c:stringlib_count_char_no_maxcount Unexecuted instantiation: unicodeobject.c:asciilib_count_char_no_maxcount unicodeobject.c:ucs1lib_count_char_no_maxcount Line | Count | Source | 761 | 46.1M | { | 762 | 46.1M | Py_ssize_t count = 0; | 763 | 9.25G | for (Py_ssize_t i = 0; i < n; i++) { | 764 | 9.20G | if (s[i] == p0) { | 765 | 64.2M | count++; | 766 | 64.2M | } | 767 | 9.20G | } | 768 | 46.1M | return count; | 769 | 46.1M | } |
unicodeobject.c:ucs2lib_count_char_no_maxcount Line | Count | Source | 761 | 8.98M | { | 762 | 8.98M | Py_ssize_t count = 0; | 763 | 1.98G | for (Py_ssize_t i = 0; i < n; i++) { | 764 | 1.97G | if (s[i] == p0) { | 765 | 73.5M | count++; | 766 | 73.5M | } | 767 | 1.97G | } | 768 | 8.98M | return count; | 769 | 8.98M | } |
unicodeobject.c:ucs4lib_count_char_no_maxcount Line | Count | Source | 761 | 805k | { | 762 | 805k | Py_ssize_t count = 0; | 763 | 2.07G | for (Py_ssize_t i = 0; i < n; i++) { | 764 | 2.06G | if (s[i] == p0) { | 765 | 94.0M | count++; | 766 | 94.0M | } | 767 | 2.06G | } | 768 | 805k | return count; | 769 | 805k | } |
Unexecuted instantiation: bytes_methods.c:stringlib_count_char_no_maxcount Unexecuted instantiation: bytearrayobject.c:stringlib_count_char_no_maxcount |
770 | | |
771 | | |
772 | | Py_LOCAL_INLINE(Py_ssize_t) |
773 | | FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n, |
774 | | const STRINGLIB_CHAR* p, Py_ssize_t m, |
775 | | Py_ssize_t maxcount, int mode) |
776 | 214M | { |
777 | 214M | if (n < m || (mode == FAST_COUNT && maxcount == 0)) { |
778 | 12.6k | return -1; |
779 | 12.6k | } |
780 | | |
781 | | /* look for special cases */ |
782 | 214M | if (m <= 1) { |
783 | 199M | if (m <= 0) { |
784 | 0 | return -1; |
785 | 0 | } |
786 | | /* use special case for 1-character strings */ |
787 | 199M | if (mode == FAST_SEARCH) |
788 | 143M | return STRINGLIB(find_char)(s, n, p[0]); |
789 | 55.9M | else if (mode == FAST_RSEARCH) |
790 | 9.41k | return STRINGLIB(rfind_char)(s, n, p[0]); |
791 | 55.9M | else { |
792 | 55.9M | if (maxcount == PY_SSIZE_T_MAX) { |
793 | 55.9M | return STRINGLIB(count_char_no_maxcount)(s, n, p[0]); |
794 | 55.9M | } |
795 | 0 | return STRINGLIB(count_char)(s, n, p[0], maxcount); |
796 | 55.9M | } |
797 | 199M | } |
798 | | |
799 | 14.7M | if (mode != FAST_RSEARCH) { |
800 | 14.7M | if (n < 2500 || (m < 100 && n < 30000) || m < 6) { |
801 | 14.7M | return STRINGLIB(default_find)(s, n, p, m, maxcount, mode); |
802 | 14.7M | } |
803 | 21 | else if ((m >> 2) * 3 < (n >> 2)) { |
804 | | /* 33% threshold, but don't overflow. */ |
805 | | /* For larger problems where the needle isn't a huge |
806 | | percentage of the size of the haystack, the relatively |
807 | | expensive O(m) startup cost of the two-way algorithm |
808 | | will surely pay off. */ |
809 | 21 | if (mode == FAST_SEARCH) { |
810 | 21 | return STRINGLIB(_two_way_find)(s, n, p, m); |
811 | 21 | } |
812 | 0 | else { |
813 | 0 | return STRINGLIB(_two_way_count)(s, n, p, m, maxcount); |
814 | 0 | } |
815 | 21 | } |
816 | 0 | else { |
817 | | /* To ensure that we have good worst-case behavior, |
818 | | here's an adaptive version of the algorithm, where if |
819 | | we match O(m) characters without any matches of the |
820 | | entire needle, then we predict that the startup cost of |
821 | | the two-way algorithm will probably be worth it. */ |
822 | 0 | return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode); |
823 | 0 | } |
824 | 14.7M | } |
825 | 4 | else { |
826 | | /* FAST_RSEARCH */ |
827 | 4 | return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode); |
828 | 4 | } |
829 | 14.7M | } Unexecuted instantiation: bytesobject.c:fastsearch unicodeobject.c:asciilib_fastsearch Line | Count | Source | 776 | 20.2M | { | 777 | 20.2M | if (n < m || (mode == FAST_COUNT && maxcount == 0)) { | 778 | 0 | return -1; | 779 | 0 | } | 780 | | | 781 | | /* look for special cases */ | 782 | 20.2M | if (m <= 1) { | 783 | 18.6M | if (m <= 0) { | 784 | 0 | return -1; | 785 | 0 | } | 786 | | /* use special case for 1-character strings */ | 787 | 18.6M | if (mode == FAST_SEARCH) | 788 | 18.6M | return STRINGLIB(find_char)(s, n, p[0]); | 789 | 9.41k | else if (mode == FAST_RSEARCH) | 790 | 9.41k | return STRINGLIB(rfind_char)(s, n, p[0]); | 791 | 0 | else { | 792 | 0 | if (maxcount == PY_SSIZE_T_MAX) { | 793 | 0 | return STRINGLIB(count_char_no_maxcount)(s, n, p[0]); | 794 | 0 | } | 795 | 0 | return STRINGLIB(count_char)(s, n, p[0], maxcount); | 796 | 0 | } | 797 | 18.6M | } | 798 | | | 799 | 1.59M | if (mode != FAST_RSEARCH) { | 800 | 1.59M | if (n < 2500 || (m < 100 && n < 30000) || m < 6) { | 801 | 1.59M | return STRINGLIB(default_find)(s, n, p, m, maxcount, mode); | 802 | 1.59M | } | 803 | 0 | else if ((m >> 2) * 3 < (n >> 2)) { | 804 | | /* 33% threshold, but don't overflow. */ | 805 | | /* For larger problems where the needle isn't a huge | 806 | | percentage of the size of the haystack, the relatively | 807 | | expensive O(m) startup cost of the two-way algorithm | 808 | | will surely pay off. */ | 809 | 0 | if (mode == FAST_SEARCH) { | 810 | 0 | return STRINGLIB(_two_way_find)(s, n, p, m); | 811 | 0 | } | 812 | 0 | else { | 813 | 0 | return STRINGLIB(_two_way_count)(s, n, p, m, maxcount); | 814 | 0 | } | 815 | 0 | } | 816 | 0 | else { | 817 | | /* To ensure that we have good worst-case behavior, | 818 | | here's an adaptive version of the algorithm, where if | 819 | | we match O(m) characters without any matches of the | 820 | | entire needle, then we predict that the startup cost of | 821 | | the two-way algorithm will probably be worth it. */ | 822 | 0 | return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode); | 823 | 0 | } | 824 | 1.59M | } | 825 | 0 | else { | 826 | | /* FAST_RSEARCH */ | 827 | 0 | return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode); | 828 | 0 | } | 829 | 1.59M | } |
unicodeobject.c:ucs1lib_fastsearch Line | Count | Source | 776 | 58.8M | { | 777 | 58.8M | if (n < m || (mode == FAST_COUNT && maxcount == 0)) { | 778 | 0 | return -1; | 779 | 0 | } | 780 | | | 781 | | /* look for special cases */ | 782 | 58.8M | if (m <= 1) { | 783 | 52.2M | if (m <= 0) { | 784 | 0 | return -1; | 785 | 0 | } | 786 | | /* use special case for 1-character strings */ | 787 | 52.2M | if (mode == FAST_SEARCH) | 788 | 6.05M | return STRINGLIB(find_char)(s, n, p[0]); | 789 | 46.1M | else if (mode == FAST_RSEARCH) | 790 | 0 | return STRINGLIB(rfind_char)(s, n, p[0]); | 791 | 46.1M | else { | 792 | 46.1M | if (maxcount == PY_SSIZE_T_MAX) { | 793 | 46.1M | return STRINGLIB(count_char_no_maxcount)(s, n, p[0]); | 794 | 46.1M | } | 795 | 0 | return STRINGLIB(count_char)(s, n, p[0], maxcount); | 796 | 46.1M | } | 797 | 52.2M | } | 798 | | | 799 | 6.57M | if (mode != FAST_RSEARCH) { | 800 | 6.57M | if (n < 2500 || (m < 100 && n < 30000) || m < 6) { | 801 | 6.57M | return STRINGLIB(default_find)(s, n, p, m, maxcount, mode); | 802 | 6.57M | } | 803 | 21 | else if ((m >> 2) * 3 < (n >> 2)) { | 804 | | /* 33% threshold, but don't overflow. */ | 805 | | /* For larger problems where the needle isn't a huge | 806 | | percentage of the size of the haystack, the relatively | 807 | | expensive O(m) startup cost of the two-way algorithm | 808 | | will surely pay off. */ | 809 | 21 | if (mode == FAST_SEARCH) { | 810 | 21 | return STRINGLIB(_two_way_find)(s, n, p, m); | 811 | 21 | } | 812 | 0 | else { | 813 | 0 | return STRINGLIB(_two_way_count)(s, n, p, m, maxcount); | 814 | 0 | } | 815 | 21 | } | 816 | 0 | else { | 817 | | /* To ensure that we have good worst-case behavior, | 818 | | here's an adaptive version of the algorithm, where if | 819 | | we match O(m) characters without any matches of the | 820 | | entire needle, then we predict that the startup cost of | 821 | | the two-way algorithm will probably be worth it. */ | 822 | 0 | return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode); | 823 | 0 | } | 824 | 6.57M | } | 825 | 0 | else { | 826 | | /* FAST_RSEARCH */ | 827 | 0 | return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode); | 828 | 0 | } | 829 | 6.57M | } |
unicodeobject.c:ucs2lib_fastsearch Line | Count | Source | 776 | 70.4M | { | 777 | 70.4M | if (n < m || (mode == FAST_COUNT && maxcount == 0)) { | 778 | 12.6k | return -1; | 779 | 12.6k | } | 780 | | | 781 | | /* look for special cases */ | 782 | 70.4M | if (m <= 1) { | 783 | 67.5M | if (m <= 0) { | 784 | 0 | return -1; | 785 | 0 | } | 786 | | /* use special case for 1-character strings */ | 787 | 67.5M | if (mode == FAST_SEARCH) | 788 | 58.5M | return STRINGLIB(find_char)(s, n, p[0]); | 789 | 8.98M | else if (mode == FAST_RSEARCH) | 790 | 0 | return STRINGLIB(rfind_char)(s, n, p[0]); | 791 | 8.98M | else { | 792 | 8.98M | if (maxcount == PY_SSIZE_T_MAX) { | 793 | 8.98M | return STRINGLIB(count_char_no_maxcount)(s, n, p[0]); | 794 | 8.98M | } | 795 | 0 | return STRINGLIB(count_char)(s, n, p[0], maxcount); | 796 | 8.98M | } | 797 | 67.5M | } | 798 | | | 799 | 2.94M | if (mode != FAST_RSEARCH) { | 800 | 2.94M | if (n < 2500 || (m < 100 && n < 30000) || m < 6) { | 801 | 2.94M | return STRINGLIB(default_find)(s, n, p, m, maxcount, mode); | 802 | 2.94M | } | 803 | 0 | else if ((m >> 2) * 3 < (n >> 2)) { | 804 | | /* 33% threshold, but don't overflow. */ | 805 | | /* For larger problems where the needle isn't a huge | 806 | | percentage of the size of the haystack, the relatively | 807 | | expensive O(m) startup cost of the two-way algorithm | 808 | | will surely pay off. */ | 809 | 0 | if (mode == FAST_SEARCH) { | 810 | 0 | return STRINGLIB(_two_way_find)(s, n, p, m); | 811 | 0 | } | 812 | 0 | else { | 813 | 0 | return STRINGLIB(_two_way_count)(s, n, p, m, maxcount); | 814 | 0 | } | 815 | 0 | } | 816 | 0 | else { | 817 | | /* To ensure that we have good worst-case behavior, | 818 | | here's an adaptive version of the algorithm, where if | 819 | | we match O(m) characters without any matches of the | 820 | | entire needle, then we predict that the startup cost of | 821 | | the two-way algorithm will probably be worth it. */ | 822 | 0 | return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode); | 823 | 0 | } | 824 | 2.94M | } | 825 | 0 | else { | 826 | | /* FAST_RSEARCH */ | 827 | 0 | return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode); | 828 | 0 | } | 829 | 2.94M | } |
unicodeobject.c:ucs4lib_fastsearch Line | Count | Source | 776 | 64.7M | { | 777 | 64.7M | if (n < m || (mode == FAST_COUNT && maxcount == 0)) { | 778 | 0 | return -1; | 779 | 0 | } | 780 | | | 781 | | /* look for special cases */ | 782 | 64.7M | if (m <= 1) { | 783 | 61.0M | if (m <= 0) { | 784 | 0 | return -1; | 785 | 0 | } | 786 | | /* use special case for 1-character strings */ | 787 | 61.0M | if (mode == FAST_SEARCH) | 788 | 60.2M | return STRINGLIB(find_char)(s, n, p[0]); | 789 | 805k | else if (mode == FAST_RSEARCH) | 790 | 0 | return STRINGLIB(rfind_char)(s, n, p[0]); | 791 | 805k | else { | 792 | 805k | if (maxcount == PY_SSIZE_T_MAX) { | 793 | 805k | return STRINGLIB(count_char_no_maxcount)(s, n, p[0]); | 794 | 805k | } | 795 | 0 | return STRINGLIB(count_char)(s, n, p[0], maxcount); | 796 | 805k | } | 797 | 61.0M | } | 798 | | | 799 | 3.66M | if (mode != FAST_RSEARCH) { | 800 | 3.66M | if (n < 2500 || (m < 100 && n < 30000) || m < 6) { | 801 | 3.66M | return STRINGLIB(default_find)(s, n, p, m, maxcount, mode); | 802 | 3.66M | } | 803 | 0 | else if ((m >> 2) * 3 < (n >> 2)) { | 804 | | /* 33% threshold, but don't overflow. */ | 805 | | /* For larger problems where the needle isn't a huge | 806 | | percentage of the size of the haystack, the relatively | 807 | | expensive O(m) startup cost of the two-way algorithm | 808 | | will surely pay off. */ | 809 | 0 | if (mode == FAST_SEARCH) { | 810 | 0 | return STRINGLIB(_two_way_find)(s, n, p, m); | 811 | 0 | } | 812 | 0 | else { | 813 | 0 | return STRINGLIB(_two_way_count)(s, n, p, m, maxcount); | 814 | 0 | } | 815 | 0 | } | 816 | 0 | else { | 817 | | /* To ensure that we have good worst-case behavior, | 818 | | here's an adaptive version of the algorithm, where if | 819 | | we match O(m) characters without any matches of the | 820 | | entire needle, then we predict that the startup cost of | 821 | | the two-way algorithm will probably be worth it. */ | 822 | 0 | return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode); | 823 | 0 | } | 824 | 3.66M | } | 825 | 0 | else { | 826 | | /* FAST_RSEARCH */ | 827 | 0 | return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode); | 828 | 0 | } | 829 | 3.66M | } |
bytes_methods.c:fastsearch Line | Count | Source | 776 | 3.03k | { | 777 | 3.03k | if (n < m || (mode == FAST_COUNT && maxcount == 0)) { | 778 | 8 | return -1; | 779 | 8 | } | 780 | | | 781 | | /* look for special cases */ | 782 | 3.02k | if (m <= 1) { | 783 | 0 | if (m <= 0) { | 784 | 0 | return -1; | 785 | 0 | } | 786 | | /* use special case for 1-character strings */ | 787 | 0 | if (mode == FAST_SEARCH) | 788 | 0 | return STRINGLIB(find_char)(s, n, p[0]); | 789 | 0 | else if (mode == FAST_RSEARCH) | 790 | 0 | return STRINGLIB(rfind_char)(s, n, p[0]); | 791 | 0 | else { | 792 | 0 | if (maxcount == PY_SSIZE_T_MAX) { | 793 | 0 | return STRINGLIB(count_char_no_maxcount)(s, n, p[0]); | 794 | 0 | } | 795 | 0 | return STRINGLIB(count_char)(s, n, p[0], maxcount); | 796 | 0 | } | 797 | 0 | } | 798 | | | 799 | 3.02k | if (mode != FAST_RSEARCH) { | 800 | 3.01k | if (n < 2500 || (m < 100 && n < 30000) || m < 6) { | 801 | 3.01k | return STRINGLIB(default_find)(s, n, p, m, maxcount, mode); | 802 | 3.01k | } | 803 | 0 | else if ((m >> 2) * 3 < (n >> 2)) { | 804 | | /* 33% threshold, but don't overflow. */ | 805 | | /* For larger problems where the needle isn't a huge | 806 | | percentage of the size of the haystack, the relatively | 807 | | expensive O(m) startup cost of the two-way algorithm | 808 | | will surely pay off. */ | 809 | 0 | if (mode == FAST_SEARCH) { | 810 | 0 | return STRINGLIB(_two_way_find)(s, n, p, m); | 811 | 0 | } | 812 | 0 | else { | 813 | 0 | return STRINGLIB(_two_way_count)(s, n, p, m, maxcount); | 814 | 0 | } | 815 | 0 | } | 816 | 0 | else { | 817 | | /* To ensure that we have good worst-case behavior, | 818 | | here's an adaptive version of the algorithm, where if | 819 | | we match O(m) characters without any matches of the | 820 | | entire needle, then we predict that the startup cost of | 821 | | the two-way algorithm will probably be worth it. */ | 822 | 0 | return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode); | 823 | 0 | } | 824 | 3.01k | } | 825 | 4 | else { | 826 | | /* FAST_RSEARCH */ | 827 | 4 | return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode); | 828 | 4 | } | 829 | 3.02k | } |
Unexecuted instantiation: bytearrayobject.c:fastsearch |
830 | | |