/src/cpython/Objects/stringlib/fastsearch.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* stringlib: fastsearch implementation */ |
2 | | |
3 | | #define STRINGLIB_FASTSEARCH_H |
4 | | |
5 | | /* fast search/count implementation, based on a mix between boyer- |
6 | | moore and horspool, with a few more bells and whistles on the top. |
7 | | for some more background, see: |
8 | | https://web.archive.org/web/20201107074620/http://effbot.org/zone/stringlib.htm */ |
9 | | |
10 | | /* note: fastsearch may access s[n], which isn't a problem when using |
11 | | Python's ordinary string types, but may cause problems if you're |
12 | | using this code in other contexts. also, the count mode returns -1 |
13 | | if there cannot possibly be a match in the target string, and 0 if |
14 | | it has actually checked for matches, but didn't find any. callers |
15 | | beware! */ |
16 | | |
17 | | /* If the strings are long enough, use Crochemore and Perrin's Two-Way |
18 | | algorithm, which has worst-case O(n) runtime and best-case O(n/k). |
19 | | Also compute a table of shifts to achieve O(n/k) in more cases, |
20 | | and often (data dependent) deduce larger shifts than pure C&P can |
21 | | deduce. See stringlib_find_two_way_notes.txt in this folder for a |
22 | | detailed explanation. */ |
23 | | |
24 | 501M | #define FAST_COUNT 0 |
25 | 346M | #define FAST_SEARCH 1 |
26 | 68.4M | #define FAST_RSEARCH 2 |
27 | | |
28 | | #if LONG_BIT >= 128 |
29 | | #define STRINGLIB_BLOOM_WIDTH 128 |
30 | | #elif LONG_BIT >= 64 |
31 | 4.93G | #define STRINGLIB_BLOOM_WIDTH 64 |
32 | | #elif LONG_BIT >= 32 |
33 | | #define STRINGLIB_BLOOM_WIDTH 32 |
34 | | #else |
35 | | #error "LONG_BIT is smaller than 32" |
36 | | #endif |
37 | | |
38 | | #define STRINGLIB_BLOOM_ADD(mask, ch) \ |
39 | 28.0M | ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1))))) |
40 | | #define STRINGLIB_BLOOM(mask, ch) \ |
41 | 4.90G | ((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1))))) |
42 | | |
43 | | #ifdef STRINGLIB_FAST_MEMCHR |
44 | 189M | # define MEMCHR_CUT_OFF 15 |
45 | | #else |
46 | 54.1M | # define MEMCHR_CUT_OFF 40 |
47 | | #endif |
48 | | |
49 | | Py_LOCAL_INLINE(Py_ssize_t) |
50 | | STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch) |
51 | 243M | { |
52 | 243M | const STRINGLIB_CHAR *p, *e; |
53 | | |
54 | 243M | p = s; |
55 | 243M | e = s + n; |
56 | 243M | if (n > MEMCHR_CUT_OFF) { |
57 | | #ifdef STRINGLIB_FAST_MEMCHR |
58 | 98.5M | p = STRINGLIB_FAST_MEMCHR(s, ch, n); |
59 | 98.5M | if (p != NULL) |
60 | 97.1M | return (p - s); |
61 | 1.37M | return -1; |
62 | | #else |
63 | | /* use memchr if we can choose a needle without too many likely |
64 | | false positives */ |
65 | | const STRINGLIB_CHAR *s1, *e1; |
66 | | unsigned char needle = ch & 0xff; |
67 | | /* If looking for a multiple of 256, we'd have too |
68 | | many false positives looking for the '\0' byte in UCS2 |
69 | | and UCS4 representations. */ |
70 | 46.8M | if (needle != 0) { |
71 | 46.5M | do { |
72 | 46.5M | void *candidate = memchr(p, needle, |
73 | 46.5M | (e - p) * sizeof(STRINGLIB_CHAR)); |
74 | 46.5M | if (candidate == NULL) |
75 | 368k | return -1; |
76 | 46.1M | s1 = p; |
77 | 46.1M | p = (const STRINGLIB_CHAR *) |
78 | 46.1M | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); |
79 | 46.1M | if (*p == ch) |
80 | 46.1M | return (p - s); |
81 | | /* False positive */ |
82 | 52.9k | p++; |
83 | 52.9k | if (p - s1 > MEMCHR_CUT_OFF) |
84 | 24.1k | continue; |
85 | 28.7k | if (e - p <= MEMCHR_CUT_OFF) |
86 | 3.12k | break; |
87 | 25.6k | e1 = p + MEMCHR_CUT_OFF; |
88 | 866k | while (p != e1) { |
89 | 847k | if (*p == ch) |
90 | 6.65k | return (p - s); |
91 | 840k | p++; |
92 | 840k | } |
93 | 25.6k | } |
94 | 46.5M | while (e - p > MEMCHR_CUT_OFF); |
95 | 46.5M | } |
96 | | #endif |
97 | 145M | } |
98 | 372M | while (p < e) { |
99 | 287M | if (*p == ch) |
100 | 13.3M | return (p - s); |
101 | 273M | p++; |
102 | 273M | } |
103 | 84.8M | return -1; |
104 | 98.2M | } Unexecuted instantiation: bytesobject.c:stringlib_find_char unicodeobject.c:ucs1lib_find_char Line | Count | Source | 51 | 101M | { | 52 | 101M | const STRINGLIB_CHAR *p, *e; | 53 | | | 54 | 101M | p = s; | 55 | 101M | e = s + n; | 56 | 101M | if (n > MEMCHR_CUT_OFF) { | 57 | 14.7M | #ifdef STRINGLIB_FAST_MEMCHR | 58 | 14.7M | p = STRINGLIB_FAST_MEMCHR(s, ch, n); | 59 | 14.7M | if (p != NULL) | 60 | 14.0M | return (p - s); | 61 | 728k | return -1; | 62 | | #else | 63 | | /* use memchr if we can choose a needle without too many likely | 64 | | false positives */ | 65 | | const STRINGLIB_CHAR *s1, *e1; | 66 | | unsigned char needle = ch & 0xff; | 67 | | /* If looking for a multiple of 256, we'd have too | 68 | | many false positives looking for the '\0' byte in UCS2 | 69 | | and UCS4 representations. */ | 70 | | if (needle != 0) { | 71 | | do { | 72 | | void *candidate = memchr(p, needle, | 73 | | (e - p) * sizeof(STRINGLIB_CHAR)); | 74 | | if (candidate == NULL) | 75 | | return -1; | 76 | | s1 = p; | 77 | | p = (const STRINGLIB_CHAR *) | 78 | | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 79 | | if (*p == ch) | 80 | | return (p - s); | 81 | | /* False positive */ | 82 | | p++; | 83 | | if (p - s1 > MEMCHR_CUT_OFF) | 84 | | continue; | 85 | | if (e - p <= MEMCHR_CUT_OFF) | 86 | | break; | 87 | | e1 = p + MEMCHR_CUT_OFF; | 88 | | while (p != e1) { | 89 | | if (*p == ch) | 90 | | return (p - s); | 91 | | p++; | 92 | | } | 93 | | } | 94 | | while (e - p > MEMCHR_CUT_OFF); | 95 | | } | 96 | | #endif | 97 | 14.7M | } | 98 | 242M | while (p < e) { | 99 | 159M | if (*p == ch) | 100 | 3.64M | return (p - s); | 101 | 156M | p++; | 102 | 156M | } | 103 | 82.6M | return -1; | 104 | 86.3M | } |
unicodeobject.c:ucs2lib_find_char Line | Count | Source | 51 | 54.0M | { | 52 | 54.0M | const STRINGLIB_CHAR *p, *e; | 53 | | | 54 | 54.0M | p = s; | 55 | 54.0M | e = s + n; | 56 | 54.0M | if (n > MEMCHR_CUT_OFF) { | 57 | | #ifdef STRINGLIB_FAST_MEMCHR | 58 | | p = STRINGLIB_FAST_MEMCHR(s, ch, n); | 59 | | if (p != NULL) | 60 | | return (p - s); | 61 | | return -1; | 62 | | #else | 63 | | /* use memchr if we can choose a needle without too many likely | 64 | | false positives */ | 65 | 46.8M | const STRINGLIB_CHAR *s1, *e1; | 66 | 46.8M | unsigned char needle = ch & 0xff; | 67 | | /* If looking for a multiple of 256, we'd have too | 68 | | many false positives looking for the '\0' byte in UCS2 | 69 | | and UCS4 representations. */ | 70 | 46.8M | if (needle != 0) { | 71 | 46.5M | do { | 72 | 46.5M | void *candidate = memchr(p, needle, | 73 | 46.5M | (e - p) * sizeof(STRINGLIB_CHAR)); | 74 | 46.5M | if (candidate == NULL) | 75 | 368k | return -1; | 76 | 46.1M | s1 = p; | 77 | 46.1M | p = (const STRINGLIB_CHAR *) | 78 | 46.1M | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 79 | 46.1M | if (*p == ch) | 80 | 46.1M | return (p - s); | 81 | | /* False positive */ | 82 | 52.9k | p++; | 83 | 52.9k | if (p - s1 > MEMCHR_CUT_OFF) | 84 | 24.1k | continue; | 85 | 28.7k | if (e - p <= MEMCHR_CUT_OFF) | 86 | 3.12k | break; | 87 | 25.6k | e1 = p + MEMCHR_CUT_OFF; | 88 | 866k | while (p != e1) { | 89 | 847k | if (*p == ch) | 90 | 6.65k | return (p - s); | 91 | 840k | p++; | 92 | 840k | } | 93 | 25.6k | } | 94 | 46.5M | while (e - p > MEMCHR_CUT_OFF); | 95 | 46.5M | } | 96 | 46.8M | #endif | 97 | 46.8M | } | 98 | 116M | while (p < e) { | 99 | 114M | if (*p == ch) | 100 | 5.44M | return (p - s); | 101 | 109M | p++; | 102 | 109M | } | 103 | 2.10M | return -1; | 104 | 7.54M | } |
unicodeobject.c:ucs4lib_find_char Line | Count | Source | 51 | 71.4M | { | 52 | 71.4M | const STRINGLIB_CHAR *p, *e; | 53 | | | 54 | 71.4M | p = s; | 55 | 71.4M | e = s + n; | 56 | 71.4M | if (n > MEMCHR_CUT_OFF) { | 57 | 71.3M | #ifdef STRINGLIB_FAST_MEMCHR | 58 | 71.3M | p = STRINGLIB_FAST_MEMCHR(s, ch, n); | 59 | 71.3M | if (p != NULL) | 60 | 71.3M | return (p - s); | 61 | 26.8k | return -1; | 62 | | #else | 63 | | /* use memchr if we can choose a needle without too many likely | 64 | | false positives */ | 65 | | const STRINGLIB_CHAR *s1, *e1; | 66 | | unsigned char needle = ch & 0xff; | 67 | | /* If looking for a multiple of 256, we'd have too | 68 | | many false positives looking for the '\0' byte in UCS2 | 69 | | and UCS4 representations. */ | 70 | | if (needle != 0) { | 71 | | do { | 72 | | void *candidate = memchr(p, needle, | 73 | | (e - p) * sizeof(STRINGLIB_CHAR)); | 74 | | if (candidate == NULL) | 75 | | return -1; | 76 | | s1 = p; | 77 | | p = (const STRINGLIB_CHAR *) | 78 | | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 79 | | if (*p == ch) | 80 | | return (p - s); | 81 | | /* False positive */ | 82 | | p++; | 83 | | if (p - s1 > MEMCHR_CUT_OFF) | 84 | | continue; | 85 | | if (e - p <= MEMCHR_CUT_OFF) | 86 | | break; | 87 | | e1 = p + MEMCHR_CUT_OFF; | 88 | | while (p != e1) { | 89 | | if (*p == ch) | 90 | | return (p - s); | 91 | | p++; | 92 | | } | 93 | | } | 94 | | while (e - p > MEMCHR_CUT_OFF); | 95 | | } | 96 | | #endif | 97 | 71.3M | } | 98 | 243k | while (p < e) { | 99 | 213k | if (*p == ch) | 100 | 27.1k | return (p - s); | 101 | 186k | p++; | 102 | 186k | } | 103 | 30.8k | return -1; | 104 | 57.9k | } |
unicodeobject.c:asciilib_find_char Line | Count | Source | 51 | 16.6M | { | 52 | 16.6M | const STRINGLIB_CHAR *p, *e; | 53 | | | 54 | 16.6M | p = s; | 55 | 16.6M | e = s + n; | 56 | 16.6M | if (n > MEMCHR_CUT_OFF) { | 57 | 12.3M | #ifdef STRINGLIB_FAST_MEMCHR | 58 | 12.3M | p = STRINGLIB_FAST_MEMCHR(s, ch, n); | 59 | 12.3M | if (p != NULL) | 60 | 11.7M | return (p - s); | 61 | 616k | return -1; | 62 | | #else | 63 | | /* use memchr if we can choose a needle without too many likely | 64 | | false positives */ | 65 | | const STRINGLIB_CHAR *s1, *e1; | 66 | | unsigned char needle = ch & 0xff; | 67 | | /* If looking for a multiple of 256, we'd have too | 68 | | many false positives looking for the '\0' byte in UCS2 | 69 | | and UCS4 representations. */ | 70 | | if (needle != 0) { | 71 | | do { | 72 | | void *candidate = memchr(p, needle, | 73 | | (e - p) * sizeof(STRINGLIB_CHAR)); | 74 | | if (candidate == NULL) | 75 | | return -1; | 76 | | s1 = p; | 77 | | p = (const STRINGLIB_CHAR *) | 78 | | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 79 | | if (*p == ch) | 80 | | return (p - s); | 81 | | /* False positive */ | 82 | | p++; | 83 | | if (p - s1 > MEMCHR_CUT_OFF) | 84 | | continue; | 85 | | if (e - p <= MEMCHR_CUT_OFF) | 86 | | break; | 87 | | e1 = p + MEMCHR_CUT_OFF; | 88 | | while (p != e1) { | 89 | | if (*p == ch) | 90 | | return (p - s); | 91 | | p++; | 92 | | } | 93 | | } | 94 | | while (e - p > MEMCHR_CUT_OFF); | 95 | | } | 96 | | #endif | 97 | 12.3M | } | 98 | 12.7M | while (p < e) { | 99 | 12.6M | if (*p == ch) | 100 | 4.25M | return (p - s); | 101 | 8.44M | p++; | 102 | 8.44M | } | 103 | 58.2k | return -1; | 104 | 4.31M | } |
bytes_methods.c:stringlib_find_char Line | Count | Source | 51 | 1.59k | { | 52 | 1.59k | const STRINGLIB_CHAR *p, *e; | 53 | | | 54 | 1.59k | p = s; | 55 | 1.59k | e = s + n; | 56 | 1.59k | if (n > MEMCHR_CUT_OFF) { | 57 | 1.59k | #ifdef STRINGLIB_FAST_MEMCHR | 58 | 1.59k | p = STRINGLIB_FAST_MEMCHR(s, ch, n); | 59 | 1.59k | if (p != NULL) | 60 | 1.35k | return (p - s); | 61 | 242 | return -1; | 62 | | #else | 63 | | /* use memchr if we can choose a needle without too many likely | 64 | | false positives */ | 65 | | const STRINGLIB_CHAR *s1, *e1; | 66 | | unsigned char needle = ch & 0xff; | 67 | | /* If looking for a multiple of 256, we'd have too | 68 | | many false positives looking for the '\0' byte in UCS2 | 69 | | and UCS4 representations. */ | 70 | | if (needle != 0) { | 71 | | do { | 72 | | void *candidate = memchr(p, needle, | 73 | | (e - p) * sizeof(STRINGLIB_CHAR)); | 74 | | if (candidate == NULL) | 75 | | return -1; | 76 | | s1 = p; | 77 | | p = (const STRINGLIB_CHAR *) | 78 | | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 79 | | if (*p == ch) | 80 | | return (p - s); | 81 | | /* False positive */ | 82 | | p++; | 83 | | if (p - s1 > MEMCHR_CUT_OFF) | 84 | | continue; | 85 | | if (e - p <= MEMCHR_CUT_OFF) | 86 | | break; | 87 | | e1 = p + MEMCHR_CUT_OFF; | 88 | | while (p != e1) { | 89 | | if (*p == ch) | 90 | | return (p - s); | 91 | | p++; | 92 | | } | 93 | | } | 94 | | while (e - p > MEMCHR_CUT_OFF); | 95 | | } | 96 | | #endif | 97 | 1.59k | } | 98 | 0 | while (p < e) { | 99 | 0 | if (*p == ch) | 100 | 0 | return (p - s); | 101 | 0 | p++; | 102 | 0 | } | 103 | 0 | return -1; | 104 | 0 | } |
Unexecuted instantiation: bytearrayobject.c:stringlib_find_char |
105 | | |
106 | | #undef MEMCHR_CUT_OFF |
107 | | |
108 | | #if STRINGLIB_SIZEOF_CHAR == 1 |
109 | 35.3k | # define MEMRCHR_CUT_OFF 15 |
110 | | #else |
111 | 224k | # define MEMRCHR_CUT_OFF 40 |
112 | | #endif |
113 | | |
114 | | |
115 | | Py_LOCAL_INLINE(Py_ssize_t) |
116 | | STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch) |
117 | 221k | { |
118 | 221k | const STRINGLIB_CHAR *p; |
119 | 221k | #ifdef HAVE_MEMRCHR |
120 | | /* memrchr() is a GNU extension, available since glibc 2.1.91. it |
121 | | doesn't seem as optimized as memchr(), but is still quite |
122 | | faster than our hand-written loop below. There is no wmemrchr |
123 | | for 4-byte chars. */ |
124 | | |
125 | 221k | if (n > MEMRCHR_CUT_OFF) { |
126 | | #if STRINGLIB_SIZEOF_CHAR == 1 |
127 | | p = memrchr(s, ch, n); |
128 | 9.60k | if (p != NULL) |
129 | 6.81k | return (p - s); |
130 | 2.78k | return -1; |
131 | | #else |
132 | | /* use memrchr if we can choose a needle without too many likely |
133 | | false positives */ |
134 | | const STRINGLIB_CHAR *s1; |
135 | | Py_ssize_t n1; |
136 | | unsigned char needle = ch & 0xff; |
137 | | /* If looking for a multiple of 256, we'd have too |
138 | | many false positives looking for the '\0' byte in UCS2 |
139 | | and UCS4 representations. */ |
140 | 94.6k | if (needle != 0) { |
141 | 102k | do { |
142 | 102k | void *candidate = memrchr(s, needle, |
143 | 102k | n * sizeof(STRINGLIB_CHAR)); |
144 | 102k | if (candidate == NULL) |
145 | 597 | return -1; |
146 | 102k | n1 = n; |
147 | 102k | p = (const STRINGLIB_CHAR *) |
148 | 102k | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); |
149 | 102k | n = p - s; |
150 | 102k | if (*p == ch) |
151 | 89.1k | return n; |
152 | | /* False positive */ |
153 | 13.1k | if (n1 - n > MEMRCHR_CUT_OFF) |
154 | 4.95k | continue; |
155 | 8.15k | if (n <= MEMRCHR_CUT_OFF) |
156 | 1.26k | break; |
157 | 6.89k | s1 = p - MEMRCHR_CUT_OFF; |
158 | 235k | while (p > s1) { |
159 | 230k | p--; |
160 | 230k | if (*p == ch) |
161 | 1.89k | return (p - s); |
162 | 230k | } |
163 | 5.00k | n = p - s; |
164 | 5.00k | } |
165 | 94.6k | while (n > MEMRCHR_CUT_OFF); |
166 | 94.6k | } |
167 | | #endif |
168 | 104k | } |
169 | 120k | #endif /* HAVE_MEMRCHR */ |
170 | 120k | p = s + n; |
171 | 549k | while (p > s) { |
172 | 532k | p--; |
173 | 532k | if (*p == ch) |
174 | 103k | return (p - s); |
175 | 532k | } |
176 | 16.5k | return -1; |
177 | 120k | } Unexecuted instantiation: bytesobject.c:stringlib_rfind_char unicodeobject.c:ucs1lib_rfind_char Line | Count | Source | 117 | 8.03k | { | 118 | 8.03k | const STRINGLIB_CHAR *p; | 119 | 8.03k | #ifdef HAVE_MEMRCHR | 120 | | /* memrchr() is a GNU extension, available since glibc 2.1.91. it | 121 | | doesn't seem as optimized as memchr(), but is still quite | 122 | | faster than our hand-written loop below. There is no wmemrchr | 123 | | for 4-byte chars. */ | 124 | | | 125 | 8.03k | if (n > MEMRCHR_CUT_OFF) { | 126 | 3.86k | #if STRINGLIB_SIZEOF_CHAR == 1 | 127 | 3.86k | p = memrchr(s, ch, n); | 128 | 3.86k | if (p != NULL) | 129 | 2.93k | return (p - s); | 130 | 927 | return -1; | 131 | | #else | 132 | | /* use memrchr if we can choose a needle without too many likely | 133 | | false positives */ | 134 | | const STRINGLIB_CHAR *s1; | 135 | | Py_ssize_t n1; | 136 | | unsigned char needle = ch & 0xff; | 137 | | /* If looking for a multiple of 256, we'd have too | 138 | | many false positives looking for the '\0' byte in UCS2 | 139 | | and UCS4 representations. */ | 140 | | if (needle != 0) { | 141 | | do { | 142 | | void *candidate = memrchr(s, needle, | 143 | | n * sizeof(STRINGLIB_CHAR)); | 144 | | if (candidate == NULL) | 145 | | return -1; | 146 | | n1 = n; | 147 | | p = (const STRINGLIB_CHAR *) | 148 | | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 149 | | n = p - s; | 150 | | if (*p == ch) | 151 | | return n; | 152 | | /* False positive */ | 153 | | if (n1 - n > MEMRCHR_CUT_OFF) | 154 | | continue; | 155 | | if (n <= MEMRCHR_CUT_OFF) | 156 | | break; | 157 | | s1 = p - MEMRCHR_CUT_OFF; | 158 | | while (p > s1) { | 159 | | p--; | 160 | | if (*p == ch) | 161 | | return (p - s); | 162 | | } | 163 | | n = p - s; | 164 | | } | 165 | | while (n > MEMRCHR_CUT_OFF); | 166 | | } | 167 | | #endif | 168 | 3.86k | } | 169 | 4.17k | #endif /* HAVE_MEMRCHR */ | 170 | 4.17k | p = s + n; | 171 | 12.9k | while (p > s) { | 172 | 12.1k | p--; | 173 | 12.1k | if (*p == ch) | 174 | 3.45k | return (p - s); | 175 | 12.1k | } | 176 | 714 | return -1; | 177 | 4.17k | } |
unicodeobject.c:ucs2lib_rfind_char Line | Count | Source | 117 | 35.1k | { | 118 | 35.1k | const STRINGLIB_CHAR *p; | 119 | 35.1k | #ifdef HAVE_MEMRCHR | 120 | | /* memrchr() is a GNU extension, available since glibc 2.1.91. it | 121 | | doesn't seem as optimized as memchr(), but is still quite | 122 | | faster than our hand-written loop below. There is no wmemrchr | 123 | | for 4-byte chars. */ | 124 | | | 125 | 35.1k | if (n > MEMRCHR_CUT_OFF) { | 126 | | #if STRINGLIB_SIZEOF_CHAR == 1 | 127 | | p = memrchr(s, ch, n); | 128 | | if (p != NULL) | 129 | | return (p - s); | 130 | | return -1; | 131 | | #else | 132 | | /* use memrchr if we can choose a needle without too many likely | 133 | | false positives */ | 134 | 14.2k | const STRINGLIB_CHAR *s1; | 135 | 14.2k | Py_ssize_t n1; | 136 | 14.2k | unsigned char needle = ch & 0xff; | 137 | | /* If looking for a multiple of 256, we'd have too | 138 | | many false positives looking for the '\0' byte in UCS2 | 139 | | and UCS4 representations. */ | 140 | 14.2k | if (needle != 0) { | 141 | 15.9k | do { | 142 | 15.9k | void *candidate = memrchr(s, needle, | 143 | 15.9k | n * sizeof(STRINGLIB_CHAR)); | 144 | 15.9k | if (candidate == NULL) | 145 | 331 | return -1; | 146 | 15.6k | n1 = n; | 147 | 15.6k | p = (const STRINGLIB_CHAR *) | 148 | 15.6k | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 149 | 15.6k | n = p - s; | 150 | 15.6k | if (*p == ch) | 151 | 12.3k | return n; | 152 | | /* False positive */ | 153 | 3.34k | if (n1 - n > MEMRCHR_CUT_OFF) | 154 | 1.23k | continue; | 155 | 2.10k | if (n <= MEMRCHR_CUT_OFF) | 156 | 419 | break; | 157 | 1.68k | s1 = p - MEMRCHR_CUT_OFF; | 158 | 62.9k | while (p > s1) { | 159 | 61.4k | p--; | 160 | 61.4k | if (*p == ch) | 161 | 220 | return (p - s); | 162 | 61.4k | } | 163 | 1.46k | n = p - s; | 164 | 1.46k | } | 165 | 14.2k | while (n > MEMRCHR_CUT_OFF); | 166 | 14.2k | } | 167 | 14.2k | #endif | 168 | 14.2k | } | 169 | 22.2k | #endif /* HAVE_MEMRCHR */ | 170 | 22.2k | p = s + n; | 171 | 111k | while (p > s) { | 172 | 109k | p--; | 173 | 109k | if (*p == ch) | 174 | 20.6k | return (p - s); | 175 | 109k | } | 176 | 1.58k | return -1; | 177 | 22.2k | } |
unicodeobject.c:ucs4lib_rfind_char Line | Count | Source | 117 | 151k | { | 118 | 151k | const STRINGLIB_CHAR *p; | 119 | 151k | #ifdef HAVE_MEMRCHR | 120 | | /* memrchr() is a GNU extension, available since glibc 2.1.91. it | 121 | | doesn't seem as optimized as memchr(), but is still quite | 122 | | faster than our hand-written loop below. There is no wmemrchr | 123 | | for 4-byte chars. */ | 124 | | | 125 | 151k | if (n > MEMRCHR_CUT_OFF) { | 126 | | #if STRINGLIB_SIZEOF_CHAR == 1 | 127 | | p = memrchr(s, ch, n); | 128 | | if (p != NULL) | 129 | | return (p - s); | 130 | | return -1; | 131 | | #else | 132 | | /* use memrchr if we can choose a needle without too many likely | 133 | | false positives */ | 134 | 80.4k | const STRINGLIB_CHAR *s1; | 135 | 80.4k | Py_ssize_t n1; | 136 | 80.4k | unsigned char needle = ch & 0xff; | 137 | | /* If looking for a multiple of 256, we'd have too | 138 | | many false positives looking for the '\0' byte in UCS2 | 139 | | and UCS4 representations. */ | 140 | 80.4k | if (needle != 0) { | 141 | 86.8k | do { | 142 | 86.8k | void *candidate = memrchr(s, needle, | 143 | 86.8k | n * sizeof(STRINGLIB_CHAR)); | 144 | 86.8k | if (candidate == NULL) | 145 | 266 | return -1; | 146 | 86.6k | n1 = n; | 147 | 86.6k | p = (const STRINGLIB_CHAR *) | 148 | 86.6k | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 149 | 86.6k | n = p - s; | 150 | 86.6k | if (*p == ch) | 151 | 76.8k | return n; | 152 | | /* False positive */ | 153 | 9.76k | if (n1 - n > MEMRCHR_CUT_OFF) | 154 | 3.71k | continue; | 155 | 6.05k | if (n <= MEMRCHR_CUT_OFF) | 156 | 844 | break; | 157 | 5.20k | s1 = p - MEMRCHR_CUT_OFF; | 158 | 172k | while (p > s1) { | 159 | 169k | p--; | 160 | 169k | if (*p == ch) | 161 | 1.67k | return (p - s); | 162 | 169k | } | 163 | 3.53k | n = p - s; | 164 | 3.53k | } | 165 | 80.4k | while (n > MEMRCHR_CUT_OFF); | 166 | 80.4k | } | 167 | 80.4k | #endif | 168 | 80.4k | } | 169 | 72.4k | #endif /* HAVE_MEMRCHR */ | 170 | 72.4k | p = s + n; | 171 | 309k | while (p > s) { | 172 | 307k | p--; | 173 | 307k | if (*p == ch) | 174 | 71.2k | return (p - s); | 175 | 307k | } | 176 | 1.22k | return -1; | 177 | 72.4k | } |
unicodeobject.c:asciilib_rfind_char Line | Count | Source | 117 | 9.02k | { | 118 | 9.02k | const STRINGLIB_CHAR *p; | 119 | 9.02k | #ifdef HAVE_MEMRCHR | 120 | | /* memrchr() is a GNU extension, available since glibc 2.1.91. it | 121 | | doesn't seem as optimized as memchr(), but is still quite | 122 | | faster than our hand-written loop below. There is no wmemrchr | 123 | | for 4-byte chars. */ | 124 | | | 125 | 9.02k | if (n > MEMRCHR_CUT_OFF) { | 126 | 3.13k | #if STRINGLIB_SIZEOF_CHAR == 1 | 127 | 3.13k | p = memrchr(s, ch, n); | 128 | 3.13k | if (p != NULL) | 129 | 3.05k | return (p - s); | 130 | 80 | return -1; | 131 | | #else | 132 | | /* use memrchr if we can choose a needle without too many likely | 133 | | false positives */ | 134 | | const STRINGLIB_CHAR *s1; | 135 | | Py_ssize_t n1; | 136 | | unsigned char needle = ch & 0xff; | 137 | | /* If looking for a multiple of 256, we'd have too | 138 | | many false positives looking for the '\0' byte in UCS2 | 139 | | and UCS4 representations. */ | 140 | | if (needle != 0) { | 141 | | do { | 142 | | void *candidate = memrchr(s, needle, | 143 | | n * sizeof(STRINGLIB_CHAR)); | 144 | | if (candidate == NULL) | 145 | | return -1; | 146 | | n1 = n; | 147 | | p = (const STRINGLIB_CHAR *) | 148 | | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 149 | | n = p - s; | 150 | | if (*p == ch) | 151 | | return n; | 152 | | /* False positive */ | 153 | | if (n1 - n > MEMRCHR_CUT_OFF) | 154 | | continue; | 155 | | if (n <= MEMRCHR_CUT_OFF) | 156 | | break; | 157 | | s1 = p - MEMRCHR_CUT_OFF; | 158 | | while (p > s1) { | 159 | | p--; | 160 | | if (*p == ch) | 161 | | return (p - s); | 162 | | } | 163 | | n = p - s; | 164 | | } | 165 | | while (n > MEMRCHR_CUT_OFF); | 166 | | } | 167 | | #endif | 168 | 3.13k | } | 169 | 5.89k | #endif /* HAVE_MEMRCHR */ | 170 | 5.89k | p = s + n; | 171 | 33.4k | while (p > s) { | 172 | 31.4k | p--; | 173 | 31.4k | if (*p == ch) | 174 | 3.89k | return (p - s); | 175 | 31.4k | } | 176 | 2.00k | return -1; | 177 | 5.89k | } |
bytes_methods.c:stringlib_rfind_char Line | Count | Source | 117 | 18.2k | { | 118 | 18.2k | const STRINGLIB_CHAR *p; | 119 | 18.2k | #ifdef HAVE_MEMRCHR | 120 | | /* memrchr() is a GNU extension, available since glibc 2.1.91. it | 121 | | doesn't seem as optimized as memchr(), but is still quite | 122 | | faster than our hand-written loop below. There is no wmemrchr | 123 | | for 4-byte chars. */ | 124 | | | 125 | 18.2k | if (n > MEMRCHR_CUT_OFF) { | 126 | 2.60k | #if STRINGLIB_SIZEOF_CHAR == 1 | 127 | 2.60k | p = memrchr(s, ch, n); | 128 | 2.60k | if (p != NULL) | 129 | 828 | return (p - s); | 130 | 1.77k | return -1; | 131 | | #else | 132 | | /* use memrchr if we can choose a needle without too many likely | 133 | | false positives */ | 134 | | const STRINGLIB_CHAR *s1; | 135 | | Py_ssize_t n1; | 136 | | unsigned char needle = ch & 0xff; | 137 | | /* If looking for a multiple of 256, we'd have too | 138 | | many false positives looking for the '\0' byte in UCS2 | 139 | | and UCS4 representations. */ | 140 | | if (needle != 0) { | 141 | | do { | 142 | | void *candidate = memrchr(s, needle, | 143 | | n * sizeof(STRINGLIB_CHAR)); | 144 | | if (candidate == NULL) | 145 | | return -1; | 146 | | n1 = n; | 147 | | p = (const STRINGLIB_CHAR *) | 148 | | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 149 | | n = p - s; | 150 | | if (*p == ch) | 151 | | return n; | 152 | | /* False positive */ | 153 | | if (n1 - n > MEMRCHR_CUT_OFF) | 154 | | continue; | 155 | | if (n <= MEMRCHR_CUT_OFF) | 156 | | break; | 157 | | s1 = p - MEMRCHR_CUT_OFF; | 158 | | while (p > s1) { | 159 | | p--; | 160 | | if (*p == ch) | 161 | | return (p - s); | 162 | | } | 163 | | n = p - s; | 164 | | } | 165 | | while (n > MEMRCHR_CUT_OFF); | 166 | | } | 167 | | #endif | 168 | 2.60k | } | 169 | 15.6k | #endif /* HAVE_MEMRCHR */ | 170 | 15.6k | p = s + n; | 171 | 82.1k | while (p > s) { | 172 | 71.1k | p--; | 173 | 71.1k | if (*p == ch) | 174 | 4.64k | return (p - s); | 175 | 71.1k | } | 176 | 11.0k | return -1; | 177 | 15.6k | } |
Unexecuted instantiation: bytearrayobject.c:stringlib_rfind_char |
178 | | |
179 | | #undef MEMRCHR_CUT_OFF |
180 | | |
181 | | /* Change to a 1 to see logging comments walk through the algorithm. */ |
182 | | #if 0 && STRINGLIB_SIZEOF_CHAR == 1 |
183 | | # define LOG(...) printf(__VA_ARGS__) |
184 | | # define LOG_STRING(s, n) printf("\"%.*s\"", (int)(n), s) |
185 | | # define LOG_LINEUP() do { \ |
186 | | LOG("> "); LOG_STRING(haystack, len_haystack); LOG("\n> "); \ |
187 | | LOG("%*s",(int)(window_last - haystack + 1 - len_needle), ""); \ |
188 | | LOG_STRING(needle, len_needle); LOG("\n"); \ |
189 | | } while(0) |
190 | | #else |
191 | | # define LOG(...) |
192 | | # define LOG_STRING(s, n) |
193 | | # define LOG_LINEUP() |
194 | | #endif |
195 | | |
196 | | Py_LOCAL_INLINE(Py_ssize_t) |
197 | | STRINGLIB(_lex_search)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle, |
198 | | Py_ssize_t *return_period, int invert_alphabet) |
199 | 36 | { |
200 | | /* Do a lexicographic search. Essentially this: |
201 | | >>> max(needle[i:] for i in range(len(needle)+1)) |
202 | | Also find the period of the right half. */ |
203 | 36 | Py_ssize_t max_suffix = 0; |
204 | 36 | Py_ssize_t candidate = 1; |
205 | 36 | Py_ssize_t k = 0; |
206 | | // The period of the right half. |
207 | 36 | Py_ssize_t period = 1; |
208 | | |
209 | 360 | while (candidate + k < len_needle) { |
210 | | // each loop increases candidate + k + max_suffix |
211 | 324 | STRINGLIB_CHAR a = needle[candidate + k]; |
212 | 324 | STRINGLIB_CHAR b = needle[max_suffix + k]; |
213 | | // check if the suffix at candidate is better than max_suffix |
214 | 324 | if (invert_alphabet ? (b < a) : (a < b)) { |
215 | | // Fell short of max_suffix. |
216 | | // The next k + 1 characters are non-increasing |
217 | | // from candidate, so they won't start a maximal suffix. |
218 | 234 | candidate += k + 1; |
219 | 234 | k = 0; |
220 | | // We've ruled out any period smaller than what's |
221 | | // been scanned since max_suffix. |
222 | 234 | period = candidate - max_suffix; |
223 | 234 | } |
224 | 90 | else if (a == b) { |
225 | 18 | if (k + 1 != period) { |
226 | | // Keep scanning the equal strings |
227 | 0 | k++; |
228 | 0 | } |
229 | 18 | else { |
230 | | // Matched a whole period. |
231 | | // Start matching the next period. |
232 | 18 | candidate += period; |
233 | 18 | k = 0; |
234 | 18 | } |
235 | 18 | } |
236 | 72 | else { |
237 | | // Did better than max_suffix, so replace it. |
238 | 72 | max_suffix = candidate; |
239 | 72 | candidate++; |
240 | 72 | k = 0; |
241 | 72 | period = 1; |
242 | 72 | } |
243 | 324 | } |
244 | 36 | *return_period = period; |
245 | 36 | return max_suffix; |
246 | 36 | } Unexecuted instantiation: bytesobject.c:stringlib__lex_search Unexecuted instantiation: unicodeobject.c:asciilib__lex_search unicodeobject.c:ucs1lib__lex_search Line | Count | Source | 199 | 36 | { | 200 | | /* Do a lexicographic search. Essentially this: | 201 | | >>> max(needle[i:] for i in range(len(needle)+1)) | 202 | | Also find the period of the right half. */ | 203 | 36 | Py_ssize_t max_suffix = 0; | 204 | 36 | Py_ssize_t candidate = 1; | 205 | 36 | Py_ssize_t k = 0; | 206 | | // The period of the right half. | 207 | 36 | Py_ssize_t period = 1; | 208 | | | 209 | 360 | while (candidate + k < len_needle) { | 210 | | // each loop increases candidate + k + max_suffix | 211 | 324 | STRINGLIB_CHAR a = needle[candidate + k]; | 212 | 324 | STRINGLIB_CHAR b = needle[max_suffix + k]; | 213 | | // check if the suffix at candidate is better than max_suffix | 214 | 324 | if (invert_alphabet ? (b < a) : (a < b)) { | 215 | | // Fell short of max_suffix. | 216 | | // The next k + 1 characters are non-increasing | 217 | | // from candidate, so they won't start a maximal suffix. | 218 | 234 | candidate += k + 1; | 219 | 234 | k = 0; | 220 | | // We've ruled out any period smaller than what's | 221 | | // been scanned since max_suffix. | 222 | 234 | period = candidate - max_suffix; | 223 | 234 | } | 224 | 90 | else if (a == b) { | 225 | 18 | if (k + 1 != period) { | 226 | | // Keep scanning the equal strings | 227 | 0 | k++; | 228 | 0 | } | 229 | 18 | else { | 230 | | // Matched a whole period. | 231 | | // Start matching the next period. | 232 | 18 | candidate += period; | 233 | 18 | k = 0; | 234 | 18 | } | 235 | 18 | } | 236 | 72 | else { | 237 | | // Did better than max_suffix, so replace it. | 238 | 72 | max_suffix = candidate; | 239 | 72 | candidate++; | 240 | 72 | k = 0; | 241 | 72 | period = 1; | 242 | 72 | } | 243 | 324 | } | 244 | 36 | *return_period = period; | 245 | 36 | return max_suffix; | 246 | 36 | } |
Unexecuted instantiation: unicodeobject.c:ucs2lib__lex_search Unexecuted instantiation: unicodeobject.c:ucs4lib__lex_search Unexecuted instantiation: bytes_methods.c:stringlib__lex_search Unexecuted instantiation: bytearrayobject.c:stringlib__lex_search |
247 | | |
248 | | Py_LOCAL_INLINE(Py_ssize_t) |
249 | | STRINGLIB(_factorize)(const STRINGLIB_CHAR *needle, |
250 | | Py_ssize_t len_needle, |
251 | | Py_ssize_t *return_period) |
252 | 18 | { |
253 | | /* Do a "critical factorization", making it so that: |
254 | | >>> needle = (left := needle[:cut]) + (right := needle[cut:]) |
255 | | where the "local period" of the cut is maximal. |
256 | | |
257 | | The local period of the cut is the minimal length of a string w |
258 | | such that (left endswith w or w endswith left) |
259 | | and (right startswith w or w startswith right). |
260 | | |
261 | | The Critical Factorization Theorem says that this maximal local |
262 | | period is the global period of the string. |
263 | | |
264 | | Crochemore and Perrin (1991) show that this cut can be computed |
265 | | as the later of two cuts: one that gives a lexicographically |
266 | | maximal right half, and one that gives the same with the |
267 | | with respect to a reversed alphabet-ordering. |
268 | | |
269 | | This is what we want to happen: |
270 | | >>> x = "GCAGAGAG" |
271 | | >>> cut, period = factorize(x) |
272 | | >>> x[:cut], (right := x[cut:]) |
273 | | ('GC', 'AGAGAG') |
274 | | >>> period # right half period |
275 | | 2 |
276 | | >>> right[period:] == right[:-period] |
277 | | True |
278 | | |
279 | | This is how the local period lines up in the above example: |
280 | | GC | AGAGAG |
281 | | AGAGAGC = AGAGAGC |
282 | | The length of this minimal repetition is 7, which is indeed the |
283 | | period of the original string. */ |
284 | | |
285 | 18 | Py_ssize_t cut1, period1, cut2, period2, cut, period; |
286 | 18 | cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0); |
287 | 18 | cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1); |
288 | | |
289 | | // Take the later cut. |
290 | 18 | if (cut1 > cut2) { |
291 | 18 | period = period1; |
292 | 18 | cut = cut1; |
293 | 18 | } |
294 | 0 | else { |
295 | 0 | period = period2; |
296 | 0 | cut = cut2; |
297 | 0 | } |
298 | | |
299 | 18 | LOG("split: "); LOG_STRING(needle, cut); |
300 | 18 | LOG(" + "); LOG_STRING(needle + cut, len_needle - cut); |
301 | 18 | LOG("\n"); |
302 | | |
303 | 18 | *return_period = period; |
304 | 18 | return cut; |
305 | 18 | } Unexecuted instantiation: bytesobject.c:stringlib__factorize Unexecuted instantiation: unicodeobject.c:asciilib__factorize unicodeobject.c:ucs1lib__factorize Line | Count | Source | 252 | 18 | { | 253 | | /* Do a "critical factorization", making it so that: | 254 | | >>> needle = (left := needle[:cut]) + (right := needle[cut:]) | 255 | | where the "local period" of the cut is maximal. | 256 | | | 257 | | The local period of the cut is the minimal length of a string w | 258 | | such that (left endswith w or w endswith left) | 259 | | and (right startswith w or w startswith right). | 260 | | | 261 | | The Critical Factorization Theorem says that this maximal local | 262 | | period is the global period of the string. | 263 | | | 264 | | Crochemore and Perrin (1991) show that this cut can be computed | 265 | | as the later of two cuts: one that gives a lexicographically | 266 | | maximal right half, and one that gives the same with the | 267 | | with respect to a reversed alphabet-ordering. | 268 | | | 269 | | This is what we want to happen: | 270 | | >>> x = "GCAGAGAG" | 271 | | >>> cut, period = factorize(x) | 272 | | >>> x[:cut], (right := x[cut:]) | 273 | | ('GC', 'AGAGAG') | 274 | | >>> period # right half period | 275 | | 2 | 276 | | >>> right[period:] == right[:-period] | 277 | | True | 278 | | | 279 | | This is how the local period lines up in the above example: | 280 | | GC | AGAGAG | 281 | | AGAGAGC = AGAGAGC | 282 | | The length of this minimal repetition is 7, which is indeed the | 283 | | period of the original string. */ | 284 | | | 285 | 18 | Py_ssize_t cut1, period1, cut2, period2, cut, period; | 286 | 18 | cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0); | 287 | 18 | cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1); | 288 | | | 289 | | // Take the later cut. | 290 | 18 | if (cut1 > cut2) { | 291 | 18 | period = period1; | 292 | 18 | cut = cut1; | 293 | 18 | } | 294 | 0 | else { | 295 | 0 | period = period2; | 296 | 0 | cut = cut2; | 297 | 0 | } | 298 | | | 299 | 18 | LOG("split: "); LOG_STRING(needle, cut); | 300 | 18 | LOG(" + "); LOG_STRING(needle + cut, len_needle - cut); | 301 | 18 | LOG("\n"); | 302 | | | 303 | 18 | *return_period = period; | 304 | 18 | return cut; | 305 | 18 | } |
Unexecuted instantiation: unicodeobject.c:ucs2lib__factorize Unexecuted instantiation: unicodeobject.c:ucs4lib__factorize Unexecuted instantiation: bytes_methods.c:stringlib__factorize Unexecuted instantiation: bytearrayobject.c:stringlib__factorize |
306 | | |
307 | | |
308 | 198 | #define SHIFT_TYPE uint8_t |
309 | | #define MAX_SHIFT UINT8_MAX |
310 | | |
311 | 64.4k | #define TABLE_SIZE_BITS 6u |
312 | 64.4k | #define TABLE_SIZE (1U << TABLE_SIZE_BITS) |
313 | 63.2k | #define TABLE_MASK (TABLE_SIZE - 1U) |
314 | | |
315 | | typedef struct STRINGLIB(_pre) { |
316 | | const STRINGLIB_CHAR *needle; |
317 | | Py_ssize_t len_needle; |
318 | | Py_ssize_t cut; |
319 | | Py_ssize_t period; |
320 | | Py_ssize_t gap; |
321 | | int is_periodic; |
322 | | SHIFT_TYPE table[TABLE_SIZE]; |
323 | | } STRINGLIB(prework); |
324 | | |
325 | | |
326 | | static void |
327 | | STRINGLIB(_preprocess)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle, |
328 | | STRINGLIB(prework) *p) |
329 | 18 | { |
330 | 18 | p->needle = needle; |
331 | 18 | p->len_needle = len_needle; |
332 | 18 | p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period)); |
333 | 18 | assert(p->period + p->cut <= len_needle); |
334 | 18 | p->is_periodic = (0 == memcmp(needle, |
335 | 18 | needle + p->period, |
336 | 18 | p->cut * STRINGLIB_SIZEOF_CHAR)); |
337 | 18 | if (p->is_periodic) { |
338 | 0 | assert(p->cut <= len_needle/2); |
339 | 0 | assert(p->cut < p->period); |
340 | 0 | } |
341 | 18 | else { |
342 | | // A lower bound on the period |
343 | 18 | p->period = Py_MAX(p->cut, len_needle - p->cut) + 1; |
344 | 18 | } |
345 | | // The gap between the last character and the previous |
346 | | // occurrence of an equivalent character (modulo TABLE_SIZE) |
347 | 18 | p->gap = len_needle; |
348 | 18 | STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK; |
349 | 126 | for (Py_ssize_t i = len_needle - 2; i >= 0; i--) { |
350 | 126 | STRINGLIB_CHAR x = needle[i] & TABLE_MASK; |
351 | 126 | if (x == last) { |
352 | 18 | p->gap = len_needle - 1 - i; |
353 | 18 | break; |
354 | 18 | } |
355 | 126 | } |
356 | | // Fill up a compressed Boyer-Moore "Bad Character" table |
357 | 18 | Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT); |
358 | 1.17k | for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; i++) { |
359 | 1.15k | p->table[i] = Py_SAFE_DOWNCAST(not_found_shift, |
360 | 1.15k | Py_ssize_t, SHIFT_TYPE); |
361 | 1.15k | } |
362 | 198 | for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; i++) { |
363 | 180 | SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i, |
364 | 180 | Py_ssize_t, SHIFT_TYPE); |
365 | 180 | p->table[needle[i] & TABLE_MASK] = shift; |
366 | 180 | } |
367 | 18 | } Unexecuted instantiation: bytesobject.c:stringlib__preprocess Unexecuted instantiation: unicodeobject.c:asciilib__preprocess unicodeobject.c:ucs1lib__preprocess Line | Count | Source | 329 | 18 | { | 330 | 18 | p->needle = needle; | 331 | 18 | p->len_needle = len_needle; | 332 | 18 | p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period)); | 333 | 18 | assert(p->period + p->cut <= len_needle); | 334 | 18 | p->is_periodic = (0 == memcmp(needle, | 335 | 18 | needle + p->period, | 336 | 18 | p->cut * STRINGLIB_SIZEOF_CHAR)); | 337 | 18 | if (p->is_periodic) { | 338 | 0 | assert(p->cut <= len_needle/2); | 339 | 0 | assert(p->cut < p->period); | 340 | 0 | } | 341 | 18 | else { | 342 | | // A lower bound on the period | 343 | 18 | p->period = Py_MAX(p->cut, len_needle - p->cut) + 1; | 344 | 18 | } | 345 | | // The gap between the last character and the previous | 346 | | // occurrence of an equivalent character (modulo TABLE_SIZE) | 347 | 18 | p->gap = len_needle; | 348 | 18 | STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK; | 349 | 126 | for (Py_ssize_t i = len_needle - 2; i >= 0; i--) { | 350 | 126 | STRINGLIB_CHAR x = needle[i] & TABLE_MASK; | 351 | 126 | if (x == last) { | 352 | 18 | p->gap = len_needle - 1 - i; | 353 | 18 | break; | 354 | 18 | } | 355 | 126 | } | 356 | | // Fill up a compressed Boyer-Moore "Bad Character" table | 357 | 18 | Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT); | 358 | 1.17k | for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; i++) { | 359 | 1.15k | p->table[i] = Py_SAFE_DOWNCAST(not_found_shift, | 360 | 1.15k | Py_ssize_t, SHIFT_TYPE); | 361 | 1.15k | } | 362 | 198 | for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; i++) { | 363 | 180 | SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i, | 364 | 180 | Py_ssize_t, SHIFT_TYPE); | 365 | 180 | p->table[needle[i] & TABLE_MASK] = shift; | 366 | 180 | } | 367 | 18 | } |
Unexecuted instantiation: unicodeobject.c:ucs2lib__preprocess Unexecuted instantiation: unicodeobject.c:ucs4lib__preprocess Unexecuted instantiation: bytes_methods.c:stringlib__preprocess Unexecuted instantiation: bytearrayobject.c:stringlib__preprocess |
368 | | |
369 | | static Py_ssize_t |
370 | | STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack, |
371 | | STRINGLIB(prework) *p) |
372 | 18 | { |
373 | | // Crochemore and Perrin's (1991) Two-Way algorithm. |
374 | | // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260 |
375 | 18 | const Py_ssize_t len_needle = p->len_needle; |
376 | 18 | const Py_ssize_t cut = p->cut; |
377 | 18 | Py_ssize_t period = p->period; |
378 | 18 | const STRINGLIB_CHAR *const needle = p->needle; |
379 | 18 | const STRINGLIB_CHAR *window_last = haystack + len_needle - 1; |
380 | 18 | const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack; |
381 | 18 | SHIFT_TYPE *table = p->table; |
382 | 18 | const STRINGLIB_CHAR *window; |
383 | 18 | LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack); |
384 | | |
385 | 18 | Py_ssize_t gap = p->gap; |
386 | 18 | Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap); |
387 | 18 | if (p->is_periodic) { |
388 | 0 | LOG("Needle is periodic.\n"); |
389 | 0 | Py_ssize_t memory = 0; |
390 | 0 | periodicwindowloop: |
391 | 0 | while (window_last < haystack_end) { |
392 | 0 | assert(memory == 0); |
393 | 0 | for (;;) { |
394 | 0 | LOG_LINEUP(); |
395 | 0 | Py_ssize_t shift = table[(*window_last) & TABLE_MASK]; |
396 | 0 | window_last += shift; |
397 | 0 | if (shift == 0) { |
398 | 0 | break; |
399 | 0 | } |
400 | 0 | if (window_last >= haystack_end) { |
401 | 0 | return -1; |
402 | 0 | } |
403 | 0 | LOG("Horspool skip\n"); |
404 | 0 | } |
405 | 0 | no_shift: |
406 | 0 | window = window_last - len_needle + 1; |
407 | 0 | assert((window[len_needle - 1] & TABLE_MASK) == |
408 | 0 | (needle[len_needle - 1] & TABLE_MASK)); |
409 | 0 | Py_ssize_t i = Py_MAX(cut, memory); |
410 | 0 | for (; i < len_needle; i++) { |
411 | 0 | if (needle[i] != window[i]) { |
412 | 0 | if (i < gap_jump_end) { |
413 | 0 | LOG("Early right half mismatch: jump by gap.\n"); |
414 | 0 | assert(gap >= i - cut + 1); |
415 | 0 | window_last += gap; |
416 | 0 | } |
417 | 0 | else { |
418 | 0 | LOG("Late right half mismatch: jump by n (>gap)\n"); |
419 | 0 | assert(i - cut + 1 > gap); |
420 | 0 | window_last += i - cut + 1; |
421 | 0 | } |
422 | 0 | memory = 0; |
423 | 0 | goto periodicwindowloop; |
424 | 0 | } |
425 | 0 | } |
426 | 0 | for (i = memory; i < cut; i++) { |
427 | 0 | if (needle[i] != window[i]) { |
428 | 0 | LOG("Left half does not match.\n"); |
429 | 0 | window_last += period; |
430 | 0 | memory = len_needle - period; |
431 | 0 | if (window_last >= haystack_end) { |
432 | 0 | return -1; |
433 | 0 | } |
434 | 0 | Py_ssize_t shift = table[(*window_last) & TABLE_MASK]; |
435 | 0 | if (shift) { |
436 | | // A mismatch has been identified to the right |
437 | | // of where i will next start, so we can jump |
438 | | // at least as far as if the mismatch occurred |
439 | | // on the first comparison. |
440 | 0 | Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1; |
441 | 0 | LOG("Skip with Memory.\n"); |
442 | 0 | memory = 0; |
443 | 0 | window_last += Py_MAX(shift, mem_jump); |
444 | 0 | goto periodicwindowloop; |
445 | 0 | } |
446 | 0 | goto no_shift; |
447 | 0 | } |
448 | 0 | } |
449 | 0 | LOG("Found a match!\n"); |
450 | 0 | return window - haystack; |
451 | 0 | } |
452 | 0 | } |
453 | 18 | else { |
454 | 18 | period = Py_MAX(gap, period); |
455 | 18 | LOG("Needle is not periodic.\n"); |
456 | 10.6k | windowloop: |
457 | 10.6k | while (window_last < haystack_end) { |
458 | 62.9k | for (;;) { |
459 | 62.9k | LOG_LINEUP(); |
460 | 62.9k | Py_ssize_t shift = table[(*window_last) & TABLE_MASK]; |
461 | 62.9k | window_last += shift; |
462 | 62.9k | if (shift == 0) { |
463 | 10.6k | break; |
464 | 10.6k | } |
465 | 52.3k | if (window_last >= haystack_end) { |
466 | 15 | return -1; |
467 | 15 | } |
468 | 52.3k | LOG("Horspool skip\n"); |
469 | 52.3k | } |
470 | 10.6k | window = window_last - len_needle + 1; |
471 | 10.6k | assert((window[len_needle - 1] & TABLE_MASK) == |
472 | 10.6k | (needle[len_needle - 1] & TABLE_MASK)); |
473 | 10.6k | Py_ssize_t i = cut; |
474 | 10.8k | for (; i < len_needle; i++) { |
475 | 10.7k | if (needle[i] != window[i]) { |
476 | 10.5k | if (i < gap_jump_end) { |
477 | 10.5k | LOG("Early right half mismatch: jump by gap.\n"); |
478 | 10.5k | assert(gap >= i - cut + 1); |
479 | 10.5k | window_last += gap; |
480 | 10.5k | } |
481 | 0 | else { |
482 | 0 | LOG("Late right half mismatch: jump by n (>gap)\n"); |
483 | 0 | assert(i - cut + 1 > gap); |
484 | 0 | window_last += i - cut + 1; |
485 | 0 | } |
486 | 10.5k | goto windowloop; |
487 | 10.5k | } |
488 | 10.7k | } |
489 | 98 | for (Py_ssize_t i = 0; i < cut; i++) { |
490 | 96 | if (needle[i] != window[i]) { |
491 | 80 | LOG("Left half does not match.\n"); |
492 | 80 | window_last += period; |
493 | 80 | goto windowloop; |
494 | 80 | } |
495 | 96 | } |
496 | 2 | LOG("Found a match!\n"); |
497 | 2 | return window - haystack; |
498 | 82 | } |
499 | 10.6k | } |
500 | 1 | LOG("Not found. Returning -1.\n"); |
501 | 1 | return -1; |
502 | 18 | } Unexecuted instantiation: bytesobject.c:stringlib__two_way Unexecuted instantiation: unicodeobject.c:asciilib__two_way unicodeobject.c:ucs1lib__two_way Line | Count | Source | 372 | 18 | { | 373 | | // Crochemore and Perrin's (1991) Two-Way algorithm. | 374 | | // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260 | 375 | 18 | const Py_ssize_t len_needle = p->len_needle; | 376 | 18 | const Py_ssize_t cut = p->cut; | 377 | 18 | Py_ssize_t period = p->period; | 378 | 18 | const STRINGLIB_CHAR *const needle = p->needle; | 379 | 18 | const STRINGLIB_CHAR *window_last = haystack + len_needle - 1; | 380 | 18 | const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack; | 381 | 18 | SHIFT_TYPE *table = p->table; | 382 | 18 | const STRINGLIB_CHAR *window; | 383 | 18 | LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack); | 384 | | | 385 | 18 | Py_ssize_t gap = p->gap; | 386 | 18 | Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap); | 387 | 18 | if (p->is_periodic) { | 388 | 0 | LOG("Needle is periodic.\n"); | 389 | 0 | Py_ssize_t memory = 0; | 390 | 0 | periodicwindowloop: | 391 | 0 | while (window_last < haystack_end) { | 392 | 0 | assert(memory == 0); | 393 | 0 | for (;;) { | 394 | 0 | LOG_LINEUP(); | 395 | 0 | Py_ssize_t shift = table[(*window_last) & TABLE_MASK]; | 396 | 0 | window_last += shift; | 397 | 0 | if (shift == 0) { | 398 | 0 | break; | 399 | 0 | } | 400 | 0 | if (window_last >= haystack_end) { | 401 | 0 | return -1; | 402 | 0 | } | 403 | 0 | LOG("Horspool skip\n"); | 404 | 0 | } | 405 | 0 | no_shift: | 406 | 0 | window = window_last - len_needle + 1; | 407 | 0 | assert((window[len_needle - 1] & TABLE_MASK) == | 408 | 0 | (needle[len_needle - 1] & TABLE_MASK)); | 409 | 0 | Py_ssize_t i = Py_MAX(cut, memory); | 410 | 0 | for (; i < len_needle; i++) { | 411 | 0 | if (needle[i] != window[i]) { | 412 | 0 | if (i < gap_jump_end) { | 413 | 0 | LOG("Early right half mismatch: jump by gap.\n"); | 414 | 0 | assert(gap >= i - cut + 1); | 415 | 0 | window_last += gap; | 416 | 0 | } | 417 | 0 | else { | 418 | 0 | LOG("Late right half mismatch: jump by n (>gap)\n"); | 419 | 0 | assert(i - cut + 1 > gap); | 420 | 0 | window_last += i - cut + 1; | 421 | 0 | } | 422 | 0 | memory = 0; | 423 | 0 | goto periodicwindowloop; | 424 | 0 | } | 425 | 0 | } | 426 | 0 | for (i = memory; i < cut; i++) { | 427 | 0 | if (needle[i] != window[i]) { | 428 | 0 | LOG("Left half does not match.\n"); | 429 | 0 | window_last += period; | 430 | 0 | memory = len_needle - period; | 431 | 0 | if (window_last >= haystack_end) { | 432 | 0 | return -1; | 433 | 0 | } | 434 | 0 | Py_ssize_t shift = table[(*window_last) & TABLE_MASK]; | 435 | 0 | if (shift) { | 436 | | // A mismatch has been identified to the right | 437 | | // of where i will next start, so we can jump | 438 | | // at least as far as if the mismatch occurred | 439 | | // on the first comparison. | 440 | 0 | Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1; | 441 | 0 | LOG("Skip with Memory.\n"); | 442 | 0 | memory = 0; | 443 | 0 | window_last += Py_MAX(shift, mem_jump); | 444 | 0 | goto periodicwindowloop; | 445 | 0 | } | 446 | 0 | goto no_shift; | 447 | 0 | } | 448 | 0 | } | 449 | 0 | LOG("Found a match!\n"); | 450 | 0 | return window - haystack; | 451 | 0 | } | 452 | 0 | } | 453 | 18 | else { | 454 | 18 | period = Py_MAX(gap, period); | 455 | 18 | LOG("Needle is not periodic.\n"); | 456 | 10.6k | windowloop: | 457 | 10.6k | while (window_last < haystack_end) { | 458 | 62.9k | for (;;) { | 459 | 62.9k | LOG_LINEUP(); | 460 | 62.9k | Py_ssize_t shift = table[(*window_last) & TABLE_MASK]; | 461 | 62.9k | window_last += shift; | 462 | 62.9k | if (shift == 0) { | 463 | 10.6k | break; | 464 | 10.6k | } | 465 | 52.3k | if (window_last >= haystack_end) { | 466 | 15 | return -1; | 467 | 15 | } | 468 | 52.3k | LOG("Horspool skip\n"); | 469 | 52.3k | } | 470 | 10.6k | window = window_last - len_needle + 1; | 471 | 10.6k | assert((window[len_needle - 1] & TABLE_MASK) == | 472 | 10.6k | (needle[len_needle - 1] & TABLE_MASK)); | 473 | 10.6k | Py_ssize_t i = cut; | 474 | 10.8k | for (; i < len_needle; i++) { | 475 | 10.7k | if (needle[i] != window[i]) { | 476 | 10.5k | if (i < gap_jump_end) { | 477 | 10.5k | LOG("Early right half mismatch: jump by gap.\n"); | 478 | 10.5k | assert(gap >= i - cut + 1); | 479 | 10.5k | window_last += gap; | 480 | 10.5k | } | 481 | 0 | else { | 482 | 0 | LOG("Late right half mismatch: jump by n (>gap)\n"); | 483 | 0 | assert(i - cut + 1 > gap); | 484 | 0 | window_last += i - cut + 1; | 485 | 0 | } | 486 | 10.5k | goto windowloop; | 487 | 10.5k | } | 488 | 10.7k | } | 489 | 98 | for (Py_ssize_t i = 0; i < cut; i++) { | 490 | 96 | if (needle[i] != window[i]) { | 491 | 80 | LOG("Left half does not match.\n"); | 492 | 80 | window_last += period; | 493 | 80 | goto windowloop; | 494 | 80 | } | 495 | 96 | } | 496 | 2 | LOG("Found a match!\n"); | 497 | 2 | return window - haystack; | 498 | 82 | } | 499 | 10.6k | } | 500 | 1 | LOG("Not found. Returning -1.\n"); | 501 | 1 | return -1; | 502 | 18 | } |
Unexecuted instantiation: unicodeobject.c:ucs2lib__two_way Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way Unexecuted instantiation: bytes_methods.c:stringlib__two_way Unexecuted instantiation: bytearrayobject.c:stringlib__two_way |
503 | | |
504 | | |
505 | | static Py_ssize_t |
506 | | STRINGLIB(_two_way_find)(const STRINGLIB_CHAR *haystack, |
507 | | Py_ssize_t len_haystack, |
508 | | const STRINGLIB_CHAR *needle, |
509 | | Py_ssize_t len_needle) |
510 | 18 | { |
511 | 18 | LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack); |
512 | 18 | STRINGLIB(prework) p; |
513 | 18 | STRINGLIB(_preprocess)(needle, len_needle, &p); |
514 | 18 | return STRINGLIB(_two_way)(haystack, len_haystack, &p); |
515 | 18 | } Unexecuted instantiation: bytesobject.c:stringlib__two_way_find Unexecuted instantiation: unicodeobject.c:asciilib__two_way_find unicodeobject.c:ucs1lib__two_way_find Line | Count | Source | 510 | 18 | { | 511 | 18 | LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack); | 512 | 18 | STRINGLIB(prework) p; | 513 | 18 | STRINGLIB(_preprocess)(needle, len_needle, &p); | 514 | 18 | return STRINGLIB(_two_way)(haystack, len_haystack, &p); | 515 | 18 | } |
Unexecuted instantiation: unicodeobject.c:ucs2lib__two_way_find Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way_find Unexecuted instantiation: bytes_methods.c:stringlib__two_way_find Unexecuted instantiation: bytearrayobject.c:stringlib__two_way_find |
516 | | |
517 | | |
518 | | static Py_ssize_t |
519 | | STRINGLIB(_two_way_count)(const STRINGLIB_CHAR *haystack, |
520 | | Py_ssize_t len_haystack, |
521 | | const STRINGLIB_CHAR *needle, |
522 | | Py_ssize_t len_needle, |
523 | | Py_ssize_t maxcount) |
524 | 0 | { |
525 | 0 | LOG("###### Counting \"%s\" in \"%s\".\n", needle, haystack); |
526 | 0 | STRINGLIB(prework) p; |
527 | 0 | STRINGLIB(_preprocess)(needle, len_needle, &p); |
528 | 0 | Py_ssize_t index = 0, count = 0; |
529 | 0 | while (1) { |
530 | 0 | Py_ssize_t result; |
531 | 0 | result = STRINGLIB(_two_way)(haystack + index, |
532 | 0 | len_haystack - index, &p); |
533 | 0 | if (result == -1) { |
534 | 0 | return count; |
535 | 0 | } |
536 | 0 | count++; |
537 | 0 | if (count == maxcount) { |
538 | 0 | return maxcount; |
539 | 0 | } |
540 | 0 | index += result + len_needle; |
541 | 0 | } |
542 | 0 | return count; |
543 | 0 | } Unexecuted instantiation: bytesobject.c:stringlib__two_way_count Unexecuted instantiation: unicodeobject.c:asciilib__two_way_count Unexecuted instantiation: unicodeobject.c:ucs1lib__two_way_count Unexecuted instantiation: unicodeobject.c:ucs2lib__two_way_count Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way_count Unexecuted instantiation: bytes_methods.c:stringlib__two_way_count Unexecuted instantiation: bytearrayobject.c:stringlib__two_way_count |
544 | | |
545 | | #undef SHIFT_TYPE |
546 | | #undef NOT_FOUND |
547 | | #undef SHIFT_OVERFLOW |
548 | | #undef TABLE_SIZE_BITS |
549 | | #undef TABLE_SIZE |
550 | | #undef TABLE_MASK |
551 | | |
552 | | #undef LOG |
553 | | #undef LOG_STRING |
554 | | #undef LOG_LINEUP |
555 | | |
556 | | static inline Py_ssize_t |
557 | | STRINGLIB(default_find)(const STRINGLIB_CHAR* s, Py_ssize_t n, |
558 | | const STRINGLIB_CHAR* p, Py_ssize_t m, |
559 | | Py_ssize_t maxcount, int mode) |
560 | 14.0M | { |
561 | 14.0M | const Py_ssize_t w = n - m; |
562 | 14.0M | Py_ssize_t mlast = m - 1, count = 0; |
563 | 14.0M | Py_ssize_t gap = mlast; |
564 | 14.0M | const STRINGLIB_CHAR last = p[mlast]; |
565 | 14.0M | const STRINGLIB_CHAR *const ss = &s[mlast]; |
566 | | |
567 | 14.0M | unsigned long mask = 0; |
568 | 28.0M | for (Py_ssize_t i = 0; i < mlast; i++) { |
569 | 14.0M | STRINGLIB_BLOOM_ADD(mask, p[i]); |
570 | 14.0M | if (p[i] == last) { |
571 | 297k | gap = mlast - i - 1; |
572 | 297k | } |
573 | 14.0M | } |
574 | 14.0M | STRINGLIB_BLOOM_ADD(mask, last); |
575 | | |
576 | 4.93G | for (Py_ssize_t i = 0; i <= w; i++) { |
577 | 4.92G | if (ss[i] == last) { |
578 | | /* candidate match */ |
579 | 61.0M | Py_ssize_t j; |
580 | 81.7M | for (j = 0; j < mlast; j++) { |
581 | 61.1M | if (s[i+j] != p[j]) { |
582 | 40.4M | break; |
583 | 40.4M | } |
584 | 61.1M | } |
585 | 61.0M | if (j == mlast) { |
586 | | /* got a match! */ |
587 | 20.6M | if (mode != FAST_COUNT) { |
588 | 10.3M | return i; |
589 | 10.3M | } |
590 | 10.2M | count++; |
591 | 10.2M | if (count == maxcount) { |
592 | 0 | return maxcount; |
593 | 0 | } |
594 | 10.2M | i = i + mlast; |
595 | 10.2M | continue; |
596 | 10.2M | } |
597 | | /* miss: check if next character is part of pattern */ |
598 | 40.4M | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { |
599 | 7.02M | i = i + m; |
600 | 7.02M | } |
601 | 33.4M | else { |
602 | 33.4M | i = i + gap; |
603 | 33.4M | } |
604 | 40.4M | } |
605 | 4.86G | else { |
606 | | /* skip: check if next character is part of pattern */ |
607 | 4.86G | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { |
608 | 4.81G | i = i + m; |
609 | 4.81G | } |
610 | 4.86G | } |
611 | 4.92G | } |
612 | 3.64M | return mode == FAST_COUNT ? count : -1; |
613 | 14.0M | } Unexecuted instantiation: bytesobject.c:stringlib_default_find unicodeobject.c:asciilib_default_find Line | Count | Source | 560 | 1.62M | { | 561 | 1.62M | const Py_ssize_t w = n - m; | 562 | 1.62M | Py_ssize_t mlast = m - 1, count = 0; | 563 | 1.62M | Py_ssize_t gap = mlast; | 564 | 1.62M | const STRINGLIB_CHAR last = p[mlast]; | 565 | 1.62M | const STRINGLIB_CHAR *const ss = &s[mlast]; | 566 | | | 567 | 1.62M | unsigned long mask = 0; | 568 | 3.25M | for (Py_ssize_t i = 0; i < mlast; i++) { | 569 | 1.62M | STRINGLIB_BLOOM_ADD(mask, p[i]); | 570 | 1.62M | if (p[i] == last) { | 571 | 3.52k | gap = mlast - i - 1; | 572 | 3.52k | } | 573 | 1.62M | } | 574 | 1.62M | STRINGLIB_BLOOM_ADD(mask, last); | 575 | | | 576 | 137M | for (Py_ssize_t i = 0; i <= w; i++) { | 577 | 137M | if (ss[i] == last) { | 578 | | /* candidate match */ | 579 | 3.52M | Py_ssize_t j; | 580 | 5.11M | for (j = 0; j < mlast; j++) { | 581 | 3.52M | if (s[i+j] != p[j]) { | 582 | 1.93M | break; | 583 | 1.93M | } | 584 | 3.52M | } | 585 | 3.52M | if (j == mlast) { | 586 | | /* got a match! */ | 587 | 1.59M | if (mode != FAST_COUNT) { | 588 | 1.59M | return i; | 589 | 1.59M | } | 590 | 0 | count++; | 591 | 0 | if (count == maxcount) { | 592 | 0 | return maxcount; | 593 | 0 | } | 594 | 0 | i = i + mlast; | 595 | 0 | continue; | 596 | 0 | } | 597 | | /* miss: check if next character is part of pattern */ | 598 | 1.93M | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { | 599 | 24.3k | i = i + m; | 600 | 24.3k | } | 601 | 1.90M | else { | 602 | 1.90M | i = i + gap; | 603 | 1.90M | } | 604 | 1.93M | } | 605 | 134M | else { | 606 | | /* skip: check if next character is part of pattern */ | 607 | 134M | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { | 608 | 128M | i = i + m; | 609 | 128M | } | 610 | 134M | } | 611 | 137M | } | 612 | 34.8k | return mode == FAST_COUNT ? count : -1; | 613 | 1.62M | } |
unicodeobject.c:ucs1lib_default_find Line | Count | Source | 560 | 4.70M | { | 561 | 4.70M | const Py_ssize_t w = n - m; | 562 | 4.70M | Py_ssize_t mlast = m - 1, count = 0; | 563 | 4.70M | Py_ssize_t gap = mlast; | 564 | 4.70M | const STRINGLIB_CHAR last = p[mlast]; | 565 | 4.70M | const STRINGLIB_CHAR *const ss = &s[mlast]; | 566 | | | 567 | 4.70M | unsigned long mask = 0; | 568 | 9.45M | for (Py_ssize_t i = 0; i < mlast; i++) { | 569 | 4.75M | STRINGLIB_BLOOM_ADD(mask, p[i]); | 570 | 4.75M | if (p[i] == last) { | 571 | 238k | gap = mlast - i - 1; | 572 | 238k | } | 573 | 4.75M | } | 574 | 4.70M | STRINGLIB_BLOOM_ADD(mask, last); | 575 | | | 576 | 2.61G | for (Py_ssize_t i = 0; i <= w; i++) { | 577 | 2.60G | if (ss[i] == last) { | 578 | | /* candidate match */ | 579 | 18.6M | Py_ssize_t j; | 580 | 22.7M | for (j = 0; j < mlast; j++) { | 581 | 18.6M | if (s[i+j] != p[j]) { | 582 | 14.5M | break; | 583 | 14.5M | } | 584 | 18.6M | } | 585 | 18.6M | if (j == mlast) { | 586 | | /* got a match! */ | 587 | 4.07M | if (mode != FAST_COUNT) { | 588 | 1.24M | return i; | 589 | 1.24M | } | 590 | 2.82M | count++; | 591 | 2.82M | if (count == maxcount) { | 592 | 0 | return maxcount; | 593 | 0 | } | 594 | 2.82M | i = i + mlast; | 595 | 2.82M | continue; | 596 | 2.82M | } | 597 | | /* miss: check if next character is part of pattern */ | 598 | 14.5M | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { | 599 | 3.43M | i = i + m; | 600 | 3.43M | } | 601 | 11.1M | else { | 602 | 11.1M | i = i + gap; | 603 | 11.1M | } | 604 | 14.5M | } | 605 | 2.59G | else { | 606 | | /* skip: check if next character is part of pattern */ | 607 | 2.59G | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { | 608 | 2.57G | i = i + m; | 609 | 2.57G | } | 610 | 2.59G | } | 611 | 2.60G | } | 612 | 3.45M | return mode == FAST_COUNT ? count : -1; | 613 | 4.70M | } |
unicodeobject.c:ucs2lib_default_find Line | Count | Source | 560 | 3.12M | { | 561 | 3.12M | const Py_ssize_t w = n - m; | 562 | 3.12M | Py_ssize_t mlast = m - 1, count = 0; | 563 | 3.12M | Py_ssize_t gap = mlast; | 564 | 3.12M | const STRINGLIB_CHAR last = p[mlast]; | 565 | 3.12M | const STRINGLIB_CHAR *const ss = &s[mlast]; | 566 | | | 567 | 3.12M | unsigned long mask = 0; | 568 | 6.26M | for (Py_ssize_t i = 0; i < mlast; i++) { | 569 | 3.14M | STRINGLIB_BLOOM_ADD(mask, p[i]); | 570 | 3.14M | if (p[i] == last) { | 571 | 32.2k | gap = mlast - i - 1; | 572 | 32.2k | } | 573 | 3.14M | } | 574 | 3.12M | STRINGLIB_BLOOM_ADD(mask, last); | 575 | | | 576 | 917M | for (Py_ssize_t i = 0; i <= w; i++) { | 577 | 917M | if (ss[i] == last) { | 578 | | /* candidate match */ | 579 | 11.6M | Py_ssize_t j; | 580 | 17.6M | for (j = 0; j < mlast; j++) { | 581 | 11.6M | if (s[i+j] != p[j]) { | 582 | 5.71M | break; | 583 | 5.71M | } | 584 | 11.6M | } | 585 | 11.6M | if (j == mlast) { | 586 | | /* got a match! */ | 587 | 5.96M | if (mode != FAST_COUNT) { | 588 | 3.01M | return i; | 589 | 3.01M | } | 590 | 2.94M | count++; | 591 | 2.94M | if (count == maxcount) { | 592 | 0 | return maxcount; | 593 | 0 | } | 594 | 2.94M | i = i + mlast; | 595 | 2.94M | continue; | 596 | 2.94M | } | 597 | | /* miss: check if next character is part of pattern */ | 598 | 5.71M | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { | 599 | 1.59M | i = i + m; | 600 | 1.59M | } | 601 | 4.12M | else { | 602 | 4.12M | i = i + gap; | 603 | 4.12M | } | 604 | 5.71M | } | 605 | 906M | else { | 606 | | /* skip: check if next character is part of pattern */ | 607 | 906M | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { | 608 | 896M | i = i + m; | 609 | 896M | } | 610 | 906M | } | 611 | 917M | } | 612 | 109k | return mode == FAST_COUNT ? count : -1; | 613 | 3.12M | } |
unicodeobject.c:ucs4lib_default_find Line | Count | Source | 560 | 4.54M | { | 561 | 4.54M | const Py_ssize_t w = n - m; | 562 | 4.54M | Py_ssize_t mlast = m - 1, count = 0; | 563 | 4.54M | Py_ssize_t gap = mlast; | 564 | 4.54M | const STRINGLIB_CHAR last = p[mlast]; | 565 | 4.54M | const STRINGLIB_CHAR *const ss = &s[mlast]; | 566 | | | 567 | 4.54M | unsigned long mask = 0; | 568 | 9.08M | for (Py_ssize_t i = 0; i < mlast; i++) { | 569 | 4.54M | STRINGLIB_BLOOM_ADD(mask, p[i]); | 570 | 4.54M | if (p[i] == last) { | 571 | 20.8k | gap = mlast - i - 1; | 572 | 20.8k | } | 573 | 4.54M | } | 574 | 4.54M | STRINGLIB_BLOOM_ADD(mask, last); | 575 | | | 576 | 1.26G | for (Py_ssize_t i = 0; i <= w; i++) { | 577 | 1.26G | if (ss[i] == last) { | 578 | | /* candidate match */ | 579 | 27.2M | Py_ssize_t j; | 580 | 36.2M | for (j = 0; j < mlast; j++) { | 581 | 27.2M | if (s[i+j] != p[j]) { | 582 | 18.2M | break; | 583 | 18.2M | } | 584 | 27.2M | } | 585 | 27.2M | if (j == mlast) { | 586 | | /* got a match! */ | 587 | 8.98M | if (mode != FAST_COUNT) { | 588 | 4.50M | return i; | 589 | 4.50M | } | 590 | 4.48M | count++; | 591 | 4.48M | if (count == maxcount) { | 592 | 0 | return maxcount; | 593 | 0 | } | 594 | 4.48M | i = i + mlast; | 595 | 4.48M | continue; | 596 | 4.48M | } | 597 | | /* miss: check if next character is part of pattern */ | 598 | 18.2M | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { | 599 | 1.97M | i = i + m; | 600 | 1.97M | } | 601 | 16.2M | else { | 602 | 16.2M | i = i + gap; | 603 | 16.2M | } | 604 | 18.2M | } | 605 | 1.23G | else { | 606 | | /* skip: check if next character is part of pattern */ | 607 | 1.23G | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { | 608 | 1.21G | i = i + m; | 609 | 1.21G | } | 610 | 1.23G | } | 611 | 1.26G | } | 612 | 42.2k | return mode == FAST_COUNT ? count : -1; | 613 | 4.54M | } |
bytes_methods.c:stringlib_default_find Line | Count | Source | 560 | 2.81k | { | 561 | 2.81k | const Py_ssize_t w = n - m; | 562 | 2.81k | Py_ssize_t mlast = m - 1, count = 0; | 563 | 2.81k | Py_ssize_t gap = mlast; | 564 | 2.81k | const STRINGLIB_CHAR last = p[mlast]; | 565 | 2.81k | const STRINGLIB_CHAR *const ss = &s[mlast]; | 566 | | | 567 | 2.81k | unsigned long mask = 0; | 568 | 11.2k | for (Py_ssize_t i = 0; i < mlast; i++) { | 569 | 8.43k | STRINGLIB_BLOOM_ADD(mask, p[i]); | 570 | 8.43k | if (p[i] == last) { | 571 | 2.81k | gap = mlast - i - 1; | 572 | 2.81k | } | 573 | 8.43k | } | 574 | 2.81k | STRINGLIB_BLOOM_ADD(mask, last); | 575 | | | 576 | 1.61M | for (Py_ssize_t i = 0; i <= w; i++) { | 577 | 1.61M | if (ss[i] == last) { | 578 | | /* candidate match */ | 579 | 8.39k | Py_ssize_t j; | 580 | 16.4k | for (j = 0; j < mlast; j++) { | 581 | 13.8k | if (s[i+j] != p[j]) { | 582 | 5.79k | break; | 583 | 5.79k | } | 584 | 13.8k | } | 585 | 8.39k | if (j == mlast) { | 586 | | /* got a match! */ | 587 | 2.60k | if (mode != FAST_COUNT) { | 588 | 2.60k | return i; | 589 | 2.60k | } | 590 | 0 | count++; | 591 | 0 | if (count == maxcount) { | 592 | 0 | return maxcount; | 593 | 0 | } | 594 | 0 | i = i + mlast; | 595 | 0 | continue; | 596 | 0 | } | 597 | | /* miss: check if next character is part of pattern */ | 598 | 5.79k | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { | 599 | 640 | i = i + m; | 600 | 640 | } | 601 | 5.15k | else { | 602 | 5.15k | i = i + gap; | 603 | 5.15k | } | 604 | 5.79k | } | 605 | 1.60M | else { | 606 | | /* skip: check if next character is part of pattern */ | 607 | 1.60M | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { | 608 | 120k | i = i + m; | 609 | 120k | } | 610 | 1.60M | } | 611 | 1.61M | } | 612 | 212 | return mode == FAST_COUNT ? count : -1; | 613 | 2.81k | } |
Unexecuted instantiation: bytearrayobject.c:stringlib_default_find |
614 | | |
615 | | |
616 | | static Py_ssize_t |
617 | | STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n, |
618 | | const STRINGLIB_CHAR* p, Py_ssize_t m, |
619 | | Py_ssize_t maxcount, int mode) |
620 | 0 | { |
621 | 0 | const Py_ssize_t w = n - m; |
622 | 0 | Py_ssize_t mlast = m - 1, count = 0; |
623 | 0 | Py_ssize_t gap = mlast; |
624 | 0 | Py_ssize_t hits = 0, res; |
625 | 0 | const STRINGLIB_CHAR last = p[mlast]; |
626 | 0 | const STRINGLIB_CHAR *const ss = &s[mlast]; |
627 | |
|
628 | 0 | unsigned long mask = 0; |
629 | 0 | for (Py_ssize_t i = 0; i < mlast; i++) { |
630 | 0 | STRINGLIB_BLOOM_ADD(mask, p[i]); |
631 | 0 | if (p[i] == last) { |
632 | 0 | gap = mlast - i - 1; |
633 | 0 | } |
634 | 0 | } |
635 | 0 | STRINGLIB_BLOOM_ADD(mask, last); |
636 | |
|
637 | 0 | for (Py_ssize_t i = 0; i <= w; i++) { |
638 | 0 | if (ss[i] == last) { |
639 | | /* candidate match */ |
640 | 0 | Py_ssize_t j; |
641 | 0 | for (j = 0; j < mlast; j++) { |
642 | 0 | if (s[i+j] != p[j]) { |
643 | 0 | break; |
644 | 0 | } |
645 | 0 | } |
646 | 0 | if (j == mlast) { |
647 | | /* got a match! */ |
648 | 0 | if (mode != FAST_COUNT) { |
649 | 0 | return i; |
650 | 0 | } |
651 | 0 | count++; |
652 | 0 | if (count == maxcount) { |
653 | 0 | return maxcount; |
654 | 0 | } |
655 | 0 | i = i + mlast; |
656 | 0 | continue; |
657 | 0 | } |
658 | 0 | hits += j + 1; |
659 | 0 | if (hits > m / 4 && w - i > 2000) { |
660 | 0 | if (mode == FAST_SEARCH) { |
661 | 0 | res = STRINGLIB(_two_way_find)(s + i, n - i, p, m); |
662 | 0 | return res == -1 ? -1 : res + i; |
663 | 0 | } |
664 | 0 | else { |
665 | 0 | res = STRINGLIB(_two_way_count)(s + i, n - i, p, m, |
666 | 0 | maxcount - count); |
667 | 0 | return res + count; |
668 | 0 | } |
669 | 0 | } |
670 | | /* miss: check if next character is part of pattern */ |
671 | 0 | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { |
672 | 0 | i = i + m; |
673 | 0 | } |
674 | 0 | else { |
675 | 0 | i = i + gap; |
676 | 0 | } |
677 | 0 | } |
678 | 0 | else { |
679 | | /* skip: check if next character is part of pattern */ |
680 | 0 | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { |
681 | 0 | i = i + m; |
682 | 0 | } |
683 | 0 | } |
684 | 0 | } |
685 | 0 | return mode == FAST_COUNT ? count : -1; |
686 | 0 | } Unexecuted instantiation: bytesobject.c:stringlib_adaptive_find Unexecuted instantiation: unicodeobject.c:asciilib_adaptive_find Unexecuted instantiation: unicodeobject.c:ucs1lib_adaptive_find Unexecuted instantiation: unicodeobject.c:ucs2lib_adaptive_find Unexecuted instantiation: unicodeobject.c:ucs4lib_adaptive_find Unexecuted instantiation: bytes_methods.c:stringlib_adaptive_find Unexecuted instantiation: bytearrayobject.c:stringlib_adaptive_find |
687 | | |
688 | | |
689 | | static Py_ssize_t |
690 | | STRINGLIB(default_rfind)(const STRINGLIB_CHAR* s, Py_ssize_t n, |
691 | | const STRINGLIB_CHAR* p, Py_ssize_t m, |
692 | | Py_ssize_t maxcount, int mode) |
693 | 4 | { |
694 | | /* create compressed boyer-moore delta 1 table */ |
695 | 4 | unsigned long mask = 0; |
696 | 4 | Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m; |
697 | | |
698 | | /* process pattern[0] outside the loop */ |
699 | 4 | STRINGLIB_BLOOM_ADD(mask, p[0]); |
700 | | /* process pattern[:0:-1] */ |
701 | 16 | for (i = mlast; i > 0; i--) { |
702 | 12 | STRINGLIB_BLOOM_ADD(mask, p[i]); |
703 | 12 | if (p[i] == p[0]) { |
704 | 0 | skip = i - 1; |
705 | 0 | } |
706 | 12 | } |
707 | | |
708 | 356 | for (i = w; i >= 0; i--) { |
709 | 352 | if (s[i] == p[0]) { |
710 | | /* candidate match */ |
711 | 8 | for (j = mlast; j > 0; j--) { |
712 | 8 | if (s[i+j] != p[j]) { |
713 | 8 | break; |
714 | 8 | } |
715 | 8 | } |
716 | 8 | if (j == 0) { |
717 | | /* got a match! */ |
718 | 0 | return i; |
719 | 0 | } |
720 | | /* miss: check if previous character is part of pattern */ |
721 | 8 | if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) { |
722 | 8 | i = i - m; |
723 | 8 | } |
724 | 0 | else { |
725 | 0 | i = i - skip; |
726 | 0 | } |
727 | 8 | } |
728 | 344 | else { |
729 | | /* skip: check if previous character is part of pattern */ |
730 | 344 | if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) { |
731 | 336 | i = i - m; |
732 | 336 | } |
733 | 344 | } |
734 | 352 | } |
735 | 4 | return -1; |
736 | 4 | } Unexecuted instantiation: bytesobject.c:stringlib_default_rfind Unexecuted instantiation: unicodeobject.c:asciilib_default_rfind Unexecuted instantiation: unicodeobject.c:ucs1lib_default_rfind Unexecuted instantiation: unicodeobject.c:ucs2lib_default_rfind Unexecuted instantiation: unicodeobject.c:ucs4lib_default_rfind bytes_methods.c:stringlib_default_rfind Line | Count | Source | 693 | 4 | { | 694 | | /* create compressed boyer-moore delta 1 table */ | 695 | 4 | unsigned long mask = 0; | 696 | 4 | Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m; | 697 | | | 698 | | /* process pattern[0] outside the loop */ | 699 | 4 | STRINGLIB_BLOOM_ADD(mask, p[0]); | 700 | | /* process pattern[:0:-1] */ | 701 | 16 | for (i = mlast; i > 0; i--) { | 702 | 12 | STRINGLIB_BLOOM_ADD(mask, p[i]); | 703 | 12 | if (p[i] == p[0]) { | 704 | 0 | skip = i - 1; | 705 | 0 | } | 706 | 12 | } | 707 | | | 708 | 356 | for (i = w; i >= 0; i--) { | 709 | 352 | if (s[i] == p[0]) { | 710 | | /* candidate match */ | 711 | 8 | for (j = mlast; j > 0; j--) { | 712 | 8 | if (s[i+j] != p[j]) { | 713 | 8 | break; | 714 | 8 | } | 715 | 8 | } | 716 | 8 | if (j == 0) { | 717 | | /* got a match! */ | 718 | 0 | return i; | 719 | 0 | } | 720 | | /* miss: check if previous character is part of pattern */ | 721 | 8 | if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) { | 722 | 8 | i = i - m; | 723 | 8 | } | 724 | 0 | else { | 725 | 0 | i = i - skip; | 726 | 0 | } | 727 | 8 | } | 728 | 344 | else { | 729 | | /* skip: check if previous character is part of pattern */ | 730 | 344 | if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) { | 731 | 336 | i = i - m; | 732 | 336 | } | 733 | 344 | } | 734 | 352 | } | 735 | 4 | return -1; | 736 | 4 | } |
Unexecuted instantiation: bytearrayobject.c:stringlib_default_rfind |
737 | | |
738 | | |
739 | | static inline Py_ssize_t |
740 | | STRINGLIB(count_char)(const STRINGLIB_CHAR *s, Py_ssize_t n, |
741 | | const STRINGLIB_CHAR p0, Py_ssize_t maxcount) |
742 | 0 | { |
743 | 0 | Py_ssize_t i, count = 0; |
744 | 0 | for (i = 0; i < n; i++) { |
745 | 0 | if (s[i] == p0) { |
746 | 0 | count++; |
747 | 0 | if (count == maxcount) { |
748 | 0 | return maxcount; |
749 | 0 | } |
750 | 0 | } |
751 | 0 | } |
752 | 0 | return count; |
753 | 0 | } Unexecuted instantiation: bytesobject.c:stringlib_count_char Unexecuted instantiation: unicodeobject.c:asciilib_count_char Unexecuted instantiation: unicodeobject.c:ucs1lib_count_char Unexecuted instantiation: unicodeobject.c:ucs2lib_count_char Unexecuted instantiation: unicodeobject.c:ucs4lib_count_char Unexecuted instantiation: bytes_methods.c:stringlib_count_char Unexecuted instantiation: bytearrayobject.c:stringlib_count_char |
754 | | |
755 | | |
756 | | static inline Py_ssize_t |
757 | | STRINGLIB(count_char_no_maxcount)(const STRINGLIB_CHAR *s, Py_ssize_t n, |
758 | | const STRINGLIB_CHAR p0) |
759 | | /* A specialized function of count_char that does not cut off at a maximum. |
760 | | As a result, the compiler is able to vectorize the loop. */ |
761 | 54.3M | { |
762 | 54.3M | Py_ssize_t count = 0; |
763 | 11.8G | for (Py_ssize_t i = 0; i < n; i++) { |
764 | 11.8G | if (s[i] == p0) { |
765 | 236M | count++; |
766 | 236M | } |
767 | 11.8G | } |
768 | 54.3M | return count; |
769 | 54.3M | } Unexecuted instantiation: bytesobject.c:stringlib_count_char_no_maxcount Unexecuted instantiation: unicodeobject.c:asciilib_count_char_no_maxcount unicodeobject.c:ucs1lib_count_char_no_maxcount Line | Count | Source | 761 | 46.7M | { | 762 | 46.7M | Py_ssize_t count = 0; | 763 | 7.97G | for (Py_ssize_t i = 0; i < n; i++) { | 764 | 7.92G | if (s[i] == p0) { | 765 | 55.7M | count++; | 766 | 55.7M | } | 767 | 7.92G | } | 768 | 46.7M | return count; | 769 | 46.7M | } |
unicodeobject.c:ucs2lib_count_char_no_maxcount Line | Count | Source | 761 | 6.80M | { | 762 | 6.80M | Py_ssize_t count = 0; | 763 | 1.96G | for (Py_ssize_t i = 0; i < n; i++) { | 764 | 1.95G | if (s[i] == p0) { | 765 | 61.2M | count++; | 766 | 61.2M | } | 767 | 1.95G | } | 768 | 6.80M | return count; | 769 | 6.80M | } |
unicodeobject.c:ucs4lib_count_char_no_maxcount Line | Count | Source | 761 | 819k | { | 762 | 819k | Py_ssize_t count = 0; | 763 | 1.95G | for (Py_ssize_t i = 0; i < n; i++) { | 764 | 1.95G | if (s[i] == p0) { | 765 | 119M | count++; | 766 | 119M | } | 767 | 1.95G | } | 768 | 819k | return count; | 769 | 819k | } |
Unexecuted instantiation: bytes_methods.c:stringlib_count_char_no_maxcount Unexecuted instantiation: bytearrayobject.c:stringlib_count_char_no_maxcount |
770 | | |
771 | | |
772 | | Py_LOCAL_INLINE(Py_ssize_t) |
773 | | FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n, |
774 | | const STRINGLIB_CHAR* p, Py_ssize_t m, |
775 | | Py_ssize_t maxcount, int mode) |
776 | 209M | { |
777 | 209M | if (n < m || (mode == FAST_COUNT && maxcount == 0)) { |
778 | 8.05k | return -1; |
779 | 8.05k | } |
780 | | |
781 | | /* look for special cases */ |
782 | 209M | if (m <= 1) { |
783 | 195M | if (m <= 0) { |
784 | 0 | return -1; |
785 | 0 | } |
786 | | /* use special case for 1-character strings */ |
787 | 195M | if (mode == FAST_SEARCH) |
788 | 141M | return STRINGLIB(find_char)(s, n, p[0]); |
789 | 54.3M | else if (mode == FAST_RSEARCH) |
790 | 9.02k | return STRINGLIB(rfind_char)(s, n, p[0]); |
791 | 54.3M | else { |
792 | 54.3M | if (maxcount == PY_SSIZE_T_MAX) { |
793 | 54.3M | return STRINGLIB(count_char_no_maxcount)(s, n, p[0]); |
794 | 54.3M | } |
795 | 0 | return STRINGLIB(count_char)(s, n, p[0], maxcount); |
796 | 54.3M | } |
797 | 195M | } |
798 | | |
799 | 14.0M | if (mode != FAST_RSEARCH) { |
800 | 14.0M | if (n < 2500 || (m < 100 && n < 30000) || m < 6) { |
801 | 14.0M | return STRINGLIB(default_find)(s, n, p, m, maxcount, mode); |
802 | 14.0M | } |
803 | 18 | else if ((m >> 2) * 3 < (n >> 2)) { |
804 | | /* 33% threshold, but don't overflow. */ |
805 | | /* For larger problems where the needle isn't a huge |
806 | | percentage of the size of the haystack, the relatively |
807 | | expensive O(m) startup cost of the two-way algorithm |
808 | | will surely pay off. */ |
809 | 18 | if (mode == FAST_SEARCH) { |
810 | 18 | return STRINGLIB(_two_way_find)(s, n, p, m); |
811 | 18 | } |
812 | 0 | else { |
813 | 0 | return STRINGLIB(_two_way_count)(s, n, p, m, maxcount); |
814 | 0 | } |
815 | 18 | } |
816 | 0 | else { |
817 | | /* To ensure that we have good worst-case behavior, |
818 | | here's an adaptive version of the algorithm, where if |
819 | | we match O(m) characters without any matches of the |
820 | | entire needle, then we predict that the startup cost of |
821 | | the two-way algorithm will probably be worth it. */ |
822 | 0 | return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode); |
823 | 0 | } |
824 | 14.0M | } |
825 | 4 | else { |
826 | | /* FAST_RSEARCH */ |
827 | 4 | return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode); |
828 | 4 | } |
829 | 14.0M | } Unexecuted instantiation: bytesobject.c:fastsearch unicodeobject.c:asciilib_fastsearch Line | Count | Source | 776 | 18.3M | { | 777 | 18.3M | if (n < m || (mode == FAST_COUNT && maxcount == 0)) { | 778 | 0 | return -1; | 779 | 0 | } | 780 | | | 781 | | /* look for special cases */ | 782 | 18.3M | if (m <= 1) { | 783 | 16.7M | if (m <= 0) { | 784 | 0 | return -1; | 785 | 0 | } | 786 | | /* use special case for 1-character strings */ | 787 | 16.7M | if (mode == FAST_SEARCH) | 788 | 16.6M | return STRINGLIB(find_char)(s, n, p[0]); | 789 | 9.02k | else if (mode == FAST_RSEARCH) | 790 | 9.02k | return STRINGLIB(rfind_char)(s, n, p[0]); | 791 | 0 | else { | 792 | 0 | if (maxcount == PY_SSIZE_T_MAX) { | 793 | 0 | return STRINGLIB(count_char_no_maxcount)(s, n, p[0]); | 794 | 0 | } | 795 | 0 | return STRINGLIB(count_char)(s, n, p[0], maxcount); | 796 | 0 | } | 797 | 16.7M | } | 798 | | | 799 | 1.62M | if (mode != FAST_RSEARCH) { | 800 | 1.62M | if (n < 2500 || (m < 100 && n < 30000) || m < 6) { | 801 | 1.62M | return STRINGLIB(default_find)(s, n, p, m, maxcount, mode); | 802 | 1.62M | } | 803 | 0 | else if ((m >> 2) * 3 < (n >> 2)) { | 804 | | /* 33% threshold, but don't overflow. */ | 805 | | /* For larger problems where the needle isn't a huge | 806 | | percentage of the size of the haystack, the relatively | 807 | | expensive O(m) startup cost of the two-way algorithm | 808 | | will surely pay off. */ | 809 | 0 | if (mode == FAST_SEARCH) { | 810 | 0 | return STRINGLIB(_two_way_find)(s, n, p, m); | 811 | 0 | } | 812 | 0 | else { | 813 | 0 | return STRINGLIB(_two_way_count)(s, n, p, m, maxcount); | 814 | 0 | } | 815 | 0 | } | 816 | 0 | else { | 817 | | /* To ensure that we have good worst-case behavior, | 818 | | here's an adaptive version of the algorithm, where if | 819 | | we match O(m) characters without any matches of the | 820 | | entire needle, then we predict that the startup cost of | 821 | | the two-way algorithm will probably be worth it. */ | 822 | 0 | return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode); | 823 | 0 | } | 824 | 1.62M | } | 825 | 0 | else { | 826 | | /* FAST_RSEARCH */ | 827 | 0 | return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode); | 828 | 0 | } | 829 | 1.62M | } |
unicodeobject.c:ucs1lib_fastsearch Line | Count | Source | 776 | 56.8M | { | 777 | 56.8M | if (n < m || (mode == FAST_COUNT && maxcount == 0)) { | 778 | 0 | return -1; | 779 | 0 | } | 780 | | | 781 | | /* look for special cases */ | 782 | 56.8M | if (m <= 1) { | 783 | 52.1M | if (m <= 0) { | 784 | 0 | return -1; | 785 | 0 | } | 786 | | /* use special case for 1-character strings */ | 787 | 52.1M | if (mode == FAST_SEARCH) | 788 | 5.40M | return STRINGLIB(find_char)(s, n, p[0]); | 789 | 46.7M | else if (mode == FAST_RSEARCH) | 790 | 0 | return STRINGLIB(rfind_char)(s, n, p[0]); | 791 | 46.7M | else { | 792 | 46.7M | if (maxcount == PY_SSIZE_T_MAX) { | 793 | 46.7M | return STRINGLIB(count_char_no_maxcount)(s, n, p[0]); | 794 | 46.7M | } | 795 | 0 | return STRINGLIB(count_char)(s, n, p[0], maxcount); | 796 | 46.7M | } | 797 | 52.1M | } | 798 | | | 799 | 4.70M | if (mode != FAST_RSEARCH) { | 800 | 4.70M | if (n < 2500 || (m < 100 && n < 30000) || m < 6) { | 801 | 4.70M | return STRINGLIB(default_find)(s, n, p, m, maxcount, mode); | 802 | 4.70M | } | 803 | 18 | else if ((m >> 2) * 3 < (n >> 2)) { | 804 | | /* 33% threshold, but don't overflow. */ | 805 | | /* For larger problems where the needle isn't a huge | 806 | | percentage of the size of the haystack, the relatively | 807 | | expensive O(m) startup cost of the two-way algorithm | 808 | | will surely pay off. */ | 809 | 18 | if (mode == FAST_SEARCH) { | 810 | 18 | return STRINGLIB(_two_way_find)(s, n, p, m); | 811 | 18 | } | 812 | 0 | else { | 813 | 0 | return STRINGLIB(_two_way_count)(s, n, p, m, maxcount); | 814 | 0 | } | 815 | 18 | } | 816 | 0 | else { | 817 | | /* To ensure that we have good worst-case behavior, | 818 | | here's an adaptive version of the algorithm, where if | 819 | | we match O(m) characters without any matches of the | 820 | | entire needle, then we predict that the startup cost of | 821 | | the two-way algorithm will probably be worth it. */ | 822 | 0 | return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode); | 823 | 0 | } | 824 | 4.70M | } | 825 | 0 | else { | 826 | | /* FAST_RSEARCH */ | 827 | 0 | return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode); | 828 | 0 | } | 829 | 4.70M | } |
unicodeobject.c:ucs2lib_fastsearch Line | Count | Source | 776 | 58.0M | { | 777 | 58.0M | if (n < m || (mode == FAST_COUNT && maxcount == 0)) { | 778 | 8.04k | return -1; | 779 | 8.04k | } | 780 | | | 781 | | /* look for special cases */ | 782 | 58.0M | if (m <= 1) { | 783 | 54.8M | if (m <= 0) { | 784 | 0 | return -1; | 785 | 0 | } | 786 | | /* use special case for 1-character strings */ | 787 | 54.8M | if (mode == FAST_SEARCH) | 788 | 48.0M | return STRINGLIB(find_char)(s, n, p[0]); | 789 | 6.80M | else if (mode == FAST_RSEARCH) | 790 | 0 | return STRINGLIB(rfind_char)(s, n, p[0]); | 791 | 6.80M | else { | 792 | 6.80M | if (maxcount == PY_SSIZE_T_MAX) { | 793 | 6.80M | return STRINGLIB(count_char_no_maxcount)(s, n, p[0]); | 794 | 6.80M | } | 795 | 0 | return STRINGLIB(count_char)(s, n, p[0], maxcount); | 796 | 6.80M | } | 797 | 54.8M | } | 798 | | | 799 | 3.12M | if (mode != FAST_RSEARCH) { | 800 | 3.12M | if (n < 2500 || (m < 100 && n < 30000) || m < 6) { | 801 | 3.12M | return STRINGLIB(default_find)(s, n, p, m, maxcount, mode); | 802 | 3.12M | } | 803 | 0 | else if ((m >> 2) * 3 < (n >> 2)) { | 804 | | /* 33% threshold, but don't overflow. */ | 805 | | /* For larger problems where the needle isn't a huge | 806 | | percentage of the size of the haystack, the relatively | 807 | | expensive O(m) startup cost of the two-way algorithm | 808 | | will surely pay off. */ | 809 | 0 | if (mode == FAST_SEARCH) { | 810 | 0 | return STRINGLIB(_two_way_find)(s, n, p, m); | 811 | 0 | } | 812 | 0 | else { | 813 | 0 | return STRINGLIB(_two_way_count)(s, n, p, m, maxcount); | 814 | 0 | } | 815 | 0 | } | 816 | 0 | else { | 817 | | /* To ensure that we have good worst-case behavior, | 818 | | here's an adaptive version of the algorithm, where if | 819 | | we match O(m) characters without any matches of the | 820 | | entire needle, then we predict that the startup cost of | 821 | | the two-way algorithm will probably be worth it. */ | 822 | 0 | return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode); | 823 | 0 | } | 824 | 3.12M | } | 825 | 0 | else { | 826 | | /* FAST_RSEARCH */ | 827 | 0 | return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode); | 828 | 0 | } | 829 | 3.12M | } |
unicodeobject.c:ucs4lib_fastsearch Line | Count | Source | 776 | 76.1M | { | 777 | 76.1M | if (n < m || (mode == FAST_COUNT && maxcount == 0)) { | 778 | 0 | return -1; | 779 | 0 | } | 780 | | | 781 | | /* look for special cases */ | 782 | 76.1M | if (m <= 1) { | 783 | 71.6M | if (m <= 0) { | 784 | 0 | return -1; | 785 | 0 | } | 786 | | /* use special case for 1-character strings */ | 787 | 71.6M | if (mode == FAST_SEARCH) | 788 | 70.8M | return STRINGLIB(find_char)(s, n, p[0]); | 789 | 819k | else if (mode == FAST_RSEARCH) | 790 | 0 | return STRINGLIB(rfind_char)(s, n, p[0]); | 791 | 819k | else { | 792 | 819k | if (maxcount == PY_SSIZE_T_MAX) { | 793 | 819k | return STRINGLIB(count_char_no_maxcount)(s, n, p[0]); | 794 | 819k | } | 795 | 0 | return STRINGLIB(count_char)(s, n, p[0], maxcount); | 796 | 819k | } | 797 | 71.6M | } | 798 | | | 799 | 4.54M | if (mode != FAST_RSEARCH) { | 800 | 4.54M | if (n < 2500 || (m < 100 && n < 30000) || m < 6) { | 801 | 4.54M | return STRINGLIB(default_find)(s, n, p, m, maxcount, mode); | 802 | 4.54M | } | 803 | 0 | else if ((m >> 2) * 3 < (n >> 2)) { | 804 | | /* 33% threshold, but don't overflow. */ | 805 | | /* For larger problems where the needle isn't a huge | 806 | | percentage of the size of the haystack, the relatively | 807 | | expensive O(m) startup cost of the two-way algorithm | 808 | | will surely pay off. */ | 809 | 0 | if (mode == FAST_SEARCH) { | 810 | 0 | return STRINGLIB(_two_way_find)(s, n, p, m); | 811 | 0 | } | 812 | 0 | else { | 813 | 0 | return STRINGLIB(_two_way_count)(s, n, p, m, maxcount); | 814 | 0 | } | 815 | 0 | } | 816 | 0 | else { | 817 | | /* To ensure that we have good worst-case behavior, | 818 | | here's an adaptive version of the algorithm, where if | 819 | | we match O(m) characters without any matches of the | 820 | | entire needle, then we predict that the startup cost of | 821 | | the two-way algorithm will probably be worth it. */ | 822 | 0 | return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode); | 823 | 0 | } | 824 | 4.54M | } | 825 | 0 | else { | 826 | | /* FAST_RSEARCH */ | 827 | 0 | return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode); | 828 | 0 | } | 829 | 4.54M | } |
bytes_methods.c:fastsearch Line | Count | Source | 776 | 2.82k | { | 777 | 2.82k | if (n < m || (mode == FAST_COUNT && maxcount == 0)) { | 778 | 9 | return -1; | 779 | 9 | } | 780 | | | 781 | | /* look for special cases */ | 782 | 2.81k | if (m <= 1) { | 783 | 0 | if (m <= 0) { | 784 | 0 | return -1; | 785 | 0 | } | 786 | | /* use special case for 1-character strings */ | 787 | 0 | if (mode == FAST_SEARCH) | 788 | 0 | return STRINGLIB(find_char)(s, n, p[0]); | 789 | 0 | else if (mode == FAST_RSEARCH) | 790 | 0 | return STRINGLIB(rfind_char)(s, n, p[0]); | 791 | 0 | else { | 792 | 0 | if (maxcount == PY_SSIZE_T_MAX) { | 793 | 0 | return STRINGLIB(count_char_no_maxcount)(s, n, p[0]); | 794 | 0 | } | 795 | 0 | return STRINGLIB(count_char)(s, n, p[0], maxcount); | 796 | 0 | } | 797 | 0 | } | 798 | | | 799 | 2.81k | if (mode != FAST_RSEARCH) { | 800 | 2.81k | if (n < 2500 || (m < 100 && n < 30000) || m < 6) { | 801 | 2.81k | return STRINGLIB(default_find)(s, n, p, m, maxcount, mode); | 802 | 2.81k | } | 803 | 0 | else if ((m >> 2) * 3 < (n >> 2)) { | 804 | | /* 33% threshold, but don't overflow. */ | 805 | | /* For larger problems where the needle isn't a huge | 806 | | percentage of the size of the haystack, the relatively | 807 | | expensive O(m) startup cost of the two-way algorithm | 808 | | will surely pay off. */ | 809 | 0 | if (mode == FAST_SEARCH) { | 810 | 0 | return STRINGLIB(_two_way_find)(s, n, p, m); | 811 | 0 | } | 812 | 0 | else { | 813 | 0 | return STRINGLIB(_two_way_count)(s, n, p, m, maxcount); | 814 | 0 | } | 815 | 0 | } | 816 | 0 | else { | 817 | | /* To ensure that we have good worst-case behavior, | 818 | | here's an adaptive version of the algorithm, where if | 819 | | we match O(m) characters without any matches of the | 820 | | entire needle, then we predict that the startup cost of | 821 | | the two-way algorithm will probably be worth it. */ | 822 | 0 | return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode); | 823 | 0 | } | 824 | 2.81k | } | 825 | 4 | else { | 826 | | /* FAST_RSEARCH */ | 827 | 4 | return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode); | 828 | 4 | } | 829 | 2.81k | } |
Unexecuted instantiation: bytearrayobject.c:fastsearch |
830 | | |