/src/Python-3.8.3/Objects/stringlib/fastsearch.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* stringlib: fastsearch implementation */ |
2 | | |
3 | | #define STRINGLIB_FASTSEARCH_H |
4 | | |
5 | | /* fast search/count implementation, based on a mix between boyer- |
6 | | moore and horspool, with a few more bells and whistles on the top. |
7 | | for some more background, see: http://effbot.org/zone/stringlib.htm */ |
8 | | |
9 | | /* note: fastsearch may access s[n], which isn't a problem when using |
10 | | Python's ordinary string types, but may cause problems if you're |
11 | | using this code in other contexts. also, the count mode returns -1 |
12 | | if there cannot possible be a match in the target string, and 0 if |
13 | | it has actually checked for matches, but didn't find any. callers |
14 | | beware! */ |
15 | | |
16 | 4.55k | #define FAST_COUNT 0 |
17 | 2.27k | #define FAST_SEARCH 1 |
18 | 4.44k | #define FAST_RSEARCH 2 |
19 | | |
20 | | #if LONG_BIT >= 128 |
21 | | #define STRINGLIB_BLOOM_WIDTH 128 |
22 | | #elif LONG_BIT >= 64 |
23 | 273 | #define STRINGLIB_BLOOM_WIDTH 64 |
24 | | #elif LONG_BIT >= 32 |
25 | | #define STRINGLIB_BLOOM_WIDTH 32 |
26 | | #else |
27 | | #error "LONG_BIT is smaller than 32" |
28 | | #endif |
29 | | |
30 | | #define STRINGLIB_BLOOM_ADD(mask, ch) \ |
31 | 182 | ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1))))) |
32 | | #define STRINGLIB_BLOOM(mask, ch) \ |
33 | 91 | ((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1))))) |
34 | | |
35 | | #if STRINGLIB_SIZEOF_CHAR == 1 |
36 | 4.97k | # define MEMCHR_CUT_OFF 15 |
37 | | #else |
38 | 0 | # define MEMCHR_CUT_OFF 40 |
39 | | #endif |
40 | | |
41 | | Py_LOCAL_INLINE(Py_ssize_t) |
42 | | STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch) |
43 | 2.68k | { |
44 | 2.68k | const STRINGLIB_CHAR *p, *e; |
45 | | |
46 | 2.68k | p = s; |
47 | 2.68k | e = s + n; |
48 | 2.68k | if (n > MEMCHR_CUT_OFF) { |
49 | | #if STRINGLIB_SIZEOF_CHAR == 1 |
50 | | p = memchr(s, ch, n); |
51 | 174 | if (p != NULL) |
52 | 112 | return (p - s); |
53 | 62 | return -1; |
54 | | #else |
55 | | /* use memchr if we can choose a needle without too many likely |
56 | | false positives */ |
57 | | const STRINGLIB_CHAR *s1, *e1; |
58 | | unsigned char needle = ch & 0xff; |
59 | | /* If looking for a multiple of 256, we'd have too |
60 | | many false positives looking for the '\0' byte in UCS2 |
61 | | and UCS4 representations. */ |
62 | 0 | if (needle != 0) { |
63 | 0 | do { |
64 | 0 | void *candidate = memchr(p, needle, |
65 | 0 | (e - p) * sizeof(STRINGLIB_CHAR)); |
66 | 0 | if (candidate == NULL) |
67 | 0 | return -1; |
68 | 0 | s1 = p; |
69 | 0 | p = (const STRINGLIB_CHAR *) |
70 | 0 | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); |
71 | 0 | if (*p == ch) |
72 | 0 | return (p - s); |
73 | | /* False positive */ |
74 | 0 | p++; |
75 | 0 | if (p - s1 > MEMCHR_CUT_OFF) |
76 | 0 | continue; |
77 | 0 | if (e - p <= MEMCHR_CUT_OFF) |
78 | 0 | break; |
79 | 0 | e1 = p + MEMCHR_CUT_OFF; |
80 | 0 | while (p != e1) { |
81 | 0 | if (*p == ch) |
82 | 0 | return (p - s); |
83 | 0 | p++; |
84 | 0 | } |
85 | 0 | } |
86 | 0 | while (e - p > MEMCHR_CUT_OFF); |
87 | 0 | } |
88 | | #endif |
89 | 174 | } |
90 | 11.2k | while (p < e) { |
91 | 8.93k | if (*p == ch) |
92 | 204 | return (p - s); |
93 | 8.73k | p++; |
94 | 8.73k | } |
95 | 2.30k | return -1; |
96 | 2.51k | } Unexecuted instantiation: bytearrayobject.c:stringlib_find_char Unexecuted instantiation: bytesobject.c:stringlib_find_char unicodeobject.c:ucs1lib_find_char Line | Count | Source | 43 | 2.57k | { | 44 | 2.57k | const STRINGLIB_CHAR *p, *e; | 45 | | | 46 | 2.57k | p = s; | 47 | 2.57k | e = s + n; | 48 | 2.57k | if (n > MEMCHR_CUT_OFF) { | 49 | 73 | #if STRINGLIB_SIZEOF_CHAR == 1 | 50 | 73 | p = memchr(s, ch, n); | 51 | 73 | if (p != NULL) | 52 | 28 | return (p - s); | 53 | 45 | return -1; | 54 | | #else | 55 | | /* use memchr if we can choose a needle without too many likely | 56 | | false positives */ | 57 | | const STRINGLIB_CHAR *s1, *e1; | 58 | | unsigned char needle = ch & 0xff; | 59 | | /* If looking for a multiple of 256, we'd have too | 60 | | many false positives looking for the '\0' byte in UCS2 | 61 | | and UCS4 representations. */ | 62 | | if (needle != 0) { | 63 | | do { | 64 | | void *candidate = memchr(p, needle, | 65 | | (e - p) * sizeof(STRINGLIB_CHAR)); | 66 | | if (candidate == NULL) | 67 | | return -1; | 68 | | s1 = p; | 69 | | p = (const STRINGLIB_CHAR *) | 70 | | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 71 | | if (*p == ch) | 72 | | return (p - s); | 73 | | /* False positive */ | 74 | | p++; | 75 | | if (p - s1 > MEMCHR_CUT_OFF) | 76 | | continue; | 77 | | if (e - p <= MEMCHR_CUT_OFF) | 78 | | break; | 79 | | e1 = p + MEMCHR_CUT_OFF; | 80 | | while (p != e1) { | 81 | | if (*p == ch) | 82 | | return (p - s); | 83 | | p++; | 84 | | } | 85 | | } | 86 | | while (e - p > MEMCHR_CUT_OFF); | 87 | | } | 88 | | #endif | 89 | 73 | } | 90 | 11.2k | while (p < e) { | 91 | 8.91k | if (*p == ch) | 92 | 199 | return (p - s); | 93 | 8.71k | p++; | 94 | 8.71k | } | 95 | 2.30k | return -1; | 96 | 2.50k | } |
Unexecuted instantiation: unicodeobject.c:ucs2lib_find_char Unexecuted instantiation: unicodeobject.c:ucs4lib_find_char unicodeobject.c:asciilib_find_char Line | Count | Source | 43 | 18 | { | 44 | 18 | const STRINGLIB_CHAR *p, *e; | 45 | | | 46 | 18 | p = s; | 47 | 18 | e = s + n; | 48 | 18 | if (n > MEMCHR_CUT_OFF) { | 49 | 13 | #if STRINGLIB_SIZEOF_CHAR == 1 | 50 | 13 | p = memchr(s, ch, n); | 51 | 13 | if (p != NULL) | 52 | 13 | return (p - s); | 53 | 0 | return -1; | 54 | | #else | 55 | | /* use memchr if we can choose a needle without too many likely | 56 | | false positives */ | 57 | | const STRINGLIB_CHAR *s1, *e1; | 58 | | unsigned char needle = ch & 0xff; | 59 | | /* If looking for a multiple of 256, we'd have too | 60 | | many false positives looking for the '\0' byte in UCS2 | 61 | | and UCS4 representations. */ | 62 | | if (needle != 0) { | 63 | | do { | 64 | | void *candidate = memchr(p, needle, | 65 | | (e - p) * sizeof(STRINGLIB_CHAR)); | 66 | | if (candidate == NULL) | 67 | | return -1; | 68 | | s1 = p; | 69 | | p = (const STRINGLIB_CHAR *) | 70 | | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 71 | | if (*p == ch) | 72 | | return (p - s); | 73 | | /* False positive */ | 74 | | p++; | 75 | | if (p - s1 > MEMCHR_CUT_OFF) | 76 | | continue; | 77 | | if (e - p <= MEMCHR_CUT_OFF) | 78 | | break; | 79 | | e1 = p + MEMCHR_CUT_OFF; | 80 | | while (p != e1) { | 81 | | if (*p == ch) | 82 | | return (p - s); | 83 | | p++; | 84 | | } | 85 | | } | 86 | | while (e - p > MEMCHR_CUT_OFF); | 87 | | } | 88 | | #endif | 89 | 13 | } | 90 | 24 | while (p < e) { | 91 | 24 | if (*p == ch) | 92 | 5 | return (p - s); | 93 | 19 | p++; | 94 | 19 | } | 95 | 0 | return -1; | 96 | 5 | } |
Unexecuted instantiation: unicodeobject.c:stringlib_find_char bytes_methods.c:stringlib_find_char Line | Count | Source | 43 | 88 | { | 44 | 88 | const STRINGLIB_CHAR *p, *e; | 45 | | | 46 | 88 | p = s; | 47 | 88 | e = s + n; | 48 | 88 | if (n > MEMCHR_CUT_OFF) { | 49 | 88 | #if STRINGLIB_SIZEOF_CHAR == 1 | 50 | 88 | p = memchr(s, ch, n); | 51 | 88 | if (p != NULL) | 52 | 71 | return (p - s); | 53 | 17 | return -1; | 54 | | #else | 55 | | /* use memchr if we can choose a needle without too many likely | 56 | | false positives */ | 57 | | const STRINGLIB_CHAR *s1, *e1; | 58 | | unsigned char needle = ch & 0xff; | 59 | | /* If looking for a multiple of 256, we'd have too | 60 | | many false positives looking for the '\0' byte in UCS2 | 61 | | and UCS4 representations. */ | 62 | | if (needle != 0) { | 63 | | do { | 64 | | void *candidate = memchr(p, needle, | 65 | | (e - p) * sizeof(STRINGLIB_CHAR)); | 66 | | if (candidate == NULL) | 67 | | return -1; | 68 | | s1 = p; | 69 | | p = (const STRINGLIB_CHAR *) | 70 | | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 71 | | if (*p == ch) | 72 | | return (p - s); | 73 | | /* False positive */ | 74 | | p++; | 75 | | if (p - s1 > MEMCHR_CUT_OFF) | 76 | | continue; | 77 | | if (e - p <= MEMCHR_CUT_OFF) | 78 | | break; | 79 | | e1 = p + MEMCHR_CUT_OFF; | 80 | | while (p != e1) { | 81 | | if (*p == ch) | 82 | | return (p - s); | 83 | | p++; | 84 | | } | 85 | | } | 86 | | while (e - p > MEMCHR_CUT_OFF); | 87 | | } | 88 | | #endif | 89 | 88 | } | 90 | 0 | while (p < e) { | 91 | 0 | if (*p == ch) | 92 | 0 | return (p - s); | 93 | 0 | p++; | 94 | 0 | } | 95 | 0 | return -1; | 96 | 0 | } |
|
97 | | |
98 | | Py_LOCAL_INLINE(Py_ssize_t) |
99 | | STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch) |
100 | 2.29k | { |
101 | 2.29k | const STRINGLIB_CHAR *p; |
102 | 2.29k | #ifdef HAVE_MEMRCHR |
103 | | /* memrchr() is a GNU extension, available since glibc 2.1.91. |
104 | | it doesn't seem as optimized as memchr(), but is still quite |
105 | | faster than our hand-written loop below */ |
106 | | |
107 | 2.29k | if (n > MEMCHR_CUT_OFF) { |
108 | | #if STRINGLIB_SIZEOF_CHAR == 1 |
109 | | p = memrchr(s, ch, n); |
110 | 864 | if (p != NULL) |
111 | 780 | return (p - s); |
112 | 84 | return -1; |
113 | | #else |
114 | | /* use memrchr if we can choose a needle without too many likely |
115 | | false positives */ |
116 | | const STRINGLIB_CHAR *s1; |
117 | | Py_ssize_t n1; |
118 | | unsigned char needle = ch & 0xff; |
119 | | /* If looking for a multiple of 256, we'd have too |
120 | | many false positives looking for the '\0' byte in UCS2 |
121 | | and UCS4 representations. */ |
122 | 0 | if (needle != 0) { |
123 | 0 | do { |
124 | 0 | void *candidate = memrchr(s, needle, |
125 | 0 | n * sizeof(STRINGLIB_CHAR)); |
126 | 0 | if (candidate == NULL) |
127 | 0 | return -1; |
128 | 0 | n1 = n; |
129 | 0 | p = (const STRINGLIB_CHAR *) |
130 | 0 | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); |
131 | 0 | n = p - s; |
132 | 0 | if (*p == ch) |
133 | 0 | return n; |
134 | | /* False positive */ |
135 | 0 | if (n1 - n > MEMCHR_CUT_OFF) |
136 | 0 | continue; |
137 | 0 | if (n <= MEMCHR_CUT_OFF) |
138 | 0 | break; |
139 | 0 | s1 = p - MEMCHR_CUT_OFF; |
140 | 0 | while (p > s1) { |
141 | 0 | p--; |
142 | 0 | if (*p == ch) |
143 | 0 | return (p - s); |
144 | 0 | } |
145 | 0 | n = p - s; |
146 | 0 | } |
147 | 0 | while (n > MEMCHR_CUT_OFF); |
148 | 0 | } |
149 | | #endif |
150 | 864 | } |
151 | 1.42k | #endif /* HAVE_MEMRCHR */ |
152 | 1.42k | p = s + n; |
153 | 8.61k | while (p > s) { |
154 | 7.74k | p--; |
155 | 7.74k | if (*p == ch) |
156 | 556 | return (p - s); |
157 | 7.74k | } |
158 | 870 | return -1; |
159 | 1.42k | } Unexecuted instantiation: bytearrayobject.c:stringlib_rfind_char Unexecuted instantiation: bytesobject.c:stringlib_rfind_char unicodeobject.c:ucs1lib_rfind_char Line | Count | Source | 100 | 84 | { | 101 | 84 | const STRINGLIB_CHAR *p; | 102 | 84 | #ifdef HAVE_MEMRCHR | 103 | | /* memrchr() is a GNU extension, available since glibc 2.1.91. | 104 | | it doesn't seem as optimized as memchr(), but is still quite | 105 | | faster than our hand-written loop below */ | 106 | | | 107 | 84 | if (n > MEMCHR_CUT_OFF) { | 108 | 42 | #if STRINGLIB_SIZEOF_CHAR == 1 | 109 | 42 | p = memrchr(s, ch, n); | 110 | 42 | if (p != NULL) | 111 | 42 | return (p - s); | 112 | 0 | return -1; | 113 | | #else | 114 | | /* use memrchr if we can choose a needle without too many likely | 115 | | false positives */ | 116 | | const STRINGLIB_CHAR *s1; | 117 | | Py_ssize_t n1; | 118 | | unsigned char needle = ch & 0xff; | 119 | | /* If looking for a multiple of 256, we'd have too | 120 | | many false positives looking for the '\0' byte in UCS2 | 121 | | and UCS4 representations. */ | 122 | | if (needle != 0) { | 123 | | do { | 124 | | void *candidate = memrchr(s, needle, | 125 | | n * sizeof(STRINGLIB_CHAR)); | 126 | | if (candidate == NULL) | 127 | | return -1; | 128 | | n1 = n; | 129 | | p = (const STRINGLIB_CHAR *) | 130 | | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 131 | | n = p - s; | 132 | | if (*p == ch) | 133 | | return n; | 134 | | /* False positive */ | 135 | | if (n1 - n > MEMCHR_CUT_OFF) | 136 | | continue; | 137 | | if (n <= MEMCHR_CUT_OFF) | 138 | | break; | 139 | | s1 = p - MEMCHR_CUT_OFF; | 140 | | while (p > s1) { | 141 | | p--; | 142 | | if (*p == ch) | 143 | | return (p - s); | 144 | | } | 145 | | n = p - s; | 146 | | } | 147 | | while (n > MEMCHR_CUT_OFF); | 148 | | } | 149 | | #endif | 150 | 42 | } | 151 | 42 | #endif /* HAVE_MEMRCHR */ | 152 | 42 | p = s + n; | 153 | 252 | while (p > s) { | 154 | 238 | p--; | 155 | 238 | if (*p == ch) | 156 | 28 | return (p - s); | 157 | 238 | } | 158 | 14 | return -1; | 159 | 42 | } |
Unexecuted instantiation: unicodeobject.c:ucs2lib_rfind_char Unexecuted instantiation: unicodeobject.c:ucs4lib_rfind_char unicodeobject.c:asciilib_rfind_char Line | Count | Source | 100 | 2.20k | { | 101 | 2.20k | const STRINGLIB_CHAR *p; | 102 | 2.20k | #ifdef HAVE_MEMRCHR | 103 | | /* memrchr() is a GNU extension, available since glibc 2.1.91. | 104 | | it doesn't seem as optimized as memchr(), but is still quite | 105 | | faster than our hand-written loop below */ | 106 | | | 107 | 2.20k | if (n > MEMCHR_CUT_OFF) { | 108 | 822 | #if STRINGLIB_SIZEOF_CHAR == 1 | 109 | 822 | p = memrchr(s, ch, n); | 110 | 822 | if (p != NULL) | 111 | 738 | return (p - s); | 112 | 84 | return -1; | 113 | | #else | 114 | | /* use memrchr if we can choose a needle without too many likely | 115 | | false positives */ | 116 | | const STRINGLIB_CHAR *s1; | 117 | | Py_ssize_t n1; | 118 | | unsigned char needle = ch & 0xff; | 119 | | /* If looking for a multiple of 256, we'd have too | 120 | | many false positives looking for the '\0' byte in UCS2 | 121 | | and UCS4 representations. */ | 122 | | if (needle != 0) { | 123 | | do { | 124 | | void *candidate = memrchr(s, needle, | 125 | | n * sizeof(STRINGLIB_CHAR)); | 126 | | if (candidate == NULL) | 127 | | return -1; | 128 | | n1 = n; | 129 | | p = (const STRINGLIB_CHAR *) | 130 | | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 131 | | n = p - s; | 132 | | if (*p == ch) | 133 | | return n; | 134 | | /* False positive */ | 135 | | if (n1 - n > MEMCHR_CUT_OFF) | 136 | | continue; | 137 | | if (n <= MEMCHR_CUT_OFF) | 138 | | break; | 139 | | s1 = p - MEMCHR_CUT_OFF; | 140 | | while (p > s1) { | 141 | | p--; | 142 | | if (*p == ch) | 143 | | return (p - s); | 144 | | } | 145 | | n = p - s; | 146 | | } | 147 | | while (n > MEMCHR_CUT_OFF); | 148 | | } | 149 | | #endif | 150 | 822 | } | 151 | 1.38k | #endif /* HAVE_MEMRCHR */ | 152 | 1.38k | p = s + n; | 153 | 8.36k | while (p > s) { | 154 | 7.50k | p--; | 155 | 7.50k | if (*p == ch) | 156 | 528 | return (p - s); | 157 | 7.50k | } | 158 | 856 | return -1; | 159 | 1.38k | } |
Unexecuted instantiation: unicodeobject.c:stringlib_rfind_char Unexecuted instantiation: bytes_methods.c:stringlib_rfind_char |
160 | | |
161 | | #undef MEMCHR_CUT_OFF |
162 | | |
163 | | Py_LOCAL_INLINE(Py_ssize_t) |
164 | | FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n, |
165 | | const STRINGLIB_CHAR* p, Py_ssize_t m, |
166 | | Py_ssize_t maxcount, int mode) |
167 | 2.26k | { |
168 | 2.26k | unsigned long mask; |
169 | 2.26k | Py_ssize_t skip, count = 0; |
170 | 2.26k | Py_ssize_t i, j, mlast, w; |
171 | | |
172 | 2.26k | w = n - m; |
173 | | |
174 | 2.26k | if (w < 0 || (mode == FAST_COUNT && maxcount == 0)) |
175 | 0 | return -1; |
176 | | |
177 | | /* look for special cases */ |
178 | 2.26k | if (m <= 1) { |
179 | 2.22k | if (m <= 0) |
180 | 0 | return -1; |
181 | | /* use special case for 1-character strings */ |
182 | 2.22k | if (mode == FAST_SEARCH) |
183 | 18 | return STRINGLIB(find_char)(s, n, p[0]); |
184 | 2.20k | else if (mode == FAST_RSEARCH) |
185 | 2.20k | return STRINGLIB(rfind_char)(s, n, p[0]); |
186 | 2 | else { /* FAST_COUNT */ |
187 | 85 | for (i = 0; i < n; i++) |
188 | 83 | if (s[i] == p[0]) { |
189 | 18 | count++; |
190 | 18 | if (count == maxcount) |
191 | 0 | return maxcount; |
192 | 18 | } |
193 | 2 | return count; |
194 | 2 | } |
195 | 2.22k | } |
196 | | |
197 | 35 | mlast = m - 1; |
198 | 35 | skip = mlast - 1; |
199 | 35 | mask = 0; |
200 | | |
201 | 35 | if (mode != FAST_RSEARCH) { |
202 | 35 | const STRINGLIB_CHAR *ss = s + m - 1; |
203 | 35 | const STRINGLIB_CHAR *pp = p + m - 1; |
204 | | |
205 | | /* create compressed boyer-moore delta 1 table */ |
206 | | |
207 | | /* process pattern[:-1] */ |
208 | 182 | for (i = 0; i < mlast; i++) { |
209 | 147 | STRINGLIB_BLOOM_ADD(mask, p[i]); |
210 | 147 | if (p[i] == p[mlast]) |
211 | 0 | skip = mlast - i - 1; |
212 | 147 | } |
213 | | /* process pattern[-1] outside the loop */ |
214 | 35 | STRINGLIB_BLOOM_ADD(mask, p[mlast]); |
215 | | |
216 | 126 | for (i = 0; i <= w; i++) { |
217 | | /* note: using mlast in the skip path slows things down on x86 */ |
218 | 91 | if (ss[i] == pp[0]) { |
219 | | /* candidate match */ |
220 | 29 | for (j = 0; j < mlast; j++) |
221 | 29 | if (s[i+j] != p[j]) |
222 | 29 | break; |
223 | 29 | if (j == mlast) { |
224 | | /* got a match! */ |
225 | 0 | if (mode != FAST_COUNT) |
226 | 0 | return i; |
227 | 0 | count++; |
228 | 0 | if (count == maxcount) |
229 | 0 | return maxcount; |
230 | 0 | i = i + mlast; |
231 | 0 | continue; |
232 | 0 | } |
233 | | /* miss: check if next character is part of pattern */ |
234 | 29 | if (!STRINGLIB_BLOOM(mask, ss[i+1])) |
235 | 1 | i = i + m; |
236 | 28 | else |
237 | 28 | i = i + skip; |
238 | 62 | } else { |
239 | | /* skip: check if next character is part of pattern */ |
240 | 62 | if (!STRINGLIB_BLOOM(mask, ss[i+1])) |
241 | 34 | i = i + m; |
242 | 62 | } |
243 | 91 | } |
244 | 35 | } else { /* FAST_RSEARCH */ |
245 | | |
246 | | /* create compressed boyer-moore delta 1 table */ |
247 | | |
248 | | /* process pattern[0] outside the loop */ |
249 | 0 | STRINGLIB_BLOOM_ADD(mask, p[0]); |
250 | | /* process pattern[:0:-1] */ |
251 | 0 | for (i = mlast; i > 0; i--) { |
252 | 0 | STRINGLIB_BLOOM_ADD(mask, p[i]); |
253 | 0 | if (p[i] == p[0]) |
254 | 0 | skip = i - 1; |
255 | 0 | } |
256 | |
|
257 | 0 | for (i = w; i >= 0; i--) { |
258 | 0 | if (s[i] == p[0]) { |
259 | | /* candidate match */ |
260 | 0 | for (j = mlast; j > 0; j--) |
261 | 0 | if (s[i+j] != p[j]) |
262 | 0 | break; |
263 | 0 | if (j == 0) |
264 | | /* got a match! */ |
265 | 0 | return i; |
266 | | /* miss: check if previous character is part of pattern */ |
267 | 0 | if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) |
268 | 0 | i = i - m; |
269 | 0 | else |
270 | 0 | i = i - skip; |
271 | 0 | } else { |
272 | | /* skip: check if previous character is part of pattern */ |
273 | 0 | if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) |
274 | 0 | i = i - m; |
275 | 0 | } |
276 | 0 | } |
277 | 0 | } |
278 | | |
279 | 35 | if (mode != FAST_COUNT) |
280 | 35 | return -1; |
281 | 0 | return count; |
282 | 35 | } Unexecuted instantiation: bytearrayobject.c:fastsearch Unexecuted instantiation: bytesobject.c:fastsearch unicodeobject.c:asciilib_fastsearch Line | Count | Source | 167 | 2.24k | { | 168 | 2.24k | unsigned long mask; | 169 | 2.24k | Py_ssize_t skip, count = 0; | 170 | 2.24k | Py_ssize_t i, j, mlast, w; | 171 | | | 172 | 2.24k | w = n - m; | 173 | | | 174 | 2.24k | if (w < 0 || (mode == FAST_COUNT && maxcount == 0)) | 175 | 0 | return -1; | 176 | | | 177 | | /* look for special cases */ | 178 | 2.24k | if (m <= 1) { | 179 | 2.22k | if (m <= 0) | 180 | 0 | return -1; | 181 | | /* use special case for 1-character strings */ | 182 | 2.22k | if (mode == FAST_SEARCH) | 183 | 18 | return STRINGLIB(find_char)(s, n, p[0]); | 184 | 2.20k | else if (mode == FAST_RSEARCH) | 185 | 2.20k | return STRINGLIB(rfind_char)(s, n, p[0]); | 186 | 2 | else { /* FAST_COUNT */ | 187 | 85 | for (i = 0; i < n; i++) | 188 | 83 | if (s[i] == p[0]) { | 189 | 18 | count++; | 190 | 18 | if (count == maxcount) | 191 | 0 | return maxcount; | 192 | 18 | } | 193 | 2 | return count; | 194 | 2 | } | 195 | 2.22k | } | 196 | | | 197 | 14 | mlast = m - 1; | 198 | 14 | skip = mlast - 1; | 199 | 14 | mask = 0; | 200 | | | 201 | 14 | if (mode != FAST_RSEARCH) { | 202 | 14 | const STRINGLIB_CHAR *ss = s + m - 1; | 203 | 14 | const STRINGLIB_CHAR *pp = p + m - 1; | 204 | | | 205 | | /* create compressed boyer-moore delta 1 table */ | 206 | | | 207 | | /* process pattern[:-1] */ | 208 | 140 | for (i = 0; i < mlast; i++) { | 209 | 126 | STRINGLIB_BLOOM_ADD(mask, p[i]); | 210 | 126 | if (p[i] == p[mlast]) | 211 | 0 | skip = mlast - i - 1; | 212 | 126 | } | 213 | | /* process pattern[-1] outside the loop */ | 214 | 14 | STRINGLIB_BLOOM_ADD(mask, p[mlast]); | 215 | | | 216 | 84 | for (i = 0; i <= w; i++) { | 217 | | /* note: using mlast in the skip path slows things down on x86 */ | 218 | 70 | if (ss[i] == pp[0]) { | 219 | | /* candidate match */ | 220 | 28 | for (j = 0; j < mlast; j++) | 221 | 28 | if (s[i+j] != p[j]) | 222 | 28 | break; | 223 | 28 | if (j == mlast) { | 224 | | /* got a match! */ | 225 | 0 | if (mode != FAST_COUNT) | 226 | 0 | return i; | 227 | 0 | count++; | 228 | 0 | if (count == maxcount) | 229 | 0 | return maxcount; | 230 | 0 | i = i + mlast; | 231 | 0 | continue; | 232 | 0 | } | 233 | | /* miss: check if next character is part of pattern */ | 234 | 28 | if (!STRINGLIB_BLOOM(mask, ss[i+1])) | 235 | 0 | i = i + m; | 236 | 28 | else | 237 | 28 | i = i + skip; | 238 | 42 | } else { | 239 | | /* skip: check if next character is part of pattern */ | 240 | 42 | if (!STRINGLIB_BLOOM(mask, ss[i+1])) | 241 | 14 | i = i + m; | 242 | 42 | } | 243 | 70 | } | 244 | 14 | } else { /* FAST_RSEARCH */ | 245 | | | 246 | | /* create compressed boyer-moore delta 1 table */ | 247 | | | 248 | | /* process pattern[0] outside the loop */ | 249 | 0 | STRINGLIB_BLOOM_ADD(mask, p[0]); | 250 | | /* process pattern[:0:-1] */ | 251 | 0 | for (i = mlast; i > 0; i--) { | 252 | 0 | STRINGLIB_BLOOM_ADD(mask, p[i]); | 253 | 0 | if (p[i] == p[0]) | 254 | 0 | skip = i - 1; | 255 | 0 | } | 256 | |
| 257 | 0 | for (i = w; i >= 0; i--) { | 258 | 0 | if (s[i] == p[0]) { | 259 | | /* candidate match */ | 260 | 0 | for (j = mlast; j > 0; j--) | 261 | 0 | if (s[i+j] != p[j]) | 262 | 0 | break; | 263 | 0 | if (j == 0) | 264 | | /* got a match! */ | 265 | 0 | return i; | 266 | | /* miss: check if previous character is part of pattern */ | 267 | 0 | if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) | 268 | 0 | i = i - m; | 269 | 0 | else | 270 | 0 | i = i - skip; | 271 | 0 | } else { | 272 | | /* skip: check if previous character is part of pattern */ | 273 | 0 | if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) | 274 | 0 | i = i - m; | 275 | 0 | } | 276 | 0 | } | 277 | 0 | } | 278 | | | 279 | 14 | if (mode != FAST_COUNT) | 280 | 14 | return -1; | 281 | 0 | return count; | 282 | 14 | } |
unicodeobject.c:ucs1lib_fastsearch Line | Count | Source | 167 | 21 | { | 168 | 21 | unsigned long mask; | 169 | 21 | Py_ssize_t skip, count = 0; | 170 | 21 | Py_ssize_t i, j, mlast, w; | 171 | | | 172 | 21 | w = n - m; | 173 | | | 174 | 21 | if (w < 0 || (mode == FAST_COUNT && maxcount == 0)) | 175 | 0 | return -1; | 176 | | | 177 | | /* look for special cases */ | 178 | 21 | if (m <= 1) { | 179 | 0 | if (m <= 0) | 180 | 0 | return -1; | 181 | | /* use special case for 1-character strings */ | 182 | 0 | if (mode == FAST_SEARCH) | 183 | 0 | return STRINGLIB(find_char)(s, n, p[0]); | 184 | 0 | else if (mode == FAST_RSEARCH) | 185 | 0 | return STRINGLIB(rfind_char)(s, n, p[0]); | 186 | 0 | else { /* FAST_COUNT */ | 187 | 0 | for (i = 0; i < n; i++) | 188 | 0 | if (s[i] == p[0]) { | 189 | 0 | count++; | 190 | 0 | if (count == maxcount) | 191 | 0 | return maxcount; | 192 | 0 | } | 193 | 0 | return count; | 194 | 0 | } | 195 | 0 | } | 196 | | | 197 | 21 | mlast = m - 1; | 198 | 21 | skip = mlast - 1; | 199 | 21 | mask = 0; | 200 | | | 201 | 21 | if (mode != FAST_RSEARCH) { | 202 | 21 | const STRINGLIB_CHAR *ss = s + m - 1; | 203 | 21 | const STRINGLIB_CHAR *pp = p + m - 1; | 204 | | | 205 | | /* create compressed boyer-moore delta 1 table */ | 206 | | | 207 | | /* process pattern[:-1] */ | 208 | 42 | for (i = 0; i < mlast; i++) { | 209 | 21 | STRINGLIB_BLOOM_ADD(mask, p[i]); | 210 | 21 | if (p[i] == p[mlast]) | 211 | 0 | skip = mlast - i - 1; | 212 | 21 | } | 213 | | /* process pattern[-1] outside the loop */ | 214 | 21 | STRINGLIB_BLOOM_ADD(mask, p[mlast]); | 215 | | | 216 | 42 | for (i = 0; i <= w; i++) { | 217 | | /* note: using mlast in the skip path slows things down on x86 */ | 218 | 21 | if (ss[i] == pp[0]) { | 219 | | /* candidate match */ | 220 | 1 | for (j = 0; j < mlast; j++) | 221 | 1 | if (s[i+j] != p[j]) | 222 | 1 | break; | 223 | 1 | if (j == mlast) { | 224 | | /* got a match! */ | 225 | 0 | if (mode != FAST_COUNT) | 226 | 0 | return i; | 227 | 0 | count++; | 228 | 0 | if (count == maxcount) | 229 | 0 | return maxcount; | 230 | 0 | i = i + mlast; | 231 | 0 | continue; | 232 | 0 | } | 233 | | /* miss: check if next character is part of pattern */ | 234 | 1 | if (!STRINGLIB_BLOOM(mask, ss[i+1])) | 235 | 1 | i = i + m; | 236 | 0 | else | 237 | 0 | i = i + skip; | 238 | 20 | } else { | 239 | | /* skip: check if next character is part of pattern */ | 240 | 20 | if (!STRINGLIB_BLOOM(mask, ss[i+1])) | 241 | 20 | i = i + m; | 242 | 20 | } | 243 | 21 | } | 244 | 21 | } else { /* FAST_RSEARCH */ | 245 | | | 246 | | /* create compressed boyer-moore delta 1 table */ | 247 | | | 248 | | /* process pattern[0] outside the loop */ | 249 | 0 | STRINGLIB_BLOOM_ADD(mask, p[0]); | 250 | | /* process pattern[:0:-1] */ | 251 | 0 | for (i = mlast; i > 0; i--) { | 252 | 0 | STRINGLIB_BLOOM_ADD(mask, p[i]); | 253 | 0 | if (p[i] == p[0]) | 254 | 0 | skip = i - 1; | 255 | 0 | } | 256 | |
| 257 | 0 | for (i = w; i >= 0; i--) { | 258 | 0 | if (s[i] == p[0]) { | 259 | | /* candidate match */ | 260 | 0 | for (j = mlast; j > 0; j--) | 261 | 0 | if (s[i+j] != p[j]) | 262 | 0 | break; | 263 | 0 | if (j == 0) | 264 | | /* got a match! */ | 265 | 0 | return i; | 266 | | /* miss: check if previous character is part of pattern */ | 267 | 0 | if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) | 268 | 0 | i = i - m; | 269 | 0 | else | 270 | 0 | i = i - skip; | 271 | 0 | } else { | 272 | | /* skip: check if previous character is part of pattern */ | 273 | 0 | if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) | 274 | 0 | i = i - m; | 275 | 0 | } | 276 | 0 | } | 277 | 0 | } | 278 | | | 279 | 21 | if (mode != FAST_COUNT) | 280 | 21 | return -1; | 281 | 0 | return count; | 282 | 21 | } |
Unexecuted instantiation: unicodeobject.c:ucs2lib_fastsearch Unexecuted instantiation: unicodeobject.c:ucs4lib_fastsearch Unexecuted instantiation: unicodeobject.c:fastsearch Unexecuted instantiation: bytes_methods.c:fastsearch |
283 | | |