/src/Python-3.8.3/Objects/stringlib/find_max_char.h
Line  | Count  | Source  | 
1  |  | /* Finding the optimal width of unicode characters in a buffer */  | 
2  |  |  | 
3  |  | #if !STRINGLIB_IS_UNICODE  | 
4  |  | # error "find_max_char.h is specific to Unicode"  | 
5  |  | #endif  | 
6  |  |  | 
7  |  | /* Mask to quickly check whether a C 'long' contains a  | 
8  |  |    non-ASCII, UTF8-encoded char. */  | 
9  |  | #if (SIZEOF_LONG == 8)  | 
10  | 166k  | # define UCS1_ASCII_CHAR_MASK 0x8080808080808080UL  | 
11  |  | #elif (SIZEOF_LONG == 4)  | 
12  |  | # define UCS1_ASCII_CHAR_MASK 0x80808080UL  | 
13  |  | #else  | 
14  |  | # error C 'long' size should be either 4 or 8!  | 
15  |  | #endif  | 
16  |  |  | 
17  |  | #if STRINGLIB_SIZEOF_CHAR == 1  | 
18  |  |  | 
19  |  | Py_LOCAL_INLINE(Py_UCS4)  | 
20  |  | STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)  | 
21  | 57.9k  | { | 
22  | 57.9k  |     const unsigned char *p = (const unsigned char *) begin;  | 
23  | 57.9k  |     const unsigned char *aligned_end =  | 
24  | 57.9k  |             (const unsigned char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);  | 
25  |  |  | 
26  | 425k  |     while (p < end) { | 
27  | 371k  |         if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) { | 
28  |  |             /* Help register allocation */  | 
29  | 50.4k  |             const unsigned char *_p = p;  | 
30  | 217k  |             while (_p < aligned_end) { | 
31  | 166k  |                 unsigned long value = *(const unsigned long *) _p;  | 
32  | 166k  |                 if (value & UCS1_ASCII_CHAR_MASK)  | 
33  | 0  |                     return 255;  | 
34  | 166k  |                 _p += SIZEOF_LONG;  | 
35  | 166k  |             }  | 
36  | 50.4k  |             p = _p;  | 
37  | 50.4k  |             if (p == end)  | 
38  | 4.79k  |                 break;  | 
39  | 50.4k  |         }  | 
40  | 367k  |         if (*p++ & 0x80)  | 
41  | 0  |             return 255;  | 
42  | 367k  |     }  | 
43  | 57.9k  |     return 127;  | 
44  | 57.9k  | } unicodeobject.c:ucs1lib_find_max_char Line  | Count  | Source  |  21  | 57.9k  | { |  22  | 57.9k  |     const unsigned char *p = (const unsigned char *) begin;  |  23  | 57.9k  |     const unsigned char *aligned_end =  |  24  | 57.9k  |             (const unsigned char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);  |  25  |  |  |  26  | 425k  |     while (p < end) { |  27  | 371k  |         if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) { |  28  |  |             /* Help register allocation */  |  29  | 50.4k  |             const unsigned char *_p = p;  |  30  | 217k  |             while (_p < aligned_end) { |  31  | 166k  |                 unsigned long value = *(const unsigned long *) _p;  |  32  | 166k  |                 if (value & UCS1_ASCII_CHAR_MASK)  |  33  | 0  |                     return 255;  |  34  | 166k  |                 _p += SIZEOF_LONG;  |  35  | 166k  |             }  |  36  | 50.4k  |             p = _p;  |  37  | 50.4k  |             if (p == end)  |  38  | 4.79k  |                 break;  |  39  | 50.4k  |         }  |  40  | 367k  |         if (*p++ & 0x80)  |  41  | 0  |             return 255;  |  42  | 367k  |     }  |  43  | 57.9k  |     return 127;  |  44  | 57.9k  | }  |  
 Unexecuted instantiation: unicodeobject.c:asciilib_find_max_char  | 
45  |  |  | 
46  |  | #undef ASCII_CHAR_MASK  | 
47  |  |  | 
48  |  | #else /* STRINGLIB_SIZEOF_CHAR == 1 */  | 
49  |  |  | 
50  | 0  | #define MASK_ASCII 0xFFFFFF80  | 
51  | 0  | #define MASK_UCS1 0xFFFFFF00  | 
52  | 0  | #define MASK_UCS2 0xFFFF0000  | 
53  |  |  | 
54  | 0  | #define MAX_CHAR_ASCII 0x7f  | 
55  | 0  | #define MAX_CHAR_UCS1  0xff  | 
56  | 0  | #define MAX_CHAR_UCS2  0xffff  | 
57  | 0  | #define MAX_CHAR_UCS4  0x10ffff  | 
58  |  |  | 
59  |  | Py_LOCAL_INLINE(Py_UCS4)  | 
60  |  | STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)  | 
61  | 0  | { | 
62  |  | #if STRINGLIB_SIZEOF_CHAR == 2  | 
63  | 0  |     const Py_UCS4 mask_limit = MASK_UCS1;  | 
64  | 0  |     const Py_UCS4 max_char_limit = MAX_CHAR_UCS2;  | 
65  |  | #elif STRINGLIB_SIZEOF_CHAR == 4  | 
66  | 0  |     const Py_UCS4 mask_limit = MASK_UCS2;  | 
67  | 0  |     const Py_UCS4 max_char_limit = MAX_CHAR_UCS4;  | 
68  |  | #else  | 
69  |  | #error Invalid STRINGLIB_SIZEOF_CHAR (must be 1, 2 or 4)  | 
70  |  | #endif  | 
71  | 0  |     Py_UCS4 mask;  | 
72  | 0  |     Py_ssize_t n = end - begin;  | 
73  | 0  |     const STRINGLIB_CHAR *p = begin;  | 
74  | 0  |     const STRINGLIB_CHAR *unrolled_end = begin + _Py_SIZE_ROUND_DOWN(n, 4);  | 
75  | 0  |     Py_UCS4 max_char;  | 
76  |  | 
  | 
77  | 0  |     max_char = MAX_CHAR_ASCII;  | 
78  | 0  |     mask = MASK_ASCII;  | 
79  | 0  |     while (p < unrolled_end) { | 
80  | 0  |         STRINGLIB_CHAR bits = p[0] | p[1] | p[2] | p[3];  | 
81  | 0  |         if (bits & mask) { | 
82  | 0  |             if (mask == mask_limit) { | 
83  |  |                 /* Limit reached */  | 
84  | 0  |                 return max_char_limit;  | 
85  | 0  |             }  | 
86  | 0  |             if (mask == MASK_ASCII) { | 
87  | 0  |                 max_char = MAX_CHAR_UCS1;  | 
88  | 0  |                 mask = MASK_UCS1;  | 
89  | 0  |             }  | 
90  | 0  |             else { | 
91  |  |                 /* mask can't be MASK_UCS2 because of mask_limit above */  | 
92  | 0  |                 assert(mask == MASK_UCS1);  | 
93  | 0  |                 max_char = MAX_CHAR_UCS2;  | 
94  | 0  |                 mask = MASK_UCS2;  | 
95  | 0  |             }  | 
96  |  |             /* We check the new mask on the same chars in the next iteration */  | 
97  | 0  |             continue;  | 
98  | 0  |         }  | 
99  | 0  |         p += 4;  | 
100  | 0  |     }  | 
101  | 0  |     while (p < end) { | 
102  | 0  |         if (p[0] & mask) { | 
103  | 0  |             if (mask == mask_limit) { | 
104  |  |                 /* Limit reached */  | 
105  | 0  |                 return max_char_limit;  | 
106  | 0  |             }  | 
107  | 0  |             if (mask == MASK_ASCII) { | 
108  | 0  |                 max_char = MAX_CHAR_UCS1;  | 
109  | 0  |                 mask = MASK_UCS1;  | 
110  | 0  |             }  | 
111  | 0  |             else { | 
112  |  |                 /* mask can't be MASK_UCS2 because of mask_limit above */  | 
113  | 0  |                 assert(mask == MASK_UCS1);  | 
114  | 0  |                 max_char = MAX_CHAR_UCS2;  | 
115  | 0  |                 mask = MASK_UCS2;  | 
116  | 0  |             }  | 
117  |  |             /* We check the new mask on the same chars in the next iteration */  | 
118  | 0  |             continue;  | 
119  | 0  |         }  | 
120  | 0  |         p++;  | 
121  | 0  |     }  | 
122  | 0  |     return max_char;  | 
123  | 0  | } Unexecuted instantiation: unicodeobject.c:ucs2lib_find_max_char Unexecuted instantiation: unicodeobject.c:ucs4lib_find_max_char  | 
124  |  |  | 
125  |  | #undef MASK_ASCII  | 
126  |  | #undef MASK_UCS1  | 
127  |  | #undef MASK_UCS2  | 
128  |  | #undef MAX_CHAR_ASCII  | 
129  |  | #undef MAX_CHAR_UCS1  | 
130  |  | #undef MAX_CHAR_UCS2  | 
131  |  | #undef MAX_CHAR_UCS4  | 
132  |  |  | 
133  |  | #endif /* STRINGLIB_SIZEOF_CHAR == 1 */  | 
134  |  |  |