/src/tidy-html5/src/tmbstr.c
Line | Count | Source |
1 | | /* tmbstr.c -- Tidy string utility functions |
2 | | |
3 | | (c) 1998-2006 (W3C) MIT, ERCIM, Keio University |
4 | | See tidy.h for the copyright notice. |
5 | | |
6 | | */ |
7 | | |
8 | | #include "forward.h" |
9 | | #include "tmbstr.h" |
10 | | #include "lexer.h" |
11 | | |
12 | | /* like strdup but using an allocator */ |
13 | | tmbstr TY_(tmbstrdup)( TidyAllocator *allocator, ctmbstr str ) |
14 | 5.72M | { |
15 | 5.72M | tmbstr s = NULL; |
16 | 5.72M | if ( str ) |
17 | 5.06M | { |
18 | 5.06M | uint len = TY_(tmbstrlen)( str ); |
19 | 5.06M | tmbstr cp = s = (tmbstr) TidyAlloc( allocator, 1+len ); |
20 | 92.3M | while ( 0 != (*cp++ = *str++) ) |
21 | 87.2M | /**/; |
22 | 5.06M | } |
23 | 5.72M | return s; |
24 | 5.72M | } |
25 | | |
26 | | /* like strndup but using an allocator */ |
27 | | tmbstr TY_(tmbstrndup)( TidyAllocator *allocator, ctmbstr str, uint len ) |
28 | 4.02M | { |
29 | 4.02M | tmbstr s = NULL; |
30 | 4.02M | if ( str && len > 0 ) |
31 | 3.75M | { |
32 | 3.75M | tmbstr cp = s = (tmbstr) TidyAlloc( allocator, 1+len ); |
33 | 53.0M | while ( len-- > 0 && (*cp++ = *str++) ) |
34 | 49.3M | /**/; |
35 | 3.75M | *cp = 0; |
36 | 3.75M | } |
37 | 4.02M | return s; |
38 | 4.02M | } |
39 | | |
40 | | /* exactly same as strncpy */ |
41 | | uint TY_(tmbstrncpy)( tmbstr s1, ctmbstr s2, uint size ) |
42 | 0 | { |
43 | 0 | if ( s1 != NULL && s2 != NULL ) |
44 | 0 | { |
45 | 0 | tmbstr cp = s1; |
46 | 0 | while ( *s2 && --size ) /* Predecrement: reserve byte */ |
47 | 0 | *cp++ = *s2++; /* for NULL terminator. */ |
48 | 0 | *cp = 0; |
49 | 0 | } |
50 | 0 | return size; |
51 | 0 | } |
52 | | |
53 | | /* Allows expressions like: cp += tmbstrcpy( cp, "joebob" ); |
54 | | */ |
55 | | uint TY_(tmbstrcpy)( tmbstr s1, ctmbstr s2 ) |
56 | 2.61k | { |
57 | 2.61k | uint ncpy = 0; |
58 | 692k | while (0 != (*s1++ = *s2++) ) |
59 | 689k | ++ncpy; |
60 | 2.61k | return ncpy; |
61 | 2.61k | } |
62 | | |
63 | | /* Allows expressions like: cp += tmbstrcat( cp, "joebob" ); |
64 | | */ |
65 | | uint TY_(tmbstrcat)( tmbstr s1, ctmbstr s2 ) |
66 | 7.26k | { |
67 | 7.26k | uint ncpy = 0; |
68 | 1.10M | while ( *s1 ) |
69 | 1.09M | ++s1; |
70 | | |
71 | 18.6k | while (0 != (*s1++ = *s2++) ) |
72 | 11.3k | ++ncpy; |
73 | 7.26k | return ncpy; |
74 | 7.26k | } |
75 | | |
76 | | /* exactly same as strcmp */ |
77 | | int TY_(tmbstrcmp)( ctmbstr s1, ctmbstr s2 ) |
78 | 127M | { |
79 | 127M | int c; |
80 | 164M | while ((c = *s1) == *s2) |
81 | 39.6M | { |
82 | 39.6M | if (c == '\0') |
83 | 2.45M | return 0; |
84 | | |
85 | 37.2M | ++s1; |
86 | 37.2M | ++s2; |
87 | 37.2M | } |
88 | | |
89 | 124M | return (*s1 > *s2 ? 1 : -1); |
90 | 127M | } |
91 | | |
92 | | /* returns byte count, not char count */ |
93 | | uint TY_(tmbstrlen)( ctmbstr str ) |
94 | 44.6M | { |
95 | 44.6M | uint len = 0; |
96 | 44.6M | if ( str ) |
97 | 44.6M | { |
98 | 48.1G | while ( *str++ ) |
99 | 48.0G | ++len; |
100 | 44.6M | } |
101 | 44.6M | return len; |
102 | 44.6M | } |
103 | | |
104 | | /* |
105 | | MS C 4.2 (and ANSI C) doesn't include strcasecmp. |
106 | | Note that tolower and toupper won't |
107 | | work on chars > 127. |
108 | | |
109 | | Neither does ToLower()! |
110 | | */ |
111 | | int TY_(tmbstrcasecmp)( ctmbstr s1, ctmbstr s2 ) |
112 | 122M | { |
113 | 122M | uint c; |
114 | | |
115 | 132M | while (c = (uint)(*s1), TY_(ToLower)(c) == TY_(ToLower)((uint)(*s2))) |
116 | 10.9M | { |
117 | 10.9M | if (c == '\0') |
118 | 744k | return 0; |
119 | | |
120 | 10.2M | ++s1; |
121 | 10.2M | ++s2; |
122 | 10.2M | } |
123 | | |
124 | 121M | return (*s1 > *s2 ? 1 : -1); |
125 | 122M | } |
126 | | |
127 | | int TY_(tmbstrncmp)( ctmbstr s1, ctmbstr s2, uint n ) |
128 | 38.0M | { |
129 | 38.0M | uint c; |
130 | | |
131 | 38.0M | if (s1 == NULL || s2 == NULL) |
132 | 0 | { |
133 | 0 | if (s1 == s2) |
134 | 0 | return 0; |
135 | 0 | return (s1 == NULL ? -1 : 1); |
136 | 0 | } |
137 | | |
138 | 38.3M | while ((c = (byte)*s1) == (byte)*s2) |
139 | 262k | { |
140 | 262k | if (c == '\0') |
141 | 3.82k | return 0; |
142 | | |
143 | 258k | if (n == 0) |
144 | 0 | return 0; |
145 | | |
146 | 258k | ++s1; |
147 | 258k | ++s2; |
148 | 258k | --n; |
149 | 258k | } |
150 | | |
151 | 38.0M | if (n == 0) |
152 | 29.4k | return 0; |
153 | | |
154 | 38.0M | return (*s1 > *s2 ? 1 : -1); |
155 | 38.0M | } |
156 | | |
157 | | int TY_(tmbstrncasecmp)( ctmbstr s1, ctmbstr s2, uint n ) |
158 | 48.8k | { |
159 | 48.8k | uint c; |
160 | | |
161 | 133k | while (c = (uint)(*s1), TY_(ToLower)(c) == TY_(ToLower)((uint)(*s2))) |
162 | 85.2k | { |
163 | 85.2k | if (c == '\0') |
164 | 403 | return 0; |
165 | | |
166 | 84.8k | if (n == 0) |
167 | 0 | return 0; |
168 | | |
169 | 84.8k | ++s1; |
170 | 84.8k | ++s2; |
171 | 84.8k | --n; |
172 | 84.8k | } |
173 | | |
174 | 48.4k | if (n == 0) |
175 | 11.6k | return 0; |
176 | | |
177 | 36.8k | return (*s1 > *s2 ? 1 : -1); |
178 | 48.4k | } |
179 | | |
180 | | ctmbstr TY_(tmbsubstrn)( ctmbstr s1, uint len1, ctmbstr s2 ) |
181 | 5.13k | { |
182 | 5.13k | uint len2 = TY_(tmbstrlen)(s2); |
183 | 5.13k | int ix, diff = len1 - len2; |
184 | | |
185 | 36.7M | for ( ix = 0; ix <= diff; ++ix ) |
186 | 36.7M | { |
187 | 36.7M | if ( TY_(tmbstrncmp)(s1+ix, s2, len2) == 0 ) |
188 | 195 | return (ctmbstr) s1+ix; |
189 | 36.7M | } |
190 | 4.93k | return NULL; |
191 | 5.13k | } |
192 | | |
193 | | ctmbstr TY_(tmbsubstr)( ctmbstr s1, ctmbstr s2 ) |
194 | 2.35k | { |
195 | 2.35k | uint len1 = TY_(tmbstrlen)(s1), len2 = TY_(tmbstrlen)(s2); |
196 | 2.35k | int ix, diff = len1 - len2; |
197 | | |
198 | 10.1k | for ( ix = 0; ix <= diff; ++ix ) |
199 | 8.56k | { |
200 | 8.56k | if ( TY_(tmbstrncasecmp)(s1+ix, s2, len2) == 0 ) |
201 | 815 | return (ctmbstr) s1+ix; |
202 | 8.56k | } |
203 | 1.53k | return NULL; |
204 | 2.35k | } |
205 | | |
206 | | /* Transform ASCII chars in string to lower case */ |
207 | | tmbstr TY_(tmbstrtolower)( tmbstr s ) |
208 | 142k | { |
209 | 142k | tmbstr cp; |
210 | 3.77M | for ( cp=s; *cp; ++cp ) |
211 | 3.63M | *cp = (tmbchar) TY_(ToLower)( *cp ); |
212 | 142k | return s; |
213 | 142k | } |
214 | | |
215 | | /* Transform ASCII chars in string to upper case */ |
216 | | tmbstr TY_(tmbstrtoupper)(tmbstr s) |
217 | 3.84k | { |
218 | 3.84k | tmbstr cp; |
219 | | |
220 | 30.7k | for (cp = s; *cp; ++cp) |
221 | 26.9k | *cp = (tmbchar)TY_(ToUpper)(*cp); |
222 | | |
223 | 3.84k | return s; |
224 | 3.84k | } |
225 | | |
226 | | int TY_(tmbvsnprintf)(tmbstr buffer, size_t count, ctmbstr format, va_list args) |
227 | 139M | { |
228 | 139M | int retval; |
229 | | |
230 | 139M | #if HAS_VSNPRINTF |
231 | 139M | retval = vsnprintf(buffer, count - 1, format, args); |
232 | | /* todo: conditionally null-terminate the string? */ |
233 | 139M | buffer[count - 1] = 0; |
234 | | #else |
235 | | retval = vsprintf(buffer, format, args); |
236 | | #endif /* HAS_VSNPRINTF */ |
237 | 139M | return retval; |
238 | 139M | } |
239 | | |
240 | | int TY_(tmbsnprintf)(tmbstr buffer, size_t count, ctmbstr format, ...) |
241 | 102M | { |
242 | 102M | int retval; |
243 | 102M | va_list args; |
244 | 102M | va_start(args, format); |
245 | 102M | retval = TY_(tmbvsnprintf)(buffer, count, format, args); |
246 | 102M | va_end(args); |
247 | 102M | return retval; |
248 | 102M | } |
249 | | |
250 | | void TY_(strrep)(tmbstr buffer, ctmbstr str, ctmbstr rep) |
251 | 149M | { |
252 | 149M | char *p = strstr(buffer, str); |
253 | 149M | do |
254 | 149M | { |
255 | 149M | if(p) |
256 | 274k | { |
257 | 274k | char buf[1024]; |
258 | 274k | memset(buf,'\0',strlen(buf)); |
259 | | |
260 | 274k | if(buffer == p) |
261 | 134k | { |
262 | 134k | strcpy(buf,rep); |
263 | 134k | strcat(buf,p+strlen(str)); |
264 | 134k | } |
265 | 140k | else |
266 | 140k | { |
267 | 140k | strncpy(buf,buffer,strlen(buffer) - strlen(p)); |
268 | 140k | strcat(buf,rep); |
269 | 140k | strcat(buf,p+strlen(str)); |
270 | 140k | } |
271 | | |
272 | 274k | memset(buffer,'\0',strlen(buffer)); |
273 | 274k | strcpy(buffer,buf); |
274 | 274k | } |
275 | | |
276 | 149M | } while(p && (p = strstr(buffer, str))); |
277 | 149M | } |
278 | | |
279 | | |
280 | | /* |
281 | | * local variables: |
282 | | * mode: c |
283 | | * indent-tabs-mode: nil |
284 | | * c-basic-offset: 4 |
285 | | * eval: (c-set-offset 'substatement-open 0) |
286 | | * end: |
287 | | */ |