/src/wget2/libwget/utils.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2012 Tim Ruehsen |
3 | | * Copyright (c) 2015-2024 Free Software Foundation, Inc. |
4 | | * |
5 | | * This file is part of libwget. |
6 | | * |
7 | | * Libwget is free software: you can redistribute it and/or modify |
8 | | * it under the terms of the GNU Lesser General Public License as published by |
9 | | * the Free Software Foundation, either version 3 of the License, or |
10 | | * (at your option) any later version. |
11 | | * |
12 | | * Libwget is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | | * GNU Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public License |
18 | | * along with libwget. If not, see <https://www.gnu.org/licenses/>. |
19 | | * |
20 | | * |
21 | | * a collection of utility routines |
22 | | * |
23 | | * Changelog |
24 | | * 25.04.2012 Tim Ruehsen created |
25 | | * |
26 | | */ |
27 | | |
28 | | #include <config.h> |
29 | | |
30 | | #include <stddef.h> |
31 | | #include <string.h> |
32 | | #include <strings.h> |
33 | | #include <unistd.h> |
34 | | #include <time.h> |
35 | | #include <glob.h> |
36 | | |
37 | | #include "c-ctype.h" |
38 | | #include "c-strcase.h" |
39 | | |
40 | | #if defined __clang__ |
41 | | // silence warnings in gnulib code |
42 | | #pragma clang diagnostic ignored "-Wshorten-64-to-32" |
43 | | #endif |
44 | | |
45 | | #include "timespec.h" // gnulib gettime() |
46 | | |
47 | | #ifdef HAVE_IOCTL |
48 | | # include <sys/ioctl.h> |
49 | | # include <termios.h> |
50 | | #endif |
51 | | |
52 | | #include <wget.h> |
53 | | #include "private.h" |
54 | | |
55 | | /** |
56 | | * \file |
57 | | * \brief General utility functions |
58 | | * \defgroup libwget-utils General utility functions |
59 | | * @{ |
60 | | * |
61 | | * This is a collection of short routines that are used by libwget and/or Wget code. |
62 | | * They are exported because they may be useful to other developers as well. |
63 | | */ |
64 | | |
/**
 * \param[in] s1 First string, may be NULL
 * \param[in] s2 Second string, may be NULL
 * \return
 * 0 if both \p s1 and \p s2 are NULL<br>
 * -1 if only \p s1 is NULL<br>
 * 1 if only \p s2 is NULL<br>
 * otherwise the result of strcmp(\p s1, \p s2)
 *
 * NULL-tolerant variant of strcmp(): a NULL string sorts before any
 * non-NULL string and two NULL strings compare equal.
 */
int wget_strcmp(const char *s1, const char *s2)
{
	// Common case first: both strings are present.
	if (s1 && s2)
		return strcmp(s1, s2);

	// From here on at least one of the two is NULL.
	if (s1)
		return 1;

	return s2 ? -1 : 0;
}
91 | | |
/**
 * \param[in] s1 First string, may be NULL
 * \param[in] s2 Second string, may be NULL
 * \return
 * 0 if both \p s1 and \p s2 are NULL<br>
 * -1 if only \p s1 is NULL<br>
 * 1 if only \p s2 is NULL<br>
 * otherwise the result of strcasecmp(\p s1, \p s2)
 *
 * NULL-tolerant variant of strcasecmp(): the comparison itself is delegated
 * to strcasecmp(), only the NULL cases are decided here.
 */
int wget_strcasecmp(const char *s1, const char *s2)
{
	if (s1 == NULL)
		return s2 == NULL ? 0 : -1;

	if (s2 == NULL)
		return 1;

	return strcasecmp(s1, s2);
}
118 | | |
/**
 * \param[in] s1 First string, may be NULL
 * \param[in] s2 Second string, may be NULL
 * \return
 * 0 if both \p s1 and \p s2 are NULL, or if they are equal disregarding case for ASCII letters<br>
 * <0 if only \p s1 is NULL or \p s1 is smaller than \p s2<br>
 * >0 if only \p s2 is NULL or \p s1 is greater than \p s2
 *
 * Case-insensitive ASCII string comparison that also accepts NULL values.
 * The comparison of two non-NULL strings is delegated to c_strcasecmp().
 */
int wget_strcasecmp_ascii(const char *s1, const char *s2)
{
	const int have1 = s1 != NULL;
	const int have2 = s2 != NULL;

	if (have1 && have2)
		return c_strcasecmp(s1, s2);

	// NULL vs. non-NULL: 1 - 0 = 1, 0 - 1 = -1, 0 - 0 = 0
	return have1 - have2;
}
145 | | |
/**
 * \param[in] s1 First string, may be NULL
 * \param[in] s2 Second string, may be NULL
 * \param[in] n Max. number of chars to compare
 * \return
 * 0 if both \p s1 and \p s2 are NULL, or if they are equal disregarding case for ASCII letters<br>
 * <0 if only \p s1 is NULL or \p s1 is smaller than \p s2<br>
 * >0 if only \p s2 is NULL or \p s1 is greater than \p s2
 *
 * Case-insensitive ASCII comparison of at most \p n chars that also accepts
 * NULL values. Two non-NULL strings are compared by c_strncasecmp().
 */
int wget_strncasecmp_ascii(const char *s1, const char *s2, size_t n)
{
	if (s1 != NULL && s2 != NULL)
		return c_strncasecmp(s1, s2, n);

	// At least one string is missing; decide on the NULL ordering only.
	if (s1 == NULL && s2 == NULL)
		return 0;

	return s1 == NULL ? -1 : 1;
}
173 | | |
/**
 * \param[in,out] s String to convert, may be NULL
 * \return Value of \p s
 *
 * Converts the ASCII string \p s to lowercase in place.
 * Only characters for which c_isupper() is true are modified.
 */
char *wget_strtolower(char *s)
{
	if (!s)
		return s;

	char *p = s;

	while (*p) {
		if (c_isupper(*p))
			*p = (char) c_tolower(*p);
		p++;
	}

	return s;
}
191 | | |
/**
 * \param[in] s1 First string, may be NULL
 * \param[in] s2 Second string, may be NULL
 * \param[in] n Max. number of chars to compare
 * \return
 * 0 if both \p s1 and \p s2 are NULL or compare equal<br>
 * <0 if only \p s1 is NULL or \p s1 is smaller than \p s2<br>
 * >0 if only \p s2 is NULL or \p s1 is greater than \p s2
 *
 * NULL-tolerant variant of strncmp(): two non-NULL strings are compared
 * by strncmp(\p s1, \p s2, \p n).
 */
int wget_strncmp(const char *s1, const char *s2, size_t n)
{
	if (s1 && s2)
		return strncmp(s1, s2, n);

	if (!s1 && !s2)
		return 0;

	// Exactly one string is NULL: NULL sorts first.
	return s1 ? 1 : -1;
}
218 | | |
/**
 * \param[in] s1 First string, may be NULL
 * \param[in] s2 Second string, may be NULL
 * \param[in] n Max. number of chars to compare
 * \return
 * 0 if both \p s1 and \p s2 are NULL or compare equal disregarding case<br>
 * <0 if only \p s1 is NULL or \p s1 is smaller than \p s2<br>
 * >0 if only \p s2 is NULL or \p s1 is greater than \p s2
 *
 * NULL-tolerant variant of strncasecmp(): two non-NULL strings are compared
 * by strncasecmp(\p s1, \p s2, \p n).
 */
int wget_strncasecmp(const char *s1, const char *s2, size_t n)
{
	if (s1 == NULL) {
		// NULL equals NULL and sorts before everything else.
		return s2 == NULL ? 0 : -1;
	}

	return s2 == NULL ? 1 : strncasecmp(s1, s2, n);
}
245 | | |
/**
 * \param[in] src Pointer to input buffer
 * \param[in] src_len Number of bytes to encode
 * \param[out] dst Buffer to hold the encoded string
 * \param[in] dst_size Size of \p dst in bytes
 *
 * Encodes a number of bytes into a lowercase hexadecimal C string.
 *
 * The result is always NUL-terminated. If \p dst is too small to hold
 * 2 * \p src_len characters plus the terminating NUL, the output is truncated
 * to \p dst_size - 1 characters (which may end in the middle of a byte, i.e.
 * after its high nibble).
 *
 * Nothing is written if \p dst_size is 0 or if \p src or \p dst is NULL.
 */
void wget_memtohex(const unsigned char *src, size_t src_len, char *dst, size_t dst_size)
{
	static const char hexdigits[] = "0123456789abcdef";

	if (dst_size == 0 || !dst || !src)
		return;

	// Number of complete source bytes that fit in front of the terminating NUL.
	const size_t max_bytes = (dst_size - 1) / 2;
	int truncated = 0;

	// Same condition as 'src_len * 2 >= dst_size', but without the
	// multiplication that could wrap around for very large src_len.
	if (src_len > max_bytes) {
		src_len = max_bytes;
		truncated = 1;
	}

	for (size_t i = 0; i < src_len; i++) {
		*dst++ = hexdigits[src[i] >> 4];
		*dst++ = hexdigits[src[i] & 0xf];
	}

	// An even dst_size leaves room for exactly one more character:
	// emit the high nibble of the first byte that did not fit completely.
	if (truncated && (dst_size & 1) == 0)
		*dst++ = hexdigits[src[src_len] >> 4];

	*dst = 0;
}
276 | | |
/**
 * \param[in] ms Number of milliseconds to sleep
 *
 * Pause for \p ms milliseconds by calling nanosleep().
 * Values of \p ms less than or equal to 0 return immediately.
 * The return value of nanosleep() is not evaluated.
 */
void wget_millisleep(int ms)
{
	if (ms > 0) {
		// split into whole seconds and the remainder in nanoseconds
		struct timespec delay = {
			.tv_sec = ms / 1000,
			.tv_nsec = (ms % 1000) * 1000000
		};

		nanosleep(&delay, NULL);
	}
}
289 | | |
/**
 * \return Current time in milliseconds
 *
 * Return the current milliseconds since the epoch.
 * The time is obtained from gnulib's gettime() and the nanosecond part is
 * truncated to whole milliseconds.
 */
long long wget_get_timemillis(void)
{
	struct timespec now;
	long long millis;

	gettime(&now);

	millis = now.tv_sec * 1000LL;
	millis += now.tv_nsec / 1000000;

	return millis;
}
301 | | |
302 | | WGET_GCC_CONST |
303 | | static unsigned char unhex(unsigned char c) |
304 | 0 | { |
305 | 0 | return c <= '9' ? c - '0' : (c <= 'F' ? c - 'A' + 10 : c - 'a' + 10); |
306 | 0 | } |
307 | | |
/**
 * \param[in,out] src String to unescape
 * \return
 * 0 if the string did not change<br>
 * 1 if unescaping took place
 *
 * Does an inline percent unescape.
 * Each occurrence of %xx (x = hex digit) is converted into its byte representation.
 * A '%' that is not followed by two hex digits is copied unchanged.
 */
int wget_percent_unescape(char *src)
{
	// work on unsigned bytes to avoid casting a lot
	unsigned char *in = (unsigned char *) src;
	unsigned char *out = in;
	int changed = 0;

	while (*in) {
		// The && chain stops at the first NUL, so in[2] is only read if in[1] is a hex digit.
		if (*in == '%' && c_isxdigit(in[1]) && c_isxdigit(in[2])) {
			*out++ = (unsigned char) (unhex(in[1]) << 4) | unhex(in[2]);
			in += 3;
			changed = 1;
		} else {
			*out++ = *in++;
		}
	}

	*out = 0;

	return changed;
}
339 | | |
/**
 * \param[in] s String
 * \param[in] tail String
 * \return 1 if \p tail matches the end of \p s, 0 if not
 *
 * Checks if \p tail matches the end of the string \p s (case-sensitive).
 * Both arguments are passed to strlen(), so neither may be NULL.
 */
int wget_match_tail(const char *s, const char *tail)
{
	const size_t tail_len = strlen(tail);
	const size_t s_len = strlen(s);

	// A tail longer than the string itself can never match.
	if (tail_len > s_len)
		return 0;

	return strcmp(s + (s_len - tail_len), tail) == 0;
}
358 | | |
/**
 * \param[in] s String
 * \param[in] tail String
 * \return 1 if \p tail matches the end of \p s, 0 if not
 *
 * Checks if \p tail matches the end of the string \p s, disregarding the case, ASCII only.
 * Both arguments are passed to strlen(), so neither may be NULL.
 */
int wget_match_tail_nocase(const char *s, const char *tail)
{
	const size_t tail_len = strlen(tail);
	const size_t s_len = strlen(s);

	// A tail longer than the string itself can never match.
	if (tail_len > s_len)
		return 0;

	return wget_strcasecmp_ascii(s + (s_len - tail_len), tail) == 0;
}
378 | | |
/**
 * \param[in] str String whose first \p n bytes are the glob() pattern
 * \param[in] n Length of the pattern part of \p str
 * \param[in] flags Flags to pass to glob()
 * \return Expanded string after running glob, or NULL
 *
 * Finds a pathname by running glob(3) on the pattern in the first \p n bytes
 * of \p str. Returns a newly allocated string with the first \p n
 * bytes replaced by the first match reported by glob(3) if one was
 * found. Otherwise it returns NULL.
 *
 * The remainder of \p str (starting at offset \p n) is appended unchanged.
 */
char *wget_strnglob(const char *str, size_t n, int flags)
{
	char *pattern = wget_strmemdup(str, n);

	if (!pattern)
		return NULL;

	glob_t matches;
	char *result = NULL;
	const int rc = glob(pattern, flags, NULL, &matches);

	if (rc == 0) {
		// only the first match is used
		if (matches.gl_pathc > 0)
			result = wget_aprintf("%s%s", matches.gl_pathv[0], str + n);

		globfree(&matches);
	}

	xfree(pattern);

	return result;
}
410 | | |
/**
 * \param[out] buf Result buffer
 * \param[in] bufsize Size of \p buf
 * \param[in] n Number to convert
 * \return Pointer to printable representation of \p n (this is \p buf)
 *
 * Returns a human readable representation of \p n.
 * \p n, a byte quantity, is converted to a human-readable abbreviated
 * form a la sizes printed by `ls -lh'. The result is written into the
 * provided buffer.
 *
 * Values below 1024 are printed as a plain number followed by a space.
 * Larger values are scaled to the matching unit and printed with two
 * decimals if the scaled value is below 1000, else rounded without decimals.
 *
 * This intentionally uses kilobyte (KB), megabyte (MB), etc. in their
 * original computer-related meaning of "powers of 1024". We don't
 * use the "*bibyte" names invented in 1998, and seldom used in
 * practice.
 */
char *wget_human_readable(char *buf, size_t bufsize, uint64_t n)
{
	/* These suffixes are compatible with those of GNU `ls -lh'. */
	static const char powers[] = {
		'K', /* kilobyte,  2^10 bytes */
		'M', /* megabyte,  2^20 bytes */
		'G', /* gigabyte,  2^30 bytes */
		'T', /* terabyte,  2^40 bytes */
		'P', /* petabyte,  2^50 bytes */
		'E', /* exabyte,   2^60 bytes */
		'Z', /* zettabyte, 2^70 bytes */
		'Y', /* yottabyte, 2^80 bytes */
	};
	unsigned unit = 0;

	/* Anything below 1K is printed as is. */
	if (n < 1024) {
		wget_snprintf(buf, bufsize, "%u ", (unsigned int) n);
		return buf;
	}

	/* Scale N down until N/1024 fits below 1024 or the largest unit is reached.
	 * Integer division avoids non-portable `long double' arithmetic. */
	while (unit < countof(powers) - 1 && (n / 1024) >= 1024) {
		n /= 1024;
		unit++;
	}

	/* N is still expressed in the next smaller unit, so this last
	 * division yields the fractional value in the selected unit. */
	double scaled = n / 1024.0;

	if (scaled < 1000) {
		/* integer part and two decimal digits */
		wget_snprintf(buf, bufsize, "%d.%02d%c", (int) scaled, ((int) (scaled * 100)) % 100, powers[unit]);
	} else {
		/* rounded, no decimals */
		wget_snprintf(buf, bufsize, "%d%c", (int) (scaled + .5), powers[unit]);
	}

	return buf;
}
475 | | |
476 | | /** |
477 | | * \param[out] width Number of columns in terminal |
478 | | * \param[out] height Number of rows in terminal |
479 | | * \return Upon successful completion, \p wget_get_screen_size will return 0, |
480 | | * and the values of \p width and \p height will be set accordingly. |
481 | | * If an error was encountered, the function will return -1 without touching |
482 | | * the values of \p width and \p height. |
483 | | * |
484 | | * Get the size of the terminal to which the output is currently printed |
485 | | * (stderr). This function accepts two int pointers and will set their values |
486 | | * to the width and height of the active terminal in number of columns. If |
487 | | * either of the parameter is NULL, its value will not be set by the function. |
488 | | */ |
489 | | #ifdef HAVE_IOCTL |
490 | | int wget_get_screen_size(int *width, int *height) |
491 | 0 | { |
492 | 0 | struct winsize wsz; |
493 | 0 | int fd = fileno(stderr); // TODO: progress bar is output to stdout so we probably should be using that !? |
494 | |
|
495 | 0 | if (ioctl (fd, TIOCGWINSZ, &wsz) >= 0) { |
496 | 0 | if (width) |
497 | 0 | *width = wsz.ws_col; |
498 | 0 | if (height) |
499 | 0 | *height = wsz.ws_row; |
500 | |
|
501 | 0 | return 0; |
502 | 0 | } |
503 | | |
504 | 0 | return -1; |
505 | 0 | } |
506 | | #elif defined _WIN32 |
507 | | int wget_get_screen_size(int *width, int *height) |
508 | | { |
509 | | static CONSOLE_SCREEN_BUFFER_INFO csbiInfo; |
510 | | static HANDLE consoleHandle = NULL; |
511 | | |
512 | | if (consoleHandle == NULL) |
513 | | consoleHandle = GetStdHandle(STD_OUTPUT_HANDLE); |
514 | | |
515 | | if (!GetConsoleScreenBufferInfo(consoleHandle, &csbiInfo)) |
516 | | return -1; |
517 | | |
518 | | if (width) |
519 | | *width = csbiInfo.dwSize.X; |
520 | | if (height) |
521 | | *height = csbiInfo.dwSize.Y; |
522 | | |
523 | | return 0; |
524 | | } |
525 | | #else |
526 | | int wget_get_screen_size(WGET_GCC_UNUSED int *width, WGET_GCC_UNUSED int *height) |
527 | | { |
528 | | return -1; |
529 | | } |
530 | | #endif |
531 | | |
532 | | /**@}*/ |