Coverage Report

Created: 2024-03-08 06:32

/src/wget2/libwget/utils.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2012 Tim Ruehsen
3
 * Copyright (c) 2015-2024 Free Software Foundation, Inc.
4
 *
5
 * This file is part of libwget.
6
 *
7
 * Libwget is free software: you can redistribute it and/or modify
8
 * it under the terms of the GNU Lesser General Public License as published by
9
 * the Free Software Foundation, either version 3 of the License, or
10
 * (at your option) any later version.
11
 *
12
 * Libwget is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public License
18
 * along with libwget.  If not, see <https://www.gnu.org/licenses/>.
19
 *
20
 *
21
 * a collection of utility routines
22
 *
23
 * Changelog
24
 * 25.04.2012  Tim Ruehsen  created
25
 *
26
 */
27
28
#include <config.h>
29
30
#include <stddef.h>
31
#include <string.h>
32
#include <strings.h>
33
#include <unistd.h>
34
#include <time.h>
35
#include <glob.h>
36
37
#include "c-ctype.h"
38
#include "c-strcase.h"
39
40
#if defined __clang__
41
  // silence warnings in gnulib code
42
  #pragma clang diagnostic ignored "-Wshorten-64-to-32"
43
#endif
44
45
#include "timespec.h" // gnulib gettime()
46
47
#ifdef HAVE_IOCTL
48
# include <sys/ioctl.h>
49
# include <termios.h>
50
#endif
51
52
#include <wget.h>
53
#include "private.h"
54
55
/**
56
 * \file
57
 * \brief General utility functions
58
 * \defgroup libwget-utils General utility functions
59
 * @{
60
 *
61
 * This is a collections of short routines that are used with libwget and/or Wget code.
62
 * They may be useful to other developers that is why they are exported.
63
 */
64
65
/**
66
 * \param[in] s1 String
67
 * \param[in] s2 String
68
 * \return
69
 * 0 if both \p s1 and \p s2 are NULL<br>
70
 * -1 if \p s1 is NULL and \p s2 is not NULL<br>
71
 * 1 if \p s1 is not NULL and \p s2 is NULL
72
 * else it returns strcmp(\p s1, \p s2)
73
 *
74
 * This functions compares \p s1 and \p s2 in the same way as strcmp() does,
75
 * except that it also handles NULL values.
76
 */
77
int wget_strcmp(const char *s1, const char *s2)
78
0
{
79
0
  if (!s1) {
80
0
    if (!s2)
81
0
      return 0;
82
0
    else
83
0
      return -1;
84
0
  } else {
85
0
    if (!s2)
86
0
      return 1;
87
0
    else
88
0
      return strcmp(s1, s2);
89
0
  }
90
0
}
91
92
/**
93
 * \param[in] s1 String
94
 * \param[in] s2 String
95
 * \return
96
 * 0 if both \p s1 and \p s2 are NULL<br>
97
 * -1 if \p s1 is NULL and \p s2 is not NULL<br>
98
 * 1 if \p s1 is not NULL and \p s2 is NULL
99
 * else it returns strcasecmp(\p s1, \p s2)
100
 *
101
 * This functions compares \p s1 and \p s2 in the same way as strcasecmp() does,
102
 * except that it also handles NULL values.
103
 */
104
int wget_strcasecmp(const char *s1, const char *s2)
105
9.37k
{
106
9.37k
  if (!s1) {
107
8.19k
    if (!s2)
108
3.65k
      return 0;
109
4.54k
    else
110
4.54k
      return -1;
111
8.19k
  } else {
112
1.17k
    if (!s2)
113
2
      return 1;
114
1.17k
    else
115
1.17k
      return strcasecmp(s1, s2);
116
1.17k
  }
117
9.37k
}
118
119
/**
120
 * \param[in] s1 String
121
 * \param[in] s2 String
122
 * \return
123
 * 0 if both \p s1 and \p s2 are the same disregarding case for ASCII letters a-z<br>
124
 * 0 if both \p s1 and \p s2 are NULL<br>
125
 * <0 if \p s1 is NULL and \p s2 is not NULL or \p s1 is smaller than \p s2<br>
126
 * >0 if \p s2 is NULL and \p s1 is not NULL or \p s1 is greater than \p s2.
127
 *
128
 * This functions compares \p s1 and \p s2 as ASCII strings, case insensitive.
129
 * It also accepts NULL values.
130
 */
131
int wget_strcasecmp_ascii(const char *s1, const char *s2)
132
22.4k
{
133
22.4k
  if (!s1) {
134
0
    if (!s2)
135
0
      return 0;
136
0
    else
137
0
      return -1;
138
22.4k
  } else {
139
22.4k
    if (!s2)
140
0
      return 1;
141
22.4k
    else
142
22.4k
      return c_strcasecmp(s1, s2);
143
22.4k
  }
144
22.4k
}
145
146
/**
147
 * \param[in] s1 String
148
 * \param[in] s2 String
149
 * \param[in] n Max. number of chars to compare
150
 * \return
151
 * 0 if both \p s1 and \p s2 are the same disregarding case for ASCII letters a-z<br>
152
 * 0 if both \p s1 and \p s2 are NULL<br>
153
 * <0 if \p s1 is NULL and \p s2 is not NULL or \p s1 is smaller than \p s2<br>
154
 * >0 if \p s2 is NULL and \p s1 is not NULL or \p s1 is greater than \p s2.
155
 *
156
 * This functions compares \p s1 and \p s2 as ASCII strings, case insensitive, up to a max number of \p n chars.
157
 * It also accepts NULL values.
158
 */
159
int wget_strncasecmp_ascii(const char *s1, const char *s2, size_t n)
160
0
{
161
0
  if (!s1) {
162
0
    if (!s2)
163
0
      return 0;
164
0
    else
165
0
      return -1;
166
0
  } else {
167
0
    if (!s2)
168
0
      return 1;
169
0
    else
170
0
      return c_strncasecmp(s1, s2, n);
171
0
  }
172
0
}
173
174
/**
175
 * @param[in,out] s String to convert
176
 * \return Value of s
177
 *
178
 * Converts ASCII string \p s to lowercase in place.
179
 */
180
char *wget_strtolower(char *s)
181
46.0k
{
182
46.0k
  if (s) {
183
1.15M
    for (char *d = s; *d; d++) {
184
1.10M
      if (c_isupper(*d))
185
62.7k
        *d = (char) c_tolower(*d);
186
1.10M
    }
187
46.0k
  }
188
189
46.0k
  return s;
190
46.0k
}
191
192
/**
193
 * \param[in] s1 String
194
 * \param[in] s2 String
195
 * \param[in] n Max. number of chars to compare
196
 * \return
197
 * 0 if both \p s1 and \p s2 are the same or if both \p s1 and \p s2 are NULL<br>
198
 * <0 if \p s1 is NULL and \p s2 is not NULL or \p s1 is smaller than \p s2<br>
199
 * >0 if \p s2 is NULL and \p s1 is not NULL or \p s1 is greater than \p s2.
200
 *
201
 * This functions compares \p s1 and \p s2 in the same way as strncmp() does,
202
 * except that it also handles NULL values.
203
 */
204
int wget_strncmp(const char *s1, const char *s2, size_t n)
205
0
{
206
0
  if (!s1) {
207
0
    if (!s2)
208
0
      return 0;
209
0
    else
210
0
      return -1;
211
0
  } else {
212
0
    if (!s2)
213
0
      return 1;
214
0
    else
215
0
      return strncmp(s1, s2, n);
216
0
  }
217
0
}
218
219
/**
220
 * \param[in] s1 String
221
 * \param[in] s2 String
222
 * \param[in] n Max. number of chars to compare
223
 * \return
224
 * 0 if both \p s1 and \p s2 are the same disregarding case or if both \p s1 and \p s2 are NULL<br>
225
 * <0 if \p s1 is NULL and \p s2 is not NULL or \p s1 is smaller than \p s2<br>
226
 * >0 if \p s2 is NULL and \p s1 is not NULL or \p s1 is greater than \p s2.
227
 *
228
 * This functions compares \p s1 and \p s2 in the same way as strncasecmp() does,
229
 * except that it also handles NULL values.
230
 */
231
int wget_strncasecmp(const char *s1, const char *s2, size_t n)
232
0
{
233
0
  if (!s1) {
234
0
    if (!s2)
235
0
      return 0;
236
0
    else
237
0
      return -1;
238
0
  } else {
239
0
    if (!s2)
240
0
      return 1;
241
0
    else
242
0
      return strncasecmp(s1, s2, n);
243
0
  }
244
0
}
245
246
/**
247
 * \param[in] src Pointer to input buffer
248
 * \param[in] src_len Number of bytes to encode
249
 * \param[out] dst Buffer to hold the encoded string
250
 * \param[in] dst_size Size of \p dst in bytes
251
 *
252
 * Encodes a number of bytes into a lowercase hexadecimal C string.
253
 */
254
void wget_memtohex(const unsigned char *src, size_t src_len, char *dst, size_t dst_size)
255
0
{
256
0
  size_t it;
257
0
  int adjust = 0, c;
258
259
0
  if (dst_size == 0 || !dst || !src)
260
0
    return;
261
262
0
  if (src_len * 2 >= dst_size) {
263
0
    src_len = (dst_size - 1) / 2;
264
0
    adjust = 1;
265
0
  }
266
267
0
  for (it = 0; it < src_len; it++, src++) {
268
0
    *dst++ = (char) ((c = (*src >> 4)) >= 10 ? c + 'a' - 10 : c + '0');
269
0
    *dst++ = (char) ((c = (*src & 0xf)) >= 10 ? c + 'a' - 10 : c + '0');
270
0
  }
271
0
  if (adjust && (dst_size & 1) == 0)
272
0
    *dst++ = (char) ((c = (*src >> 4)) >= 10 ? c + 'a' - 10 : c + '0');
273
274
0
  *dst = 0;
275
0
}
276
277
/**
278
 * \param[in] ms Number of milliseconds to sleep
279
 *
280
 * Pause for \p ms milliseconds.
281
 */
282
void wget_millisleep(int ms)
283
0
{
284
0
  if (ms <= 0)
285
0
    return;
286
287
0
  nanosleep(&(struct timespec){ .tv_sec = ms / 1000, .tv_nsec = (ms % 1000) * 1000000 }, NULL);
288
0
}
289
290
/**
291
 * Return the current milliseconds since the epoch.
292
 */
293
long long wget_get_timemillis(void)
294
0
{
295
0
  struct timespec ts;
296
297
0
  gettime(&ts);
298
299
0
  return ts.tv_sec * 1000LL + ts.tv_nsec / 1000000;
300
0
}
301
302
WGET_GCC_CONST
303
static unsigned char unhex(unsigned char c)
304
0
{
305
0
  return c <= '9' ? c - '0' : (c <= 'F' ? c - 'A' + 10 : c - 'a' + 10);
306
0
}
307
308
/**
309
 * \param[in,out] src String to unescape
310
 * \return
311
 * 0 if the string did not change<br>
312
 * 1 if unescaping took place
313
 *
314
 * Does an inline percent unescape.
315
 * Each occurrence of %xx (x = hex digit) will converted into it's byte representation.
316
 */
317
int wget_percent_unescape(char *src)
318
0
{
319
0
  int ret = 0;
320
0
  unsigned char *s = (unsigned char *)src; // just a helper to avoid casting a lot
321
0
  unsigned char *d = s;
322
323
0
  while (*s) {
324
0
    if (*s == '%') {
325
0
      if (c_isxdigit(s[1]) && c_isxdigit(s[2])) {
326
0
        *d++ = (unsigned char) (unhex(s[1]) << 4) | unhex(s[2]);
327
0
        s += 3;
328
0
        ret = 1;
329
0
        continue;
330
0
      }
331
0
    }
332
333
0
    *d++ = *s++;
334
0
  }
335
0
  *d = 0;
336
337
0
  return ret;
338
0
}
339
340
/**
341
 * \param[in] s String
342
 * \param[in] tail String
343
 * \return 1 if \p tail matches the end of \p s, 0 if not
344
 *
345
 * Checks if \p tail matches the end of the string \p s.
346
 */
347
int wget_match_tail(const char *s, const char *tail)
348
0
{
349
0
  size_t s_len, tail_len;
350
351
0
  if ((s_len = strlen(s)) < (tail_len = strlen(tail)))
352
0
    return 0;
353
354
0
  const char *p = s + (s_len - tail_len);
355
356
0
  return !strcmp(p, tail);
357
0
}
358
359
/**
360
 * \param[in] s String
361
 * \param[in] tail String
362
 * \return 1 if \p tail matches the end of \p s, 0 if not
363
 *
364
 * Checks if \p tail matches the end of the string \p s, disregarding the case, ASCII only.
365
 *
366
 */
367
int wget_match_tail_nocase(const char *s, const char *tail)
368
0
{
369
0
  size_t s_len, tail_len;
370
371
0
  if ((s_len = strlen(s)) < (tail_len = strlen(tail)))
372
0
    return 0;
373
374
0
  const char *p = s + (s_len - tail_len);
375
376
0
  return !wget_strcasecmp_ascii(p, tail);
377
0
}
378
379
/**
380
 * \param[in] str String to run glob() against
381
 * \param[in] n Length of string
382
 * \param[in] flags Flags to pass to glob()
383
 * \return Expanded string after running glob
384
 *
385
 * Finds a pathname by running glob(3) on the pattern in the first \p n bytes
386
 * of \p globstr.  Returns a newly allocated string with the first \p n
387
 * bytes replaced with the matching pattern obtained via glob(3) if one was
388
 * found. Otherwise it returns NULL.
389
 */
390
char *wget_strnglob(const char *str, size_t n, int flags)
391
0
{
392
0
  glob_t pglob;
393
0
  char *expanded_str = NULL;
394
395
0
  char *globstr = wget_strmemdup(str, n);
396
397
0
  if (!globstr)
398
0
    return NULL;
399
400
0
  if (glob(globstr, flags, NULL, &pglob) == 0) {
401
0
    if (pglob.gl_pathc > 0) {
402
0
      expanded_str = wget_aprintf("%s%s", pglob.gl_pathv[0], str+n);
403
0
    }
404
0
    globfree(&pglob);
405
0
  }
406
407
0
  xfree(globstr);
408
0
  return expanded_str;
409
0
}
410
411
/**
412
 * \param[in] buf Result buffer
413
 * \param[in] bufsize Size of /p buf
414
 * \param[in] n Number to convert
415
 * \return Pointer to printable representation of \p n
416
 *
417
 * Returns a human readable representation of \p n.
418
 * \p n, a byte quantity, is converted to a human-readable abbreviated
419
 * form a la sizes printed by `ls -lh'.  The result is written into the
420
 * provided buffer.
421
 *
422
 * Unlike `with_thousand_seps', this approximates to the nearest unit.
423
 * Quoting GNU libit: "Most people visually process strings of 3-4
424
 * digits effectively, but longer strings of digits are more prone to
425
 * misinterpretation.  Hence, converting to an abbreviated form
426
 * usually improves readability."
427
 *
428
 * This intentionally uses kilobyte (KB), megabyte (MB), etc. in their
429
 * original computer-related meaning of "powers of 1024".  We don't
430
 * use the "*bibyte" names invented in 1998, and seldom used in
431
 * practice.  Wikipedia's entry on "binary prefix" discusses this in
432
 * some detail.
433
 */
434
char *wget_human_readable(char *buf, size_t bufsize, uint64_t n)
435
0
{
436
  /* These suffixes are compatible with those of GNU `ls -lh'. */
437
0
  static const char powers[] = {
438
0
    'K', /* kilobyte,  2^10 bytes */
439
0
    'M', /* megabyte,  2^20 bytes */
440
0
    'G', /* gigabyte,  2^30 bytes */
441
0
    'T', /* terabyte,  2^40 bytes */
442
0
    'P', /* petabyte,  2^50 bytes */
443
0
    'E', /* exabyte,   2^60 bytes */
444
0
    'Z', /* zettabyte, 2^70 bytes */
445
0
    'Y', /* yottabyte, 2^80 bytes */
446
0
  };
447
448
  /* If the quantity is smaller than 1K, just print it. */
449
0
  if (n < 1024) {
450
0
    wget_snprintf(buf, bufsize, "%u ", (unsigned int) n);
451
0
    return buf;
452
0
  }
453
454
  /* Loop over powers, dividing N with 1024 in each iteration.  This
455
    works unchanged for all sizes of wgint, while still avoiding
456
    non-portable `long double' arithmetic.  */
457
0
  for (unsigned i = 0; i < countof(powers); i++) {
458
    /* At each iteration N is greater than the *subsequent* power.
459
      That way N/1024.0 produces a decimal number in the units of *this* power.  */
460
0
    if ((n / 1024) < 1024 || i == countof(powers) - 1) {
461
0
      double val = n / 1024.0;
462
      /* Print values smaller than the accuracy level (acc) with (decimal)
463
       * decimal digits, and others without any decimals.  */
464
0
      if (val < 1000)
465
0
        wget_snprintf(buf, bufsize, "%d.%02d%c", (int) val , ((int) (val * 100)) % 100, powers[i]);
466
0
      else
467
0
        wget_snprintf(buf, bufsize, "%d%c", (int) (val + .5), powers[i]);
468
0
      return buf;
469
0
    }
470
0
    n /= 1024;
471
0
  }
472
473
0
  return NULL; /* unreached */
474
0
}
475
476
/**
477
 * \param[out] width Number of columns in terminal
478
 * \param[out] height Number of rows in terminal
479
 * \return Upon successful completion, \p wget_get_screen_size will return 0,
480
 * and the values of \p width and \p height will be set accordingly.
481
 * If an error was encountered, the function will return -1 without touching
482
 * the values of \p width and \p height.
483
 *
484
 * Get the size of the terminal to which the output is currently printed
485
 * (stderr). This function accepts two int pointers and will set their values
486
 * to the width and height of the active terminal in number of columns. If
487
 * either of the parameter is NULL, its value will not be set by the function.
488
 */
489
#ifdef HAVE_IOCTL
490
int wget_get_screen_size(int *width, int *height)
491
0
{
492
0
  struct winsize wsz;
493
0
  int fd = fileno(stderr); // TODO: progress bar is output to stdout so we probably should be using that !?
494
495
0
  if (ioctl (fd, TIOCGWINSZ, &wsz) >= 0) {
496
0
    if (width)
497
0
      *width = wsz.ws_col;
498
0
    if (height)
499
0
      *height = wsz.ws_row;
500
501
0
    return 0;
502
0
  }
503
504
0
  return -1;
505
0
}
506
#elif defined _WIN32
507
int wget_get_screen_size(int *width, int *height)
508
{
509
  static CONSOLE_SCREEN_BUFFER_INFO csbiInfo;
510
  static HANDLE consoleHandle = NULL;
511
512
  if (consoleHandle == NULL)
513
    consoleHandle = GetStdHandle(STD_OUTPUT_HANDLE);
514
515
  if (!GetConsoleScreenBufferInfo(consoleHandle, &csbiInfo))
516
    return -1;
517
518
  if (width)
519
    *width = csbiInfo.dwSize.X;
520
  if (height)
521
    *height = csbiInfo.dwSize.Y;
522
523
  return 0;
524
}
525
#else
526
int wget_get_screen_size(WGET_GCC_UNUSED int *width, WGET_GCC_UNUSED int *height)
527
{
528
  return -1;
529
}
530
#endif
531
532
/**@}*/