Coverage Report

Created: 2025-06-24 07:01

/src/ghostpdl/brotli/c/enc/utf8_util.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright 2013 Google Inc. All Rights Reserved.
2
3
   Distributed under MIT license.
4
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
*/
6
7
/* Heuristics for deciding about the UTF8-ness of strings. */
8
9
#include "utf8_util.h"
10
11
#include <brotli/types.h>
12
13
#if defined(__cplusplus) || defined(c_plusplus)
14
extern "C" {
15
#endif
16
17
/* Decodes a single symbol from the start of |input|.

   symbol: receives the decoded value.
   input:  bytes to decode; input[0] is read unconditionally, so the caller
           must supply at least one readable byte.
   size:   number of bytes available at |input|; follow-up bytes are only
           inspected when |size| says they exist.

   If |input| starts with a 1- to 4-byte UTF8 sequence whose decoded value is
   in the range that requires that many bytes (overlong forms are rejected,
   and 4-byte values must not exceed 0x10FFFF), stores the code point in
   |*symbol| and returns the sequence length.

   Otherwise stores (0x110000 | input[0]) in |*symbol|, i.e. a value above the
   UTF8 code space, and returns 1. The return value is therefore always in
   1..4, which lets a caller advance through a buffer without stalling.

   Notes:
   - A NUL byte (0x00) is not accepted as ASCII; it ends up in the
     "not UTF8" case with *symbol == 0x110000.
   - 3-byte sequences are only checked against the lower bound 0x7FF; values
     in the surrogate range are not filtered out here. */
static size_t BrotliParseAsUTF8(
    int* symbol, const uint8_t* input, size_t size) {
  /* ASCII */
  if ((input[0] & 0x80) == 0) {
    *symbol = input[0];
    if (*symbol > 0) {
      return 1;
    }
  }
  /* 2-byte UTF8 */
  if (size > 1u &&
      (input[0] & 0xE0) == 0xC0 &&
      (input[1] & 0xC0) == 0x80) {
    *symbol = (((input[0] & 0x1F) << 6) |
               (input[1] & 0x3F));
    /* Values up to 0x7F fit in one byte: reject the overlong encoding. */
    if (*symbol > 0x7F) {
      return 2;
    }
  }
  /* 3-byte UTF8 */
  if (size > 2u &&
      (input[0] & 0xF0) == 0xE0 &&
      (input[1] & 0xC0) == 0x80 &&
      (input[2] & 0xC0) == 0x80) {
    *symbol = (((input[0] & 0x0F) << 12) |
               ((input[1] & 0x3F) << 6) |
               (input[2] & 0x3F));
    /* Values up to 0x7FF fit in two bytes: reject the overlong encoding. */
    if (*symbol > 0x7FF) {
      return 3;
    }
  }
  /* 4-byte UTF8 */
  if (size > 3u &&
      (input[0] & 0xF8) == 0xF0 &&
      (input[1] & 0xC0) == 0x80 &&
      (input[2] & 0xC0) == 0x80 &&
      (input[3] & 0xC0) == 0x80) {
    *symbol = (((input[0] & 0x07) << 18) |
               ((input[1] & 0x3F) << 12) |
               ((input[2] & 0x3F) << 6) |
               (input[3] & 0x3F));
    /* Reject overlong encodings and values beyond the last code point. */
    if (*symbol > 0xFFFF && *symbol <= 0x10FFFF) {
      return 4;
    }
  }
  /* Not UTF8, emit a special symbol above the UTF8-code space */
  *symbol = 0x110000 | input[0];
  return 1;
}
66
67
/* Returns 1 if at least min_fraction of the data is UTF8-encoded.*/
68
BROTLI_BOOL BrotliIsMostlyUTF8(
69
    const uint8_t* data, const size_t pos, const size_t mask,
70
0
    const size_t length, const double min_fraction) {
71
0
  size_t size_utf8 = 0;
72
0
  size_t i = 0;
73
0
  while (i < length) {
74
0
    int symbol;
75
0
    size_t bytes_read =
76
0
        BrotliParseAsUTF8(&symbol, &data[(pos + i) & mask], length - i);
77
0
    i += bytes_read;
78
0
    if (symbol < 0x110000) size_utf8 += bytes_read;
79
0
  }
80
0
  return TO_BROTLI_BOOL((double)size_utf8 > min_fraction * (double)length);
81
0
}
82
83
#if defined(__cplusplus) || defined(c_plusplus)
84
}  /* extern "C" */
85
#endif