Coverage Report

Created: 2025-07-18 06:26

/src/libcbor/src/cbor/internal/unicode.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2014-2020 Pavel Kalvoda <me@pavelkalvoda.com>
3
 *
4
 * libcbor is free software; you can redistribute it and/or modify
5
 * it under the terms of the MIT license. See LICENSE for details.
6
 */
7
8
#include "unicode.h"
9
#include <stdint.h>
10
11
6.61M
/* Decoder DFA states that callers test for.
 * UTF8_ACCEPT: initial state, and the state returned once a whole code point
 *              has been consumed (the counter below increments on it).
 * UTF8_REJECT: the bytes seen so far are not valid UTF-8 (treated as an
 *              error by the counter below). */
#define UTF8_ACCEPT 0
12
21.9k
#define UTF8_REJECT 1
13
14
/* Combined lookup table for the UTF-8 decoding DFA.
 *
 * Entries 0..255 are read as utf8d[byte] and map an input byte to its
 * character class (the row comments give the byte range each group covers).
 * Entries from offset 256 onward are the transition table, read as
 * utf8d[256 + state * 16 + class]: each state owns 16 consecutive slots
 * (rows labelled s0..s8) and each slot holds the next state. */
static const uint8_t utf8d[] = {
15
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
16
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
17
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0, /* 00..1f */
18
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
19
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
20
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0, /* 20..3f */
21
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
22
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
23
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0, /* 40..5f */
24
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
25
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
26
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0, /* 60..7f */
27
    1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
28
    1,   1,   1,   1,   1,   9,   9,   9,   9,   9,   9,
29
    9,   9,   9,   9,   9,   9,   9,   9,   9,   9, /* 80..9f */
30
    7,   7,   7,   7,   7,   7,   7,   7,   7,   7,   7,
31
    7,   7,   7,   7,   7,   7,   7,   7,   7,   7,   7,
32
    7,   7,   7,   7,   7,   7,   7,   7,   7,   7, /* a0..bf */
33
    8,   8,   2,   2,   2,   2,   2,   2,   2,   2,   2,
34
    2,   2,   2,   2,   2,   2,   2,   2,   2,   2,   2,
35
    2,   2,   2,   2,   2,   2,   2,   2,   2,   2, /* c0..df */
36
    0xa, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3,
37
    0x3, 0x3, 0x4, 0x3, 0x3, /* e0..ef */
38
    0xb, 0x6, 0x6, 0x6, 0x5, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
39
    0x8, 0x8, 0x8, 0x8, 0x8, /* f0..ff */
40
    0x0, 0x1, 0x2, 0x3, 0x5, 0x8, 0x7, 0x1, 0x1, 0x1, 0x4,
41
    0x6, 0x1, 0x1, 0x1, 0x1, /* s0..s0 */
42
    1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
43
    1,   1,   1,   1,   1,   1,   0,   1,   1,   1,   1,
44
    1,   0,   1,   0,   1,   1,   1,   1,   1,   1, /* s1..s2 */
45
    1,   2,   1,   1,   1,   1,   1,   2,   1,   2,   1,
46
    1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
47
    1,   2,   1,   1,   1,   1,   1,   1,   1,   1, /* s3..s4 */
48
    1,   2,   1,   1,   1,   1,   1,   1,   1,   2,   1,
49
    1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
50
    1,   3,   1,   3,   1,   1,   1,   1,   1,   1, /* s5..s6 */
51
    1,   3,   1,   1,   1,   1,   1,   3,   1,   3,   1,
52
    1,   1,   1,   1,   1,   1,   3,   1,   1,   1,   1,
53
    1,   1,   1,   1,   1,   1,   1,   1,   1,   1, /* s7..s8 */
54
};
55
56
/* Copyright of this function: (c) 2008-2009 Bjoern Hoehrmann
57
 * <bjoern@hoehrmann.de> */
58
/* See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. */
59
3.01M
uint32_t _cbor_unicode_decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
60
3.01M
  uint32_t type = utf8d[byte];
61
62
3.01M
  *codep = (*state != UTF8_ACCEPT) ? (byte & 0x3fu) | (*codep << 6)
63
3.01M
                                   : (0xff >> type) & (byte);
64
65
3.01M
  *state = utf8d[256 + *state * 16 + type];
66
3.01M
  return *state;
67
3.01M
}
68
69
/*
 * Count the code points in a UTF-8 byte sequence, validating it on the way.
 *
 * source:        bytes to scan.
 * source_length: number of bytes readable from `source`.
 * status:        out; always written. On success it holds
 *                {location = 0, status = _CBOR_UNICODE_OK}. On failure it
 *                holds status = _CBOR_UNICODE_BADCP and `location` set to the
 *                index of the rejected byte, or to `source_length` when the
 *                input ends in the middle of a multibyte sequence.
 *
 * Returns the number of code points on success and 0 on failure. An empty
 * input also yields 0, so callers must inspect `status` to tell the two apart.
 */
size_t _cbor_unicode_codepoint_count(cbor_data source, size_t source_length,
                                     struct _cbor_unicode_status* status) {
  uint32_t dfa_state = UTF8_ACCEPT;
  uint32_t scratch_codepoint; /* written by the decoder before it is read */
  size_t n_codepoints = 0;
  size_t offset = 0;

  *status =
      (struct _cbor_unicode_status){.location = 0, .status = _CBOR_UNICODE_OK};

  while (offset < source_length) {
    const uint32_t outcome =
        _cbor_unicode_decode(&dfa_state, &scratch_codepoint, source[offset]);

    if (outcome == UTF8_REJECT) {
      /* Leave `offset` pointing at the offending byte. */
      break;
    }
    if (outcome == UTF8_ACCEPT) {
      n_codepoints++;
    }
    offset++;
  }

  /* The DFA rests in UTF8_ACCEPT only if every sequence was completed. */
  if (dfa_state == UTF8_ACCEPT) {
    return n_codepoints;
  }

  /* Either a byte was rejected or the input stopped mid-sequence. */
  *status = (struct _cbor_unicode_status){.location = offset,
                                          .status = _CBOR_UNICODE_BADCP};
  return 0;
}