Coverage Report

Created: 2025-10-24 06:29

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/mosquitto/libcommon/utf8_common.c
Line
Count
Source
1
/*
2
Copyright (c) 2016-2021 Roger Light <roger@atchoo.org>
3
4
All rights reserved. This program and the accompanying materials
5
are made available under the terms of the Eclipse Public License 2.0
6
and Eclipse Distribution License v1.0 which accompany this distribution.
7
8
The Eclipse Public License is available at
9
   https://www.eclipse.org/legal/epl-2.0/
10
and the Eclipse Distribution License is available at
11
  http://www.eclipse.org/org/documents/edl-v10.php.
12
13
SPDX-License-Identifier: EPL-2.0 OR BSD-3-Clause
14
15
Contributors:
16
   Roger Light - initial implementation.
17
*/
18
19
#include "config.h"
20
21
#include <stdio.h>
22
#include "mosquitto.h"
23
24
25
BROKER_EXPORT int mosquitto_validate_utf8(const char *str, int len)
26
7.21M
{
27
7.21M
  int i;
28
7.21M
  int j;
29
7.21M
  int codelen;
30
7.21M
  int codepoint;
31
7.21M
  const unsigned char *ustr = (const unsigned char *)str;
32
33
7.21M
  if(!str){
34
111k
    return MOSQ_ERR_INVAL;
35
111k
  }
36
7.10M
  if(len < 0 || len > 65536){
37
10
    return MOSQ_ERR_INVAL;
38
10
  }
39
40
22.7M
  for(i=0; i<len; i++){
41
15.6M
    if(ustr[i] == 0){
42
751
      return MOSQ_ERR_MALFORMED_UTF8;
43
15.6M
    }else if(ustr[i] <= 0x7f){
44
15.6M
      codelen = 1;
45
15.6M
      codepoint = ustr[i];
46
15.6M
    }else if((ustr[i] & 0xE0) == 0xC0){
47
      /* 110xxxxx - 2 byte sequence */
48
6.07k
      if(ustr[i] == 0xC0 || ustr[i] == 0xC1){
49
        /* Invalid bytes */
50
492
        return MOSQ_ERR_MALFORMED_UTF8;
51
492
      }
52
5.58k
      codelen = 2;
53
5.58k
      codepoint = (ustr[i] & 0x1F);
54
14.5k
    }else if((ustr[i] & 0xF0) == 0xE0){
55
      /* 1110xxxx - 3 byte sequence */
56
6.66k
      codelen = 3;
57
6.66k
      codepoint = (ustr[i] & 0x0F);
58
7.92k
    }else if((ustr[i] & 0xF8) == 0xF0){
59
      /* 11110xxx - 4 byte sequence */
60
7.21k
      if(ustr[i] > 0xF4){
61
        /* Invalid, this would produce values > 0x10FFFF. */
62
219
        return MOSQ_ERR_MALFORMED_UTF8;
63
219
      }
64
6.99k
      codelen = 4;
65
6.99k
      codepoint = (ustr[i] & 0x07);
66
6.99k
    }else{
67
      /* Unexpected continuation byte. */
68
709
      return MOSQ_ERR_MALFORMED_UTF8;
69
709
    }
70
71
    /* Reconstruct full code point */
72
15.6M
    if(i >= len-codelen+1){
73
      /* Not enough data */
74
838
      return MOSQ_ERR_MALFORMED_UTF8;
75
838
    }
76
15.6M
    for(j=0; j<codelen-1; j++){
77
38.1k
      if((ustr[++i] & 0xC0) != 0x80){
78
        /* Not a continuation byte */
79
496
        return MOSQ_ERR_MALFORMED_UTF8;
80
496
      }
81
37.6k
      codepoint = (codepoint<<6) | (ustr[i] & 0x3F);
82
37.6k
    }
83
84
    /* Check for UTF-16 high/low surrogates */
85
15.6M
    if(codepoint >= 0xD800 && codepoint <= 0xDFFF){
86
302
      return MOSQ_ERR_MALFORMED_UTF8;
87
302
    }
88
89
    /* Check for overlong or out of range encodings */
90
    /* Checking codelen == 2 isn't necessary here, because it is already
91
     * covered above in the C0 and C1 checks.
92
     * if(codelen == 2 && codepoint < 0x0080){
93
     *   return MOSQ_ERR_MALFORMED_UTF8;
94
     * }else
95
    */
96
15.6M
    if(codelen == 3 && codepoint < 0x0800){
97
304
      return MOSQ_ERR_MALFORMED_UTF8;
98
15.6M
    }else if(codelen == 4 && (codepoint < 0x10000 || codepoint > 0x10FFFF)){
99
1.00k
      return MOSQ_ERR_MALFORMED_UTF8;
100
1.00k
    }
101
102
    /* Check for non-characters */
103
15.6M
    if(codepoint >= 0xFDD0 && codepoint <= 0xFDEF){
104
242
      return MOSQ_ERR_MALFORMED_UTF8;
105
242
    }
106
15.6M
    if((codepoint & 0xFFFF) == 0xFFFE || (codepoint & 0xFFFF) == 0xFFFF){
107
317
      return MOSQ_ERR_MALFORMED_UTF8;
108
317
    }
109
    /* Check for control characters */
110
15.6M
    if(codepoint <= 0x001F || (codepoint >= 0x007F && codepoint <= 0x009F)){
111
684
      return MOSQ_ERR_MALFORMED_UTF8;
112
684
    }
113
15.6M
  }
114
7.09M
  return MOSQ_ERR_SUCCESS;
115
7.10M
}