Coverage Report

Created: 2023-09-19 06:58

/src/mosquitto/lib/utf8_mosq.c
Line
Count
Source
1
/*
2
Copyright (c) 2016-2021 Roger Light <roger@atchoo.org>
3
4
All rights reserved. This program and the accompanying materials
5
are made available under the terms of the Eclipse Public License 2.0
6
and Eclipse Distribution License v1.0 which accompany this distribution.
7
8
The Eclipse Public License is available at
9
   https://www.eclipse.org/legal/epl-2.0/
10
and the Eclipse Distribution License is available at
11
  http://www.eclipse.org/org/documents/edl-v10.php.
12
13
SPDX-License-Identifier: EPL-2.0 OR BSD-3-Clause
14
15
Contributors:
16
   Roger Light - initial implementation.
17
*/
18
19
#include "config.h"
20
21
#include <stdio.h>
22
#include "mosquitto.h"
23
24
BROKER_EXPORT int mosquitto_validate_utf8(const char *str, int len)
25
2.02M
{
26
2.02M
  int i;
27
2.02M
  int j;
28
2.02M
  int codelen;
29
2.02M
  int codepoint;
30
2.02M
  const unsigned char *ustr = (const unsigned char *)str;
31
32
2.02M
  if(!str) return MOSQ_ERR_INVAL;
33
1.98M
  if(len < 0 || len > 65536) return MOSQ_ERR_INVAL;
34
35
7.08M
  for(i=0; i<len; i++){
36
5.10M
    if(ustr[i] == 0){
37
935
      return MOSQ_ERR_MALFORMED_UTF8;
38
5.10M
    }else if(ustr[i] <= 0x7f){
39
5.09M
      codelen = 1;
40
5.09M
      codepoint = ustr[i];
41
5.09M
    }else if((ustr[i] & 0xE0) == 0xC0){
42
      /* 110xxxxx - 2 byte sequence */
43
2.88k
      if(ustr[i] == 0xC0 || ustr[i] == 0xC1){
44
        /* Invalid bytes */
45
484
        return MOSQ_ERR_MALFORMED_UTF8;
46
484
      }
47
2.40k
      codelen = 2;
48
2.40k
      codepoint = (ustr[i] & 0x1F);
49
10.2k
    }else if((ustr[i] & 0xF0) == 0xE0){
50
      /* 1110xxxx - 3 byte sequence */
51
4.16k
      codelen = 3;
52
4.16k
      codepoint = (ustr[i] & 0x0F);
53
6.04k
    }else if((ustr[i] & 0xF8) == 0xF0){
54
      /* 11110xxx - 4 byte sequence */
55
5.18k
      if(ustr[i] > 0xF4){
56
        /* Invalid, this would produce values > 0x10FFFF. */
57
268
        return MOSQ_ERR_MALFORMED_UTF8;
58
268
      }
59
4.91k
      codelen = 4;
60
4.91k
      codepoint = (ustr[i] & 0x07);
61
4.91k
    }else{
62
      /* Unexpected continuation byte. */
63
863
      return MOSQ_ERR_MALFORMED_UTF8;
64
863
    }
65
66
    /* Reconstruct full code point */
67
5.10M
    if(i >= len-codelen+1){
68
      /* Not enough data */
69
788
      return MOSQ_ERR_MALFORMED_UTF8;
70
788
    }
71
5.13M
    for(j=0; j<codelen-1; j++){
72
24.2k
      if((ustr[++i] & 0xC0) != 0x80){
73
        /* Not a continuation byte */
74
494
        return MOSQ_ERR_MALFORMED_UTF8;
75
494
      }
76
23.7k
      codepoint = (codepoint<<6) | (ustr[i] & 0x3F);
77
23.7k
    }
78
79
    /* Check for UTF-16 high/low surrogates */
80
5.10M
    if(codepoint >= 0xD800 && codepoint <= 0xDFFF){
81
274
      return MOSQ_ERR_MALFORMED_UTF8;
82
274
    }
83
84
    /* Check for overlong or out of range encodings */
85
    /* Checking codelen == 2 isn't necessary here, because it is already
86
     * covered above in the C0 and C1 checks.
87
     * if(codelen == 2 && codepoint < 0x0080){
88
     *   return MOSQ_ERR_MALFORMED_UTF8;
89
     * }else
90
    */
91
5.10M
    if(codelen == 3 && codepoint < 0x0800){
92
254
      return MOSQ_ERR_MALFORMED_UTF8;
93
5.10M
    }else if(codelen == 4 && (codepoint < 0x10000 || codepoint > 0x10FFFF)){
94
1.35k
      return MOSQ_ERR_MALFORMED_UTF8;
95
1.35k
    }
96
97
    /* Check for non-characters */
98
5.10M
    if(codepoint >= 0xFDD0 && codepoint <= 0xFDEF){
99
228
      return MOSQ_ERR_MALFORMED_UTF8;
100
228
    }
101
5.10M
    if((codepoint & 0xFFFF) == 0xFFFE || (codepoint & 0xFFFF) == 0xFFFF){
102
412
      return MOSQ_ERR_MALFORMED_UTF8;
103
412
    }
104
    /* Check for control characters */
105
5.10M
    if(codepoint <= 0x001F || (codepoint >= 0x007F && codepoint <= 0x009F)){
106
672
      return MOSQ_ERR_MALFORMED_UTF8;
107
672
    }
108
5.10M
  }
109
1.97M
  return MOSQ_ERR_SUCCESS;
110
1.98M
}