/src/libressl/crypto/asn1/a_utf8.c

Source (jump to first uncovered line)
/* $OpenBSD: a_utf8.c,v 1.8 2014/07/11 08:44:47 jsing Exp $ */
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
 * All rights reserved.
 *
 * This package is an SSL implementation written
 * by Eric Young (eay@cryptsoft.com).
 * The implementation was written so as to conform with Netscapes SSL.
 *
 * This library is free for commercial and non-commercial use as long as
 * the following conditions are aheared to.  The following conditions
 * apply to all code found in this distribution, be it the RC4, RSA,
 * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
 * included with this distribution is covered by the same copyright terms
 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
 *
 * Copyright remains Eric Young's, and as such any Copyright notices in
 * the code are not to be removed.
 * If this package is used in a product, Eric Young should be given attribution
 * as the author of the parts of the library used.
 * This can be in the form of a textual message at program startup or
 * in documentation (online or textual) provided with the package.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *    "This product includes cryptographic software written by
 *     Eric Young (eay@cryptsoft.com)"
 *    The word 'cryptographic' can be left out if the rouines from the library
 *    being used are not cryptographic related :-).
 * 4. If you include any Windows specific code (or a derivative thereof) from
 *    the apps directory (application code) you must include an acknowledgement:
 *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
 *
 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * The licence and distribution terms for any publically available version or
 * derivative of this code cannot be changed.  i.e. this code cannot simply be
 * copied and put under another distribution licence
 * [including the GNU Public Licence.]
 */

#include <stdio.h>

#include <openssl/asn1.h>

#include "asn1_locl.h"

/* UTF8 utilities */

/*
 * Decode a single UTF-8 encoded code point from the start of 'str', looking
 * at no more than 'len' bytes. On success the decoded code point is stored
 * in '*val' and the number of bytes consumed (1 to 4) is returned. '*val' is
 * left untouched on every other return path.
 *
 * Returns 0 if 'len' is zero or negative, or a negative error code:
 * -1 = fewer than the required number of bytes are available
 * -2 = illegal lead byte (stray continuation byte, 0xc0, 0xc1, 0xf5 or
 *      above), a surrogate code point, or a value above UNICODE_MAX
 * -3 = a trailing byte is not of the form 10xxxxxx
 * -4 = overlong encoding (not the minimal length for the value)
 */

int
UTF8_getc(const unsigned char *str, int len, unsigned long *val)
{
  unsigned long cp;
  unsigned char lead;
  int nbytes;

  if (len <= 0)
    return 0;

  lead = str[0];

  if (lead < 0x80) {
    /* 0xxxxxxx: a single byte that is its own value. */
    cp = lead;
    nbytes = 1;
  } else if (lead >= 0xc0 && lead < 0xe0) {
    /* 110xxxxx 10xxxxxx */
    if (lead < 0xc2)
      return -2;	/* 0xc0 and 0xc1 can only start overlong forms */
    if (len < 2)
      return -1;
    if ((str[1] & 0xc0) != 0x80)
      return -3;
    cp = ((unsigned long)(lead & 0x1f) << 6) |
        (unsigned long)(str[1] & 0x3f);
    /* Cannot trigger once lead >= 0xc2; kept as a defensive check. */
    if (cp < 0x80)
      return -4;
    nbytes = 2;
  } else if (lead >= 0xe0 && lead < 0xf0) {
    /* 1110xxxx 10xxxxxx 10xxxxxx */
    if (len < 3)
      return -1;
    if ((str[1] & 0xc0) != 0x80 || (str[2] & 0xc0) != 0x80)
      return -3;
    cp = ((unsigned long)(lead & 0x0f) << 12) |
        ((unsigned long)(str[1] & 0x3f) << 6) |
        (unsigned long)(str[2] & 0x3f);
    if (cp < 0x800)
      return -4;
    /* U+D800..U+DFFF are surrogates and are rejected. */
    if (cp >= 0xd800 && cp <= 0xdfff)
      return -2;
    nbytes = 3;
  } else if (lead >= 0xf0 && lead < 0xf5) {
    /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    if (len < 4)
      return -1;
    if ((str[1] & 0xc0) != 0x80 || (str[2] & 0xc0) != 0x80 ||
        (str[3] & 0xc0) != 0x80)
      return -3;
    cp = ((unsigned long)(lead & 0x07) << 18) |
        ((unsigned long)(str[1] & 0x3f) << 12) |
        ((unsigned long)(str[2] & 0x3f) << 6) |
        (unsigned long)(str[3] & 0x3f);
    if (cp < 0x10000)
      return -4;
    if (cp > UNICODE_MAX)
      return -2;
    nbytes = 4;
  } else {
    /* 0x80..0xbf (bare continuation byte) or 0xf5..0xff. */
    return -2;
  }

  *val = cp;
  return nbytes;
}

/*
 * Encode the Unicode code point 'value' as UTF-8.
 *
 * When 'str' is not NULL it must point to a buffer of at least 'len' bytes,
 * and the encoded form is written to its start. When 'str' is NULL nothing
 * is written, 'len' is ignored, and only the return value is computed.
 *
 * Returns the number of bytes in the encoded form of 'value' (1 to 4), or a
 * negative value on error:
 *  -1 if 'str' is not NULL and 'len' is smaller than the encoded form
 *  -2 if 'value' is a surrogate or is greater than UNICODE_MAX
 *
 * An invalid 'value' is reported as -2 even if 'len' is also too small.
 */

int
UTF8_putc(unsigned char *str, int len, unsigned long value)
{
  int nbytes;

  /* Work out the encoded length, rejecting invalid code points first. */
  if (value < 0x80) {
    nbytes = 1;
  } else if (value < 0x800) {
    nbytes = 2;
  } else if (value < 0x10000) {
    if (UNICODE_IS_SURROGATE(value))
      return -2;
    nbytes = 3;
  } else if (value <= UNICODE_MAX) {
    nbytes = 4;
  } else {
    return -2;
  }

  /* A NULL buffer means the caller only wants the length. */
  if (str == NULL)
    return nbytes;
  if (len < nbytes)
    return -1;

  switch (nbytes) {
  case 1:
    str[0] = (unsigned char)value;
    break;
  case 2:
    str[0] = (unsigned char)(0xc0 | ((value >> 6) & 0x1f));
    str[1] = (unsigned char)(0x80 | (value & 0x3f));
    break;
  case 3:
    str[0] = (unsigned char)(0xe0 | ((value >> 12) & 0xf));
    str[1] = (unsigned char)(0x80 | ((value >> 6) & 0x3f));
    str[2] = (unsigned char)(0x80 | (value & 0x3f));
    break;
  default:
    /* nbytes == 4 */
    str[0] = (unsigned char)(0xf0 | ((value >> 18) & 0x7));
    str[1] = (unsigned char)(0x80 | ((value >> 12) & 0x3f));
    str[2] = (unsigned char)(0x80 | ((value >> 6) & 0x3f));
    str[3] = (unsigned char)(0x80 | (value & 0x3f));
    break;
  }

  return nbytes;
}

Coverage Report

Created: 2022-08-24 06:30

Line	Count	Source (jump to first uncovered line)
1		/* $OpenBSD: a_utf8.c,v 1.8 2014/07/11 08:44:47 jsing Exp $ */
2		/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3		* All rights reserved.
4		*
5		* This package is an SSL implementation written
6		* by Eric Young (eay@cryptsoft.com).
7		* The implementation was written so as to conform with Netscapes SSL.
8		*
9		* This library is free for commercial and non-commercial use as long as
10		* the following conditions are aheared to. The following conditions
11		* apply to all code found in this distribution, be it the RC4, RSA,
12		* lhash, DES, etc., code; not just the SSL code. The SSL documentation
13		* included with this distribution is covered by the same copyright terms
14		* except that the holder is Tim Hudson (tjh@cryptsoft.com).
15		*
16		* Copyright remains Eric Young's, and as such any Copyright notices in
17		* the code are not to be removed.
18		* If this package is used in a product, Eric Young should be given attribution
19		* as the author of the parts of the library used.
20		* This can be in the form of a textual message at program startup or
21		* in documentation (online or textual) provided with the package.
22		*
23		* Redistribution and use in source and binary forms, with or without
24		* modification, are permitted provided that the following conditions
25		* are met:
26		* 1. Redistributions of source code must retain the copyright
27		* notice, this list of conditions and the following disclaimer.
28		* 2. Redistributions in binary form must reproduce the above copyright
29		* notice, this list of conditions and the following disclaimer in the
30		* documentation and/or other materials provided with the distribution.
31		* 3. All advertising materials mentioning features or use of this software
32		* must display the following acknowledgement:
33		* "This product includes cryptographic software written by
34		* Eric Young (eay@cryptsoft.com)"
35		* The word 'cryptographic' can be left out if the rouines from the library
36		* being used are not cryptographic related :-).
37		* 4. If you include any Windows specific code (or a derivative thereof) from
38		* the apps directory (application code) you must include an acknowledgement:
39		* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40		*
41		* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42		* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43		* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44		* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45		* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46		* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47		* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48		* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49		* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50		* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51		* SUCH DAMAGE.
52		*
53		* The licence and distribution terms for any publically available version or
54		* derivative of this code cannot be changed. i.e. this code cannot simply be
55		* copied and put under another distribution licence
56		* [including the GNU Public Licence.]
57		*/
58
59		#include <stdio.h>
60
61		#include <openssl/asn1.h>
62
63		#include "asn1_locl.h"
64
65		/* UTF8 utilities */
66
67		/*
68		* This parses a UTF8 string one character at a time. It is passed a pointer
69		* to the string and the length of the string. It sets 'value' to the value of
70		* the current character. It returns the number of characters read or a
71		* negative error code:
72		* -1 = string too short
73		* -2 = illegal character
74		* -3 = subsequent characters not of the form 10xxxxxx
75		* -4 = character encoded incorrectly (not minimal length).
76		*/
77
78		int
79		UTF8_getc(const unsigned char *str, int len, unsigned long *val)
80	0	{
81	0	const unsigned char *p;
82	0	unsigned long value;
83	0	int ret;
84	0	if (len <= 0)
85	0	return 0;
86	0	p = str;
87
88		/* Check syntax and work out the encoded value (if correct) */
89	0	if ((*p & 0x80) == 0) {
90	0	value = *p++ & 0x7f;
91	0	ret = 1;
92	0	} else if ((*p & 0xe0) == 0xc0) {
93	0	if (*p < 0xc2)
94	0	return -2;
95	0	if (len < 2)
96	0	return -1;
97	0	if ((p[1] & 0xc0) != 0x80)
98	0	return -3;
99	0	value = (*p++ & 0x1f) << 6;
100	0	value |= *p++ & 0x3f;
101	0	if (value < 0x80)
102	0	return -4;
103	0	ret = 2;
104	0	} else if ((*p & 0xf0) == 0xe0) {
105	0	if (len < 3)
106	0	return -1;
107	0	if (((p[1] & 0xc0) != 0x80) ||
108	0	((p[2] & 0xc0) != 0x80))
109	0	return -3;
110	0	value = (*p++ & 0xf) << 12;
111	0	value |= (*p++ & 0x3f) << 6;
112	0	value |= *p++ & 0x3f;
113	0	if (value < 0x800)
114	0	return -4;
115		/* surrogate pair code points are not valid */
116	0	if (value >= 0xd800 && value < 0xe000)
117	0	return -2;
118	0	ret = 3;
119	0	} else if ((*p & 0xf8) == 0xf0 && (*p < 0xf5)) {
120	0	if (len < 4)
121	0	return -1;
122	0	if (((p[1] & 0xc0) != 0x80) ||
123	0	((p[2] & 0xc0) != 0x80) ||
124	0	((p[3] & 0xc0) != 0x80))
125	0	return -3;
126	0	value = ((unsigned long)(*p++ & 0x7)) << 18;
127	0	value |= (*p++ & 0x3f) << 12;
128	0	value |= (*p++ & 0x3f) << 6;
129	0	value |= *p++ & 0x3f;
130	0	if (value < 0x10000)
131	0	return -4;
132	0	if (value > UNICODE_MAX)
133	0	return -2;
134	0	ret = 4;
135	0	} else
136	0	return -2;
137	0	*val = value;
138	0	return ret;
139	0	}
140
141		/* This takes a Unicode code point 'value' and writes its UTF-8 encoded form
142		* in 'str' where 'str' is a buffer of at least length 'len'. If 'str'
143		* is NULL, then nothing is written and just the return code is determined.
144
145		* Returns less than zero on error:
146		* -1 if 'str' is not NULL and 'len' is too small
147		* -2 if 'value' is an invalid character (surrogate or out-of-range)
148		*
149		* Otherwise, returns the number of bytes in 'value's encoded form
150		* (i.e., the number of bytes written to 'str' when it's not NULL).
151		*
152		* It will need at most 4 characters.
153		*/
154
155		int
156		UTF8_putc(unsigned char *str, int len, unsigned long value)
157	0	{
158	0	if (value < 0x80) {
159	0	if (str != NULL) {
160	0	if (len < 1)
161	0	return -1;
162	0	str[0] = (unsigned char)value;
163	0	}
164	0	return 1;
165	0	}
166	0	if (value < 0x800) {
167	0	if (str != NULL) {
168	0	if (len < 2)
169	0	return -1;
170	0	str[0] = (unsigned char)(((value >> 6) & 0x1f) | 0xc0);
171	0	str[1] = (unsigned char)((value & 0x3f) | 0x80);
172	0	}
173	0	return 2;
174	0	}
175	0	if (value < 0x10000) {
176	0	if (UNICODE_IS_SURROGATE(value))
177	0	return -2;
178	0	if (str != NULL) {
179	0	if (len < 3)
180	0	return -1;
181	0	str[0] = (unsigned char)(((value >> 12) & 0xf) | 0xe0);
182	0	str[1] = (unsigned char)(((value >> 6) & 0x3f) | 0x80);
183	0	str[2] = (unsigned char)((value & 0x3f) | 0x80);
184	0	}
185	0	return 3;
186	0	}
187	0	if (value <= UNICODE_MAX) {
188	0	if (str != NULL) {
189	0	if (len < 4)
190	0	return -1;
191	0	str[0] = (unsigned char)(((value >> 18) & 0x7) | 0xf0);
192	0	str[1] = (unsigned char)(((value >> 12) & 0x3f) | 0x80);
193	0	str[2] = (unsigned char)(((value >> 6) & 0x3f) | 0x80);
194	0	str[3] = (unsigned char)((value & 0x3f) | 0x80);
195	0	}
196	0	return 4;
197	0	}
198	0	return -2;
199	0	}