/src/gettext-0.26/gettext-tools/libgettextpo/unistr/u8-strmbtouc.c
Line | Count | Source |
1 | | /* Look at first character in UTF-8 string. |
2 | | Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2025 Free Software |
3 | | Foundation, Inc. |
4 | | Written by Bruno Haible <bruno@clisp.org>, 2002. |
5 | | |
6 | | This file is free software. |
7 | | It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+". |
8 | | You can redistribute it and/or modify it under either |
9 | | - the terms of the GNU Lesser General Public License as published |
10 | | by the Free Software Foundation, either version 3, or (at your |
11 | | option) any later version, or |
12 | | - the terms of the GNU General Public License as published by the |
13 | | Free Software Foundation; either version 2, or (at your option) |
14 | | any later version, or |
15 | | - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+". |
16 | | |
17 | | This file is distributed in the hope that it will be useful, |
18 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
19 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
20 | | Lesser General Public License and the GNU General Public License |
21 | | for more details. |
22 | | |
23 | | You should have received a copy of the GNU Lesser General Public |
24 | | License and of the GNU General Public License along with this |
25 | | program. If not, see <https://www.gnu.org/licenses/>. */ |
26 | | |
27 | | #include <config.h> |
28 | | |
29 | | /* Specification. */ |
30 | | #include "unistr.h" |
31 | | |
/* Decode the character at the start of the NUL-terminated UTF-8 string S.

   On success the code point is stored in *PUC and the number of bytes it
   occupies (1 to 4) is returned; if S starts with the NUL terminator,
   0 is stored in *PUC and 0 is returned.
   If S does not start with a complete, well-formed sequence, -1 is
   returned and *PUC is left untouched.

   A byte of S is examined only after all bytes before it are known to
   be non-NUL, so the function never reads past the terminator.  */
int
u8_strmbtouc (ucs4_t *puc, const uint8_t *s)
{
  /* Keep in sync with unistr.h and u8-mbtouc-aux.c.  */
  const uint8_t lead = s[0];
  unsigned int t1;
  unsigned int t2;
  unsigned int t3;

  /* Single byte: ASCII, or the terminating NUL (length 0).  */
  if (lead < 0x80)
    {
      *puc = lead;
      return lead != 0;
    }

  /* 0x80..0xC1 and 0xF5..0xFF are not accepted as a first byte.  */
  if (lead < 0xc2 || lead > 0xf4)
    return -1;

  /* Every remaining form needs a trailing byte 0x80..0xBF in second
     position.  XOR with 0x80 maps that range onto 0x00..0x3F, which is
     also the 6-bit payload.  */
  t1 = (unsigned int) (s[1] ^ 0x80);
  if (t1 >= 0x40)
    return -1;

  /* Two-byte form.  */
  if (lead < 0xe0)
    {
      *puc = ((unsigned int) (lead & 0x1f) << 6) | t1;
      return 2;
    }

  /* Three- and four-byte forms need a second trailing byte.  */
  t2 = (unsigned int) (s[2] ^ 0x80);
  if (t2 >= 0x40)
    return -1;

  /* Three-byte form.  */
  if (lead < 0xf0)
    {
      /* Reject results below 0x800 (E0 followed by 80..9F).  */
      if (lead == 0xe0 && s[1] < 0xa0)
        return -1;
      /* Reject results in 0xD800..0xDFFF (ED followed by A0..BF).  */
      if (lead == 0xed && s[1] >= 0xa0)
        return -1;

      *puc = ((unsigned int) (lead & 0x0f) << 12) | (t1 << 6) | t2;
      return 3;
    }

  /* Four-byte form: one more trailing byte.  */
  t3 = (unsigned int) (s[3] ^ 0x80);
  if (t3 >= 0x40)
    return -1;
  /* Reject results below 0x10000 (F0 followed by 80..8F).  */
  if (lead == 0xf0 && s[1] < 0x90)
    return -1;
  /* Reject results above 0x10FFFF (F4 followed by 90..BF).  */
  if (lead == 0xf4 && s[1] >= 0x90)
    return -1;

  *puc = ((unsigned int) (lead & 0x07) << 18) | (t1 << 12) | (t2 << 6) | t3;
  return 4;
}