/src/yaml-cpp/src/exp.cpp
Line | Count | Source |
1 | | #include <sstream> |
2 | | |
3 | | #include "exp.h" |
4 | | #include "stream.h" |
5 | | #include "yaml-cpp/exceptions.h" // IWYU pragma: keep |
6 | | |
7 | | namespace YAML { |
8 | | struct Mark; |
9 | | } // namespace YAML |
10 | | |
11 | | namespace YAML { |
12 | | namespace Exp { |
13 | 1.78k | unsigned ParseHex(const std::string& str, const Mark& mark) { |
14 | 1.78k | unsigned value = 0; |
15 | 8.01k | for (char ch : str) { |
16 | 8.01k | int digit = 0; |
17 | 8.01k | if ('a' <= ch && ch <= 'f') |
18 | 2.01k | digit = ch - 'a' + 10; |
19 | 6.00k | else if ('A' <= ch && ch <= 'F') |
20 | 1.74k | digit = ch - 'A' + 10; |
21 | 4.25k | else if ('0' <= ch && ch <= '9') |
22 | 4.17k | digit = ch - '0'; |
23 | 82 | else |
24 | 82 | throw ParserException(mark, ErrorMsg::INVALID_HEX); |
25 | | |
26 | 7.93k | value = (value << 4) + digit; |
27 | 7.93k | } |
28 | | |
29 | 1.70k | return value; |
30 | 1.78k | } |
31 | | |
// Str
// . Returns a one-character string holding 'ch' converted to a char.
std::string Str(unsigned ch) {
  const char narrowed = static_cast<char>(ch);
  std::string result;
  result.push_back(narrowed);
  return result;
}
33 | | |
34 | | // Escape |
35 | | // . Translates the next 'codeLength' characters into a hex number and returns |
36 | | // the result. |
37 | | // . Throws if it's not actually hex. |
38 | 1.78k | std::string Escape(Stream& in, int codeLength) { |
39 | | // grab string |
40 | 1.78k | std::string str; |
41 | 10.0k | for (int i = 0; i < codeLength; i++) |
42 | 8.31k | str += in.get(); |
43 | | |
44 | | // get the value |
45 | 1.78k | unsigned value = ParseHex(str, in.mark()); |
46 | | |
47 | | // legal unicode? |
48 | 1.78k | if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) { |
49 | 49 | std::stringstream msg; |
50 | 49 | msg << ErrorMsg::INVALID_UNICODE << value; |
51 | 49 | throw ParserException(in.mark(), msg.str()); |
52 | 49 | } |
53 | | |
54 | | // now break it up into chars |
55 | 1.73k | if (value <= 0x7F) |
56 | 207 | return Str(value); |
57 | | |
58 | 1.52k | if (value <= 0x7FF) |
59 | 338 | return Str(0xC0 + (value >> 6)) + Str(0x80 + (value & 0x3F)); |
60 | | |
61 | 1.19k | if (value <= 0xFFFF) |
62 | 773 | return Str(0xE0 + (value >> 12)) + Str(0x80 + ((value >> 6) & 0x3F)) + |
63 | 773 | Str(0x80 + (value & 0x3F)); |
64 | | |
65 | 417 | return Str(0xF0 + (value >> 18)) + Str(0x80 + ((value >> 12) & 0x3F)) + |
66 | 417 | Str(0x80 + ((value >> 6) & 0x3F)) + Str(0x80 + (value & 0x3F)); |
67 | 1.19k | } |
68 | | |
69 | | // Escape |
70 | | // . Escapes the sequence starting 'in' (it must begin with a '\' or single |
71 | | // quote) |
72 | | // and returns the result. |
73 | | // . Throws if it's an unknown escape character. |
74 | 38.3k | std::string Escape(Stream& in) { |
75 | | // eat slash |
76 | 38.3k | char escape = in.get(); |
77 | | |
78 | | // switch on escape character |
79 | 38.3k | char ch = in.get(); |
80 | | |
81 | | // first do single quote, since it's easier |
82 | 38.3k | if (escape == '\'' && ch == '\'') |
83 | 590 | return "\'"; |
84 | | |
85 | | // now do the slash (we're not gonna check if it's a slash - you better pass |
86 | | // one!) |
87 | 37.7k | switch (ch) { |
88 | 710 | case '0': |
89 | 710 | return std::string(1, '\x00'); |
90 | 221 | case 'a': |
91 | 221 | return "\x07"; |
92 | 766 | case 'b': |
93 | 766 | return "\x08"; |
94 | 363 | case 't': |
95 | 6.26k | case '\t': |
96 | 6.26k | return "\x09"; |
97 | 5.06k | case 'n': |
98 | 5.06k | return "\x0A"; |
99 | 539 | case 'v': |
100 | 539 | return "\x0B"; |
101 | 250 | case 'f': |
102 | 250 | return "\x0C"; |
103 | 199 | case 'r': |
104 | 199 | return "\x0D"; |
105 | 717 | case 'e': |
106 | 717 | return "\x1B"; |
107 | 561 | case ' ': |
108 | 561 | return R"( )"; |
109 | 4.95k | case '\"': |
110 | 4.95k | return "\""; |
111 | 305 | case '\'': |
112 | 305 | return "\'"; |
113 | 412 | case '\\': |
114 | 412 | return "\\"; |
115 | 213 | case '/': |
116 | 213 | return "/"; |
117 | 339 | case 'N': |
118 | 339 | return "\xC2\x85"; // NEL (U+0085) |
119 | 237 | case '_': |
120 | 237 | return "\xC2\xA0"; // NBSP (U+00A0) |
121 | 9.53k | case 'L': |
122 | 9.53k | return "\xE2\x80\xA8"; // LS (U+2028) |
123 | 4.48k | case 'P': |
124 | 4.48k | return "\xE2\x80\xA9"; // PS (U+2029) |
125 | 523 | case 'x': |
126 | 523 | return Escape(in, 2); |
127 | 705 | case 'u': |
128 | 705 | return Escape(in, 4); |
129 | 556 | case 'U': |
130 | 556 | return Escape(in, 8); |
131 | 37.7k | } |
132 | | |
133 | 213 | std::stringstream msg; |
134 | 213 | throw ParserException(in.mark(), std::string(ErrorMsg::INVALID_ESCAPE) + ch); |
135 | 37.7k | } |
136 | | } // namespace Exp |
137 | | } // namespace YAML |