/src/CMake/Source/cmXMLSafe.cxx
Line | Count | Source |
1 | | /* Distributed under the OSI-approved BSD 3-Clause License. See accompanying |
2 | | file LICENSE.rst or https://cmake.org/licensing for details. */ |
3 | | #include "cmXMLSafe.h" |
4 | | |
5 | | #include <cstdio> |
6 | | #include <cstring> |
7 | | #include <sstream> |
8 | | |
9 | | #include "cm_utf8.h" |
10 | | |
11 | | cmXMLSafe::cmXMLSafe(char const* s) |
12 | 0 | : Data(s) |
13 | 0 | , Size(static_cast<unsigned long>(strlen(s))) |
14 | 0 | , DoQuotes(true) |
15 | 0 | { |
16 | 0 | } |
17 | | |
18 | | cmXMLSafe::cmXMLSafe(std::string const& s) |
19 | 0 | : Data(s.c_str()) |
20 | 0 | , Size(static_cast<unsigned long>(s.length())) |
21 | 0 | , DoQuotes(true) |
22 | 0 | { |
23 | 0 | } |
24 | | |
25 | | cmXMLSafe& cmXMLSafe::Quotes(bool b) |
26 | 0 | { |
27 | 0 | this->DoQuotes = b; |
28 | 0 | return *this; |
29 | 0 | } |
30 | | |
31 | | std::string cmXMLSafe::str() const |
32 | 0 | { |
33 | 0 | std::ostringstream ss; |
34 | 0 | ss << *this; |
35 | 0 | return ss.str(); |
36 | 0 | } |
37 | | |
38 | | std::ostream& operator<<(std::ostream& os, cmXMLSafe const& self) |
39 | 0 | { |
40 | 0 | char const* first = self.Data; |
41 | 0 | char const* last = self.Data + self.Size; |
42 | 0 | while (first != last) { |
43 | 0 | unsigned int ch; |
44 | 0 | if (char const* next = cm_utf8_decode_character(first, last, &ch)) { |
45 | | // http://www.w3.org/TR/REC-xml/#NT-Char |
46 | 0 | if ((ch >= 0x20 && ch <= 0xD7FF) || (ch >= 0xE000 && ch <= 0xFFFD) || |
47 | 0 | (ch >= 0x10000 && ch <= 0x10FFFF) || ch == 0x9 || ch == 0xA || |
48 | 0 | ch == 0xD) { |
49 | 0 | switch (ch) { |
50 | | // Escape XML control characters. |
51 | 0 | case '&': |
52 | 0 | os << "&"; |
53 | 0 | break; |
54 | 0 | case '<': |
55 | 0 | os << "<"; |
56 | 0 | break; |
57 | 0 | case '>': |
58 | 0 | os << ">"; |
59 | 0 | break; |
60 | 0 | case '"': |
61 | 0 | os << (self.DoQuotes ? """ : "\""); |
62 | 0 | break; |
63 | 0 | case '\'': |
64 | 0 | os << (self.DoQuotes ? "'" : "'"); |
65 | 0 | break; |
66 | 0 | case '\r': |
67 | 0 | break; // Ignore CR |
68 | | // Print the UTF-8 character. |
69 | 0 | default: |
70 | 0 | os.write(first, next - first); |
71 | 0 | break; |
72 | 0 | } |
73 | 0 | } else { |
74 | | // Use a human-readable hex value for this invalid character. |
75 | 0 | char buf[16]; |
76 | 0 | snprintf(buf, sizeof(buf), "%X", ch); |
77 | 0 | os << "[NON-XML-CHAR-0x" << buf << "]"; |
78 | 0 | } |
79 | | |
80 | 0 | first = next; |
81 | 0 | } else { |
82 | 0 | ch = static_cast<unsigned char>(*first++); |
83 | | // Use a human-readable hex value for this invalid byte. |
84 | 0 | char buf[16]; |
85 | 0 | snprintf(buf, sizeof(buf), "%X", ch); |
86 | 0 | os << "[NON-UTF-8-BYTE-0x" << buf << "]"; |
87 | 0 | } |
88 | 0 | } |
89 | 0 | return os; |
90 | 0 | } |