/proc/self/cwd/external/antlr4-cpp-runtime~/runtime/src/ANTLRInputStream.cpp
Line | Count | Source |
1 | | /* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. |
2 | | * Use of this file is governed by the BSD 3-clause license that |
3 | | * can be found in the LICENSE.txt file in the project root. |
4 | | */ |
5 | | |
6 | | #include <string.h> |
7 | | |
8 | | #include "Exceptions.h" |
9 | | #include "misc/Interval.h" |
10 | | #include "IntStream.h" |
11 | | |
12 | | #include "support/Utf8.h" |
13 | | #include "support/CPPUtils.h" |
14 | | |
15 | | #include "ANTLRInputStream.h" |
16 | | |
17 | | using namespace antlr4; |
18 | | using namespace antlrcpp; |
19 | | |
20 | | using misc::Interval; |
21 | | |
22 | 0 | ANTLRInputStream::ANTLRInputStream() { |
23 | 0 | InitializeInstanceFields(); |
24 | 0 | } |
25 | | |
26 | 0 | ANTLRInputStream::ANTLRInputStream(std::string_view input): ANTLRInputStream() { |
27 | 0 | load(input.data(), input.length()); |
28 | 0 | } |
29 | | |
30 | 0 | ANTLRInputStream::ANTLRInputStream(const char *data, size_t length) { |
31 | 0 | load(data, length); |
32 | 0 | } |
33 | | |
34 | 0 | ANTLRInputStream::ANTLRInputStream(std::istream &stream): ANTLRInputStream() { |
35 | 0 | load(stream); |
36 | 0 | } |
37 | | |
38 | 0 | void ANTLRInputStream::load(const std::string &input, bool lenient) { |
39 | 0 | load(input.data(), input.size(), lenient); |
40 | 0 | } |
41 | | |
42 | 0 | void ANTLRInputStream::load(const char *data, size_t length, bool lenient) { |
43 | | // Remove the UTF-8 BOM if present. |
44 | 0 | const char *bom = "\xef\xbb\xbf"; |
45 | 0 | if (length >= 3 && strncmp(data, bom, 3) == 0) { |
46 | 0 | data += 3; |
47 | 0 | length -= 3; |
48 | 0 | } |
49 | 0 | if (lenient) { |
50 | 0 | _data = Utf8::lenientDecode(std::string_view(data, length)); |
51 | 0 | } else { |
52 | 0 | auto maybe_utf32 = Utf8::strictDecode(std::string_view(data, length)); |
53 | 0 | if (!maybe_utf32.has_value()) { |
54 | 0 | throw IllegalArgumentException("UTF-8 string contains an illegal byte sequence"); |
55 | 0 | } |
56 | 0 | _data = std::move(maybe_utf32).value(); |
57 | 0 | } |
58 | 0 | p = 0; |
59 | 0 | } |
60 | | |
61 | 0 | void ANTLRInputStream::load(std::istream &stream, bool lenient) { |
62 | 0 | if (!stream.good() || stream.eof()) // No fail, bad or EOF. |
63 | 0 | return; |
64 | | |
65 | 0 | _data.clear(); |
66 | |
|
67 | 0 | std::string s((std::istreambuf_iterator<char>(stream)), std::istreambuf_iterator<char>()); |
68 | 0 | load(s.data(), s.length(), lenient); |
69 | 0 | } |
70 | | |
71 | 0 | void ANTLRInputStream::reset() { |
72 | 0 | p = 0; |
73 | 0 | } |
74 | | |
75 | 0 | void ANTLRInputStream::consume() { |
76 | 0 | if (p >= _data.size()) { |
77 | 0 | assert(LA(1) == IntStream::EOF); |
78 | 0 | throw IllegalStateException("cannot consume EOF"); |
79 | 0 | } |
80 | | |
81 | 0 | if (p < _data.size()) { |
82 | 0 | p++; |
83 | 0 | } |
84 | 0 | } |
85 | | |
86 | 0 | size_t ANTLRInputStream::LA(ssize_t i) { |
87 | 0 | if (i == 0) { |
88 | 0 | return 0; // undefined |
89 | 0 | } |
90 | | |
91 | 0 | ssize_t position = static_cast<ssize_t>(p); |
92 | 0 | if (i < 0) { |
93 | 0 | i++; // e.g., translate LA(-1) to use offset i=0; then _data[p+0-1] |
94 | 0 | if ((position + i - 1) < 0) { |
95 | 0 | return IntStream::EOF; // invalid; no char before first char |
96 | 0 | } |
97 | 0 | } |
98 | | |
99 | 0 | if ((position + i - 1) >= static_cast<ssize_t>(_data.size())) { |
100 | 0 | return IntStream::EOF; |
101 | 0 | } |
102 | | |
103 | 0 | return _data[static_cast<size_t>((position + i - 1))]; |
104 | 0 | } |
105 | | |
106 | 0 | size_t ANTLRInputStream::LT(ssize_t i) { |
107 | 0 | return LA(i); |
108 | 0 | } |
109 | | |
110 | 0 | size_t ANTLRInputStream::index() { |
111 | 0 | return p; |
112 | 0 | } |
113 | | |
114 | 0 | size_t ANTLRInputStream::size() { |
115 | 0 | return _data.size(); |
116 | 0 | } |
117 | | |
118 | | // Mark/release do nothing. We have entire buffer. |
119 | 0 | ssize_t ANTLRInputStream::mark() { |
120 | 0 | return -1; |
121 | 0 | } |
122 | | |
123 | 0 | void ANTLRInputStream::release(ssize_t /* marker */) { |
124 | 0 | } |
125 | | |
126 | 0 | void ANTLRInputStream::seek(size_t index) { |
127 | 0 | if (index <= p) { |
128 | 0 | p = index; // just jump; don't update stream state (line, ...) |
129 | 0 | return; |
130 | 0 | } |
131 | | // seek forward, consume until p hits index or n (whichever comes first) |
132 | 0 | index = std::min(index, _data.size()); |
133 | 0 | while (p < index) { |
134 | 0 | consume(); |
135 | 0 | } |
136 | 0 | } |
137 | | |
138 | 0 | std::string ANTLRInputStream::getText(const Interval &interval) { |
139 | 0 | if (interval.a < 0 || interval.b < 0) { |
140 | 0 | return ""; |
141 | 0 | } |
142 | | |
143 | 0 | size_t start = static_cast<size_t>(interval.a); |
144 | 0 | size_t stop = static_cast<size_t>(interval.b); |
145 | | |
146 | |
|
147 | 0 | if (stop >= _data.size()) { |
148 | 0 | stop = _data.size() - 1; |
149 | 0 | } |
150 | |
|
151 | 0 | size_t count = stop - start + 1; |
152 | 0 | if (start >= _data.size()) { |
153 | 0 | return ""; |
154 | 0 | } |
155 | | |
156 | 0 | auto maybeUtf8 = Utf8::strictEncode(std::u32string_view(_data).substr(start, count)); |
157 | 0 | if (!maybeUtf8.has_value()) { |
158 | 0 | throw IllegalArgumentException("Input stream contains invalid Unicode code points"); |
159 | 0 | } |
160 | 0 | return std::move(maybeUtf8).value(); |
161 | 0 | } |
162 | | |
163 | 0 | std::string ANTLRInputStream::getSourceName() const { |
164 | 0 | if (name.empty()) { |
165 | 0 | return IntStream::UNKNOWN_SOURCE_NAME; |
166 | 0 | } |
167 | 0 | return name; |
168 | 0 | } |
169 | | |
170 | 0 | std::string ANTLRInputStream::toString() const { |
171 | 0 | auto maybeUtf8 = Utf8::strictEncode(_data); |
172 | 0 | if (!maybeUtf8.has_value()) { |
173 | 0 | throw IllegalArgumentException("Input stream contains invalid Unicode code points"); |
174 | 0 | } |
175 | 0 | return std::move(maybeUtf8).value(); |
176 | 0 | } |
177 | | |
178 | 0 | void ANTLRInputStream::InitializeInstanceFields() { |
179 | 0 | p = 0; |
180 | 0 | } |