/proc/self/cwd/external/antlr4-cpp-runtime~/runtime/src/ANTLRInputStream.cpp

Source
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

#include <string.h>

#include "Exceptions.h"
#include "misc/Interval.h"
#include "IntStream.h"

#include "support/Utf8.h"
#include "support/CPPUtils.h"

#include "ANTLRInputStream.h"

using namespace antlr4;
using namespace antlrcpp;

using misc::Interval;

// Default constructor: loads no input. It only runs
// InitializeInstanceFields(), which sets the read position `p` to 0.
ANTLRInputStream::ANTLRInputStream() {
  InitializeInstanceFields();
}

// Constructs a stream from the bytes viewed by `input`.
// The default constructor runs first (resetting `p`), then the bytes are
// handed to load(data, length) without an explicit `lenient` argument.
ANTLRInputStream::ANTLRInputStream(std::string_view input)
    : ANTLRInputStream() {
  load(input.data(), input.size());
}

// Constructs a stream from `length` bytes starting at `data`.
// Unlike the other converting constructors this one does not delegate to the
// default constructor; the read position is set by load() itself.
ANTLRInputStream::ANTLRInputStream(const char *data, size_t length) {
  load(data, length);
}

// Constructs a stream from the content of `stream`.
// The default constructor runs first (resetting `p`), then the stream is
// handed to load(stream) without an explicit `lenient` argument.
ANTLRInputStream::ANTLRInputStream(std::istream &stream): ANTLRInputStream() {
  load(stream);
}

// Convenience overload: forwards the bytes owned by `input` to the
// pointer/length overload, passing `lenient` through unchanged.
void ANTLRInputStream::load(const std::string &input, bool lenient) {
  const char *bytes = input.data();
  const size_t byteCount = input.length();
  load(bytes, byteCount, lenient);
}

// Decodes `length` bytes of UTF-8 starting at `data` into `_data` and rewinds
// the read position to 0.
//
// A leading UTF-8 byte order mark (EF BB BF) is skipped before decoding.
// With `lenient` set the bytes go through Utf8::lenientDecode; otherwise
// Utf8::strictDecode is used and an IllegalArgumentException is thrown when it
// yields no value. In that case neither `_data` nor `p` is modified here.
void ANTLRInputStream::load(const char *data, size_t length, bool lenient) {
  constexpr std::string_view kUtf8Bom("\xef\xbb\xbf", 3);

  std::string_view bytes(data, length);
  // substr() clamps to the available size, so inputs shorter than the BOM
  // simply compare unequal.
  if (bytes.substr(0, kUtf8Bom.size()) == kUtf8Bom) {
    bytes.remove_prefix(kUtf8Bom.size());
  }

  if (!lenient) {
    auto decoded = Utf8::strictDecode(bytes);
    if (!decoded.has_value()) {
      throw IllegalArgumentException("UTF-8 string contains an illegal byte sequence");
    }
    _data = std::move(decoded).value();
  } else {
    _data = Utf8::lenientDecode(bytes);
  }

  p = 0;
}

// Reads everything remaining in `stream` and decodes it via the
// pointer/length overload.
//
// Does nothing (the current buffer is kept) when the stream is not in a good
// state or is already at EOF. Otherwise the current buffer is cleared before
// the stream is read.
void ANTLRInputStream::load(std::istream &stream, bool lenient) {
  const bool readable = stream.good() && !stream.eof();
  if (!readable) {
    return;
  }

  _data.clear();

  using ByteIterator = std::istreambuf_iterator<char>;
  std::string contents;
  contents.assign(ByteIterator(stream), ByteIterator());
  load(contents.data(), contents.length(), lenient);
}

// Rewinds the read position to 0. The decoded buffer is not touched.
void ANTLRInputStream::reset() {
  p = 0;
}

// Advances the read position by one code point.
// Throws IllegalStateException when the position is already at (or past) the
// end of the buffer.
void ANTLRInputStream::consume() {
  const bool atEnd = p >= _data.size();
  if (atEnd) {
    // Debug-only sanity check: lookahead at the end must report EOF.
    assert(LA(1) == IntStream::EOF);
    throw IllegalStateException("cannot consume EOF");
  }

  // Past the guard `p` is strictly below _data.size(), so one step is safe.
  ++p;
}

// Looks at a code point relative to the read position without consuming it.
//
//   i > 0 : the i-th code point ahead, LA(1) being the one at `p`.
//   i < 0 : the |i|-th code point behind, LA(-1) being the one at `p - 1`.
//   i == 0: undefined; 0 is returned.
//
// Returns IntStream::EOF when the requested position lies before the start or
// at/after the end of the buffer.
size_t ANTLRInputStream::LA(ssize_t i) {
  if (i == 0) {
    return 0; // undefined
  }

  // Positive lookahead is 1-based (LA(1) -> offset 0); negative lookbehind
  // maps directly onto the offset (LA(-1) -> offset -1).
  const ssize_t offset = (i > 0) ? i - 1 : i;
  const ssize_t target = static_cast<ssize_t>(p) + offset;

  if (i < 0 && target < 0) {
    return IntStream::EOF; // invalid; no char before first char
  }
  if (target >= static_cast<ssize_t>(_data.size())) {
    return IntStream::EOF;
  }

  return _data[static_cast<size_t>(target)];
}

// Same as LA(i): forwards the call and returns its result unchanged.
size_t ANTLRInputStream::LT(ssize_t i) {
  return LA(i);
}

// Returns the current read position `p` (an index into the decoded buffer).
size_t ANTLRInputStream::index() {
  return p;
}

// Returns the number of elements in the decoded buffer `_data`
// (decoded code points, not input bytes).
size_t ANTLRInputStream::size() {
  return _data.size();
}

// mark() and release() are no-ops because the whole input is already held in
// the buffer. mark() always returns the fixed marker value -1.
ssize_t ANTLRInputStream::mark() {
  return -1;
}

// Intentionally empty: the marker argument is ignored and no state changes.
void ANTLRInputStream::release(ssize_t /* marker */) {
}

// Moves the read position to `index`.
//
// Seeking backwards (or to the current position) just assigns `p`. Seeking
// forwards steps with consume() until `p` reaches `index` or the end of the
// buffer, whichever comes first.
void ANTLRInputStream::seek(size_t index) {
  if (index > p) {
    // Never walk past the end of the buffer.
    const size_t target = std::min(index, _data.size());
    while (p < target) {
      consume();
    }
    return;
  }

  p = index; // just jump; don't update stream state (line, ...)
}

// Returns the UTF-8 encoding of the code points in the closed index range
// [interval.a, interval.b] of the decoded buffer.
//
// An empty string is returned when
//   - either endpoint is negative,
//   - the start lies at or beyond the end of the buffer, or
//   - the interval is inverted (interval.b < interval.a).
// A stop index beyond the end of the buffer is clamped to the last element.
//
// Throws IllegalArgumentException when Utf8::strictEncode yields no value for
// the selected code points.
std::string ANTLRInputStream::getText(const Interval &interval) {
  if (interval.a < 0 || interval.b < 0) {
    return "";
  }

  const size_t start = static_cast<size_t>(interval.a);
  // Check the start before clamping the stop: this also covers the empty
  // buffer, so `_data.size() - 1` below can never wrap around.
  if (start >= _data.size()) {
    return "";
  }

  size_t stop = static_cast<size_t>(interval.b);
  if (stop >= _data.size()) {
    stop = _data.size() - 1;
  }

  // An inverted interval selects nothing. Without this guard the unsigned
  // `stop - start + 1` underflows to a huge count and substr() would return
  // everything from `start` to the end of the buffer.
  if (stop < start) {
    return "";
  }

  const size_t count = stop - start + 1;
  auto maybeUtf8 = Utf8::strictEncode(std::u32string_view(_data).substr(start, count));
  if (!maybeUtf8.has_value()) {
    throw IllegalArgumentException("Input stream contains invalid Unicode code points");
  }
  return std::move(maybeUtf8).value();
}

// Returns the configured source name, or IntStream::UNKNOWN_SOURCE_NAME when
// no name has been set (`name` is empty).
std::string ANTLRInputStream::getSourceName() const {
  if (!name.empty()) {
    return name;
  }
  return IntStream::UNKNOWN_SOURCE_NAME;
}

// Returns the whole decoded buffer re-encoded as UTF-8.
// Throws IllegalArgumentException when Utf8::strictEncode yields no value.
std::string ANTLRInputStream::toString() const {
  auto encoded = Utf8::strictEncode(_data);
  if (encoded.has_value()) {
    return std::move(encoded).value();
  }
  throw IllegalArgumentException("Input stream contains invalid Unicode code points");
}

// Shared constructor helper: puts the read position at the start (p = 0).
void ANTLRInputStream::InitializeInstanceFields() {
  p = 0;
}

Line	Count	Source
1		/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
2		* Use of this file is governed by the BSD 3-clause license that
3		* can be found in the LICENSE.txt file in the project root.
4		*/
5
6		#include <string.h>
7
8		#include "Exceptions.h"
9		#include "misc/Interval.h"
10		#include "IntStream.h"
11
12		#include "support/Utf8.h"
13		#include "support/CPPUtils.h"
14
15		#include "ANTLRInputStream.h"
16
17		using namespace antlr4;
18		using namespace antlrcpp;
19
20		using misc::Interval;
21
22	0	ANTLRInputStream::ANTLRInputStream() {
23	0	InitializeInstanceFields();
24	0	}
25
26	0	ANTLRInputStream::ANTLRInputStream(std::string_view input): ANTLRInputStream() {
27	0	load(input.data(), input.length());
28	0	}
29
30	0	ANTLRInputStream::ANTLRInputStream(const char *data, size_t length) {
31	0	load(data, length);
32	0	}
33
34	0	ANTLRInputStream::ANTLRInputStream(std::istream &stream): ANTLRInputStream() {
35	0	load(stream);
36	0	}
37
38	0	void ANTLRInputStream::load(const std::string &input, bool lenient) {
39	0	load(input.data(), input.size(), lenient);
40	0	}
41
42	0	void ANTLRInputStream::load(const char *data, size_t length, bool lenient) {
43		// Remove the UTF-8 BOM if present.
44	0	const char *bom = "\xef\xbb\xbf";
45	0	if (length >= 3 && strncmp(data, bom, 3) == 0) {
46	0	data += 3;
47	0	length -= 3;
48	0	}
49	0	if (lenient) {
50	0	_data = Utf8::lenientDecode(std::string_view(data, length));
51	0	} else {
52	0	auto maybe_utf32 = Utf8::strictDecode(std::string_view(data, length));
53	0	if (!maybe_utf32.has_value()) {
54	0	throw IllegalArgumentException("UTF-8 string contains an illegal byte sequence");
55	0	}
56	0	_data = std::move(maybe_utf32).value();
57	0	}
58	0	p = 0;
59	0	}
60
61	0	void ANTLRInputStream::load(std::istream &stream, bool lenient) {
62	0	if (!stream.good() || stream.eof()) // No fail, bad or EOF.
63	0	return;
64
65	0	_data.clear();
66
67	0	std::string s((std::istreambuf_iterator<char>(stream)), std::istreambuf_iterator<char>());
68	0	load(s.data(), s.length(), lenient);
69	0	}
70
71	0	void ANTLRInputStream::reset() {
72	0	p = 0;
73	0	}
74
75	0	void ANTLRInputStream::consume() {
76	0	if (p >= _data.size()) {
77	0	assert(LA(1) == IntStream::EOF);
78	0	throw IllegalStateException("cannot consume EOF");
79	0	}
80
81	0	if (p < _data.size()) {
82	0	p++;
83	0	}
84	0	}
85
86	0	size_t ANTLRInputStream::LA(ssize_t i) {
87	0	if (i == 0) {
88	0	return 0; // undefined
89	0	}
90
91	0	ssize_t position = static_cast<ssize_t>(p);
92	0	if (i < 0) {
93	0	i++; // e.g., translate LA(-1) to use offset i=0; then _data[p+0-1]
94	0	if ((position + i - 1) < 0) {
95	0	return IntStream::EOF; // invalid; no char before first char
96	0	}
97	0	}
98
99	0	if ((position + i - 1) >= static_cast<ssize_t>(_data.size())) {
100	0	return IntStream::EOF;
101	0	}
102
103	0	return _data[static_cast<size_t>((position + i - 1))];
104	0	}
105
106	0	size_t ANTLRInputStream::LT(ssize_t i) {
107	0	return LA(i);
108	0	}
109
110	0	size_t ANTLRInputStream::index() {
111	0	return p;
112	0	}
113
114	0	size_t ANTLRInputStream::size() {
115	0	return _data.size();
116	0	}
117
118		// Mark/release do nothing. We have entire buffer.
119	0	ssize_t ANTLRInputStream::mark() {
120	0	return -1;
121	0	}
122
123	0	void ANTLRInputStream::release(ssize_t /* marker */) {
124	0	}
125
126	0	void ANTLRInputStream::seek(size_t index) {
127	0	if (index <= p) {
128	0	p = index; // just jump; don't update stream state (line, ...)
129	0	return;
130	0	}
131		// seek forward, consume until p hits index or n (whichever comes first)
132	0	index = std::min(index, _data.size());
133	0	while (p < index) {
134	0	consume();
135	0	}
136	0	}
137
138	0	std::string ANTLRInputStream::getText(const Interval &interval) {
139	0	if (interval.a < 0 || interval.b < 0) {
140	0	return "";
141	0	}
142
143	0	size_t start = static_cast<size_t>(interval.a);
144	0	size_t stop = static_cast<size_t>(interval.b);
145
146
147	0	if (stop >= _data.size()) {
148	0	stop = _data.size() - 1;
149	0	}
150
151	0	size_t count = stop - start + 1;
152	0	if (start >= _data.size()) {
153	0	return "";
154	0	}
155
156	0	auto maybeUtf8 = Utf8::strictEncode(std::u32string_view(_data).substr(start, count));
157	0	if (!maybeUtf8.has_value()) {
158	0	throw IllegalArgumentException("Input stream contains invalid Unicode code points");
159	0	}
160	0	return std::move(maybeUtf8).value();
161	0	}
162
163	0	std::string ANTLRInputStream::getSourceName() const {
164	0	if (name.empty()) {
165	0	return IntStream::UNKNOWN_SOURCE_NAME;
166	0	}
167	0	return name;
168	0	}
169
170	0	std::string ANTLRInputStream::toString() const {
171	0	auto maybeUtf8 = Utf8::strictEncode(_data);
172	0	if (!maybeUtf8.has_value()) {
173	0	throw IllegalArgumentException("Input stream contains invalid Unicode code points");
174	0	}
175	0	return std::move(maybeUtf8).value();
176	0	}
177
178	0	void ANTLRInputStream::InitializeInstanceFields() {
179	0	p = 0;
180	0	}

Coverage Report

Created: 2025-11-29 07:01