Files
mysysy/antlr/antlr4-runtime-4.13.2/runtime/src/ANTLRInputStream.cpp
2025-02-27 23:14:53 +08:00

181 lines
4.2 KiB
C++
Executable File

/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include <string.h>
#include "Exceptions.h"
#include "misc/Interval.h"
#include "IntStream.h"
#include "support/Utf8.h"
#include "support/CPPUtils.h"
#include "ANTLRInputStream.h"
using namespace antlr4;
using namespace antlrcpp;
using misc::Interval;
// Default constructor: only resets the instance fields (the read position);
// no input is loaded.
ANTLRInputStream::ANTLRInputStream() {
  this->InitializeInstanceFields();
}
// Constructs a stream from the bytes viewed by `input`.
// Delegates to the default constructor first, then hands the raw bytes to
// load(), which decodes them and may throw on malformed input.
ANTLRInputStream::ANTLRInputStream(std::string_view input) : ANTLRInputStream() {
  const char *bytes = input.data();
  const size_t byteCount = input.length();
  load(bytes, byteCount);
}
// Constructs a stream from `length` bytes starting at `data`.
// No delegation to the default constructor here: load() itself resets the
// read position once decoding has succeeded.
ANTLRInputStream::ANTLRInputStream(const char *data, size_t length) {
  this->load(data, length);
}
// Constructs a stream from the remaining contents of `stream`.
// The default constructor runs first so the position is initialized even when
// load() returns early because the stream is not readable.
ANTLRInputStream::ANTLRInputStream(std::istream &stream) : ANTLRInputStream() {
  this->load(stream);
}
// Convenience overload: loads the bytes held by `input`, forwarding the
// `lenient` flag to the pointer/length overload.
void ANTLRInputStream::load(const std::string &input, bool lenient) {
  const char *bytes = input.data();
  const size_t byteCount = input.size();
  load(bytes, byteCount, lenient);
}
// Decodes `length` bytes at `data` into the internal buffer and rewinds the
// read position to 0.
//
// A leading UTF-8 byte order mark (EF BB BF) is skipped. With `lenient` set
// the bytes go through Utf8::lenientDecode; otherwise Utf8::strictDecode is
// used and an IllegalArgumentException is thrown when it yields no value, in
// which case the buffer and the position are left as they were.
void ANTLRInputStream::load(const char *data, size_t length, bool lenient) {
  const char utf8Bom[] = "\xef\xbb\xbf";
  const bool startsWithBom = length >= 3 && strncmp(data, utf8Bom, 3) == 0;
  if (startsWithBom) {
    data += 3;
    length -= 3;
  }

  const std::string_view bytes(data, length);
  if (!lenient) {
    auto decoded = Utf8::strictDecode(bytes);
    if (!decoded.has_value()) {
      throw IllegalArgumentException("UTF-8 string contains an illegal byte sequence");
    }
    _data = std::move(decoded).value();
  } else {
    _data = Utf8::lenientDecode(bytes);
  }

  p = 0;
}
void ANTLRInputStream::load(std::istream &stream, bool lenient) {
if (!stream.good() || stream.eof()) // No fail, bad or EOF.
return;
_data.clear();
std::string s((std::istreambuf_iterator<char>(stream)), std::istreambuf_iterator<char>());
load(s.data(), s.length(), lenient);
}
void ANTLRInputStream::reset() {
p = 0;
}
// Advances the read position by one symbol.
// Throws IllegalStateException when the position is already at (or past) the
// end of the buffer, i.e. when the current symbol is EOF.
void ANTLRInputStream::consume() {
  const bool atEnd = p >= _data.size();
  if (atEnd) {
    assert(LA(1) == IntStream::EOF);
    throw IllegalStateException("cannot consume EOF");
  }

  // Past the guard above p is strictly inside the buffer, so stepping is safe.
  ++p;
}
// Looks at the symbol `i` positions relative to the current one without
// consuming anything.
//
// LA(1) is the symbol at the current position, LA(-1) the one just before it.
// LA(0) is undefined and yields 0. Any offset that lands outside the buffer
// yields IntStream::EOF.
size_t ANTLRInputStream::LA(ssize_t i) {
  if (i == 0) {
    return 0; // undefined
  }

  if (i < 0) {
    // Shift negative offsets by one so that LA(-1) maps to _data[p - 1] with
    // the same "+ i - 1" formula used for positive offsets.
    ++i;
  }

  const ssize_t target = static_cast<ssize_t>(p) + i - 1;
  const ssize_t limit = static_cast<ssize_t>(_data.size());
  if (target < 0 || target >= limit) {
    return IntStream::EOF; // before the first or after the last symbol
  }

  return _data[static_cast<size_t>(target)];
}
// Alias for LA(): returns exactly what LA(i) returns.
size_t ANTLRInputStream::LT(ssize_t i) {
  const size_t symbol = LA(i);
  return symbol;
}
size_t ANTLRInputStream::index() {
return p;
}
// Returns the number of elements held in the decoded buffer.
size_t ANTLRInputStream::size() {
  const size_t total = _data.size();
  return total;
}
// Mark/release do nothing. We have entire buffer.
ssize_t ANTLRInputStream::mark() {
  // Always hands back the same marker value; no state is recorded.
  const ssize_t marker = -1;
  return marker;
}
void ANTLRInputStream::release(ssize_t /* marker */) {
  // Intentionally empty: the marker is ignored.
}
// Moves the read position to `index`.
// Backward (or same-position) seeks jump directly. Forward seeks consume one
// symbol at a time and stop at `index` or at the end of the buffer, whichever
// comes first.
void ANTLRInputStream::seek(size_t index) {
  if (index > p) {
    // Clamp the target so consume() is never asked to step past the end.
    const size_t target = std::min(index, _data.size());
    while (p < target) {
      consume();
    }
    return;
  }

  // just jump; don't update stream state (line, ...)
  p = index;
}
// Returns the UTF-8 encoding of the buffer elements in the closed range
// [interval.a, interval.b].
//
// An empty string is returned when either bound is negative, when the range
// starts at or beyond the end of the buffer, or when the stop lies before the
// start. A stop beyond the last element is clamped to the last element.
// Throws IllegalArgumentException when Utf8::strictEncode yields no value for
// the selected range.
std::string ANTLRInputStream::getText(const Interval &interval) {
  if (interval.a < 0 || interval.b < 0) {
    return "";
  }

  const size_t start = static_cast<size_t>(interval.a);
  // Checked before touching `_data.size() - 1` so an empty buffer never makes
  // that unsigned subtraction wrap around.
  if (start >= _data.size()) {
    return "";
  }

  size_t stop = static_cast<size_t>(interval.b);
  if (stop >= _data.size()) {
    stop = _data.size() - 1;
  }

  // Inverted interval: without this guard `stop - start + 1` would underflow
  // to a huge count and substr() would return everything from `start` to the
  // end of the buffer instead of nothing.
  if (stop < start) {
    return "";
  }

  const size_t count = stop - start + 1;
  auto maybeUtf8 = Utf8::strictEncode(std::u32string_view(_data).substr(start, count));
  if (!maybeUtf8.has_value()) {
    throw IllegalArgumentException("Input stream contains invalid Unicode code points");
  }
  return std::move(maybeUtf8).value();
}
// Returns the stream's name, or IntStream::UNKNOWN_SOURCE_NAME when no name
// has been set.
std::string ANTLRInputStream::getSourceName() const {
  if (!name.empty()) {
    return name;
  }
  return IntStream::UNKNOWN_SOURCE_NAME;
}
// Returns the whole buffer encoded as UTF-8.
// Throws IllegalArgumentException when Utf8::strictEncode yields no value.
std::string ANTLRInputStream::toString() const {
  auto encoded = Utf8::strictEncode(_data);
  if (encoded.has_value()) {
    return std::move(encoded).value();
  }
  throw IllegalArgumentException("Input stream contains invalid Unicode code points");
}
void ANTLRInputStream::InitializeInstanceFields() {
p = 0;
}