415 lines
9.7 KiB
C++
Executable File
415 lines
9.7 KiB
C++
Executable File
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
|
|
* Use of this file is governed by the BSD 3-clause license that
|
|
* can be found in the LICENSE.txt file in the project root.
|
|
*/
|
|
|
|
#include "WritableToken.h"
|
|
#include "Lexer.h"
|
|
#include "RuleContext.h"
|
|
#include "misc/Interval.h"
|
|
#include "Exceptions.h"
|
|
#include "support/CPPUtils.h"
|
|
|
|
#include "BufferedTokenStream.h"
|
|
|
|
using namespace antlr4;
|
|
using namespace antlrcpp;
|
|
|
|
/// Create a buffered stream over the given token source.
/// The source is borrowed, not owned; it must outlive this stream.
BufferedTokenStream::BufferedTokenStream(TokenSource *tokenSource)
  : _tokenSource(tokenSource) {
  InitializeInstanceFields();
}
|
|
|
|
/// The token source this stream pulls from (non-owning).
TokenSource* BufferedTokenStream::getTokenSource() const { return _tokenSource; }
|
|
|
|
/// Index of the current token (the one LT(1) would return).
size_t BufferedTokenStream::index() { return _p; }
|
|
|
|
/// Marks are free for a buffered stream: all tokens are kept, so no
/// state needs saving. Always returns 0.
ssize_t BufferedTokenStream::mark() { return 0; }
|
|
|
|
/// Counterpart of mark(); intentionally a no-op (nothing was reserved).
void BufferedTokenStream::release(ssize_t /*marker*/) {}
|
|
|
|
/// Rewind the stream to the first token.
void BufferedTokenStream::reset() { seek(0); }
|
|
|
|
/// Move the cursor to `index`, initializing the buffer first if needed.
/// Subclasses may shift the target via adjustSeekIndex (e.g. to skip
/// off-channel tokens).
void BufferedTokenStream::seek(size_t index) {
  lazyInit();
  _p = adjustSeekIndex(index);
}
|
|
|
|
/// Number of tokens buffered so far (not necessarily the whole input).
size_t BufferedTokenStream::size() { return _tokens.size(); }
|
|
|
|
void BufferedTokenStream::consume() {
|
|
bool skipEofCheck = false;
|
|
if (!_needSetup) {
|
|
if (_fetchedEOF) {
|
|
// the last token in tokens is EOF. skip check if p indexes any
|
|
// fetched token except the last.
|
|
skipEofCheck = _p < _tokens.size() - 1;
|
|
} else {
|
|
// no EOF token in tokens. skip check if p indexes a fetched token.
|
|
skipEofCheck = _p < _tokens.size();
|
|
}
|
|
} else {
|
|
// not yet initialized
|
|
skipEofCheck = false;
|
|
}
|
|
|
|
if (!skipEofCheck && LA(1) == Token::EOF) {
|
|
throw IllegalStateException("cannot consume EOF");
|
|
}
|
|
|
|
if (sync(_p + 1)) {
|
|
_p = adjustSeekIndex(_p + 1);
|
|
}
|
|
}
|
|
|
|
/// Ensure token index `i` is buffered. Returns true when _tokens[i] is
/// valid afterwards, false when the source ran out (EOF) first.
bool BufferedTokenStream::sync(size_t i) {
  if (i + 1 < _tokens.size())
    return true;

  // How many more tokens are needed to make index i addressable?
  // (Non-negative here: the early return above handled i + 1 < size.)
  size_t needed = i - _tokens.size() + 1;
  if (needed == 0)
    return true;

  return fetch(needed) >= needed;
}
|
|
|
|
size_t BufferedTokenStream::fetch(size_t n) {
|
|
if (_fetchedEOF) {
|
|
return 0;
|
|
}
|
|
|
|
size_t i = 0;
|
|
while (i < n) {
|
|
std::unique_ptr<Token> t(_tokenSource->nextToken());
|
|
|
|
if (is<WritableToken *>(t.get())) {
|
|
(static_cast<WritableToken *>(t.get()))->setTokenIndex(_tokens.size());
|
|
}
|
|
|
|
_tokens.push_back(std::move(t));
|
|
++i;
|
|
|
|
if (_tokens.back()->getType() == Token::EOF) {
|
|
_fetchedEOF = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
return i;
|
|
}
|
|
|
|
/// Return the already-buffered token at index `i` (non-owning pointer).
/// @throws IndexOutOfBoundsException if `i` has not been fetched.
Token* BufferedTokenStream::get(size_t i) const {
  if (i >= _tokens.size()) {
    // Guard the "size() - 1" in the message: on an empty buffer the
    // unsigned subtraction would wrap to SIZE_MAX and produce garbage.
    size_t last = _tokens.empty() ? 0 : _tokens.size() - 1;
    throw IndexOutOfBoundsException(std::string("token index ") +
                                    std::to_string(i) +
                                    std::string(" out of range 0..") +
                                    std::to_string(last));
  }
  return _tokens[i].get();
}
|
|
|
|
/// Return non-owning pointers to the tokens in [start, stop], clamped to
/// the buffered range and truncated at the first EOF token.
std::vector<Token *> BufferedTokenStream::get(size_t start, size_t stop) {
  std::vector<Token *> slice;

  lazyInit();
  if (_tokens.empty())
    return slice;

  // Clamp the upper bound to what has actually been fetched.
  if (stop >= _tokens.size())
    stop = _tokens.size() - 1;

  for (size_t idx = start; idx <= stop; ++idx) {
    Token *tok = _tokens[idx].get();
    if (tok->getType() == Token::EOF)
      break;
    slice.push_back(tok);
  }
  return slice;
}
|
|
|
|
/// Type of the token `i` positions ahead (or behind, for negative i).
size_t BufferedTokenStream::LA(ssize_t i) { return LT(i)->getType(); }
|
|
|
|
/// Look back `k` tokens from the cursor; nullptr when that would step
/// before the start of the buffer.
Token* BufferedTokenStream::LB(size_t k) {
  if (k > _p)
    return nullptr;
  return _tokens[_p - k].get();
}
|
|
|
|
/// Lookahead: the token `k` positions ahead of the cursor (1-based).
/// k == 0 yields nullptr; negative k delegates to LB. When the request
/// runs past the input, the final (EOF) token is returned.
Token* BufferedTokenStream::LT(ssize_t k) {
  lazyInit();

  if (k == 0)
    return nullptr;
  if (k < 0)
    return LB(-k);

  size_t target = _p + k - 1;
  sync(target);
  if (target >= _tokens.size()) {
    // Could not fetch that far; EOF must be the last buffered token.
    return _tokens.back().get();
  }
  return _tokens[target].get();
}
|
|
|
|
/// Hook for subclasses to redirect a seek target (e.g. CommonTokenStream
/// skips to the next on-channel token). Identity here.
ssize_t BufferedTokenStream::adjustSeekIndex(size_t i) { return i; }
|
|
|
|
void BufferedTokenStream::lazyInit() {
|
|
if (_needSetup) {
|
|
setup();
|
|
}
|
|
}
|
|
|
|
/// One-time initialization: fetch the first token and position the cursor.
void BufferedTokenStream::setup() {
  _needSetup = false;
  sync(0);
  _p = adjustSeekIndex(0);
}
|
|
|
|
/// Swap in a new token source and discard everything buffered so far.
/// The stream re-initializes lazily on next access.
void BufferedTokenStream::setTokenSource(TokenSource *tokenSource) {
  _tokenSource = tokenSource;
  _tokens.clear();
  _fetchedEOF = false;
  _needSetup = true;
}
|
|
|
|
std::vector<Token *> BufferedTokenStream::getTokens() {
|
|
std::vector<Token *> result;
|
|
for (auto &t : _tokens)
|
|
result.push_back(t.get());
|
|
return result;
|
|
}
|
|
|
|
/// Tokens in [start, stop] with no type filtering.
std::vector<Token *> BufferedTokenStream::getTokens(size_t start, size_t stop) {
  return getTokens(start, stop, std::vector<size_t>());
}
|
|
|
|
std::vector<Token *> BufferedTokenStream::getTokens(size_t start, size_t stop, const std::vector<size_t> &types) {
|
|
lazyInit();
|
|
if (stop >= _tokens.size() || start >= _tokens.size()) {
|
|
throw IndexOutOfBoundsException(std::string("start ") +
|
|
std::to_string(start) +
|
|
std::string(" or stop ") +
|
|
std::to_string(stop) +
|
|
std::string(" not in 0..") +
|
|
std::to_string(_tokens.size() - 1));
|
|
}
|
|
|
|
std::vector<Token *> filteredTokens;
|
|
|
|
if (start > stop) {
|
|
return filteredTokens;
|
|
}
|
|
|
|
for (size_t i = start; i <= stop; i++) {
|
|
Token *tok = _tokens[i].get();
|
|
|
|
if (types.empty() || std::find(types.begin(), types.end(), tok->getType()) != types.end()) {
|
|
filteredTokens.push_back(tok);
|
|
}
|
|
}
|
|
return filteredTokens;
|
|
}
|
|
|
|
/// Tokens in [start, stop] of exactly one type.
std::vector<Token *> BufferedTokenStream::getTokens(size_t start, size_t stop, size_t ttype) {
  std::vector<size_t> wanted(1, ttype);
  return getTokens(start, stop, wanted);
}
|
|
|
|
/// Index of the first token at or after `i` on `channel`. Returns the EOF
/// token's index when none is found before EOF, or the last buffered index
/// when `i` is already past the end.
ssize_t BufferedTokenStream::nextTokenOnChannel(size_t i, size_t channel) {
  sync(i);
  if (i >= size())
    return size() - 1;

  while (true) {
    Token *tok = _tokens[i].get();
    // EOF sits on every channel; stop there or at a channel match.
    if (tok->getType() == Token::EOF || tok->getChannel() == channel)
      return i;
    ++i;
    sync(i);
  }
}
|
|
|
|
/// Index of the first token at or before `i` on `channel`, scanning
/// backwards. Returns -1 when no such token exists, or the last buffered
/// index when `i` is past the end (EOF is on every channel).
/// Fix: dropped the unreachable `return i;` that followed the infinite
/// loop — every path exits via a return inside it.
ssize_t BufferedTokenStream::previousTokenOnChannel(size_t i, size_t channel) {
  sync(i);
  if (i >= size()) {
    // the EOF token is on every channel
    return size() - 1;
  }

  while (true) {
    Token *token = _tokens[i].get();
    if (token->getType() == Token::EOF || token->getChannel() == channel) {
      return i;
    }

    if (i == 0)
      return -1;
    i--;
  }
}
|
|
|
|
std::vector<Token *> BufferedTokenStream::getHiddenTokensToRight(size_t tokenIndex, ssize_t channel) {
|
|
lazyInit();
|
|
if (tokenIndex >= _tokens.size()) {
|
|
throw IndexOutOfBoundsException(std::to_string(tokenIndex) + " not in 0.." + std::to_string(_tokens.size() - 1));
|
|
}
|
|
|
|
ssize_t nextOnChannel = nextTokenOnChannel(tokenIndex + 1, Lexer::DEFAULT_TOKEN_CHANNEL);
|
|
size_t to;
|
|
size_t from = tokenIndex + 1;
|
|
// if none onchannel to right, nextOnChannel=-1 so set to = last token
|
|
if (nextOnChannel == -1) {
|
|
to = static_cast<ssize_t>(size() - 1);
|
|
} else {
|
|
to = nextOnChannel;
|
|
}
|
|
|
|
return filterForChannel(from, to, channel);
|
|
}
|
|
|
|
/// Convenience overload: any hidden channel (-1) to the right.
std::vector<Token *> BufferedTokenStream::getHiddenTokensToRight(size_t tokenIndex) {
  return getHiddenTokensToRight(tokenIndex, -1);
}
|
|
|
|
std::vector<Token *> BufferedTokenStream::getHiddenTokensToLeft(size_t tokenIndex, ssize_t channel) {
|
|
lazyInit();
|
|
if (tokenIndex >= _tokens.size()) {
|
|
throw IndexOutOfBoundsException(std::to_string(tokenIndex) + " not in 0.." + std::to_string(_tokens.size() - 1));
|
|
}
|
|
|
|
if (tokenIndex == 0) {
|
|
// Obviously no tokens can appear before the first token.
|
|
return { };
|
|
}
|
|
|
|
ssize_t prevOnChannel = previousTokenOnChannel(tokenIndex - 1, Lexer::DEFAULT_TOKEN_CHANNEL);
|
|
if (prevOnChannel == static_cast<ssize_t>(tokenIndex - 1)) {
|
|
return { };
|
|
}
|
|
// if none onchannel to left, prevOnChannel=-1 then from=0
|
|
size_t from = static_cast<size_t>(prevOnChannel + 1);
|
|
size_t to = tokenIndex - 1;
|
|
|
|
return filterForChannel(from, to, channel);
|
|
}
|
|
|
|
/// Convenience overload: any hidden channel (-1) to the left.
std::vector<Token *> BufferedTokenStream::getHiddenTokensToLeft(size_t tokenIndex) {
  return getHiddenTokensToLeft(tokenIndex, -1);
}
|
|
|
|
/// Tokens in [from, to] matching `channel`; channel == -1 selects every
/// token that is NOT on the default channel.
std::vector<Token *> BufferedTokenStream::filterForChannel(size_t from, size_t to, ssize_t channel) {
  std::vector<Token *> hidden;
  for (size_t idx = from; idx <= to; ++idx) {
    Token *tok = _tokens[idx].get();
    bool keep = (channel == -1)
                    ? tok->getChannel() != Lexer::DEFAULT_TOKEN_CHANNEL
                    : tok->getChannel() == static_cast<size_t>(channel);
    if (keep) {
      hidden.push_back(tok);
    }
  }
  return hidden;
}
|
|
|
|
bool BufferedTokenStream::isInitialized() const {
|
|
return !_needSetup;
|
|
}
|
|
|
|
/**
 * Get the source name reported by the underlying token source.
 */
|
|
std::string BufferedTokenStream::getSourceName() const
|
|
{
|
|
return _tokenSource->getSourceName();
|
|
}
|
|
|
|
std::string BufferedTokenStream::getText() {
|
|
fill();
|
|
return getText(misc::Interval(0U, size() - 1));
|
|
}
|
|
|
|
/// Concatenated text of the tokens in `interval`, clamped to the fetched
/// range and truncated at the first EOF token. An invalid interval yields "".
std::string BufferedTokenStream::getText(const misc::Interval &interval) {
  lazyInit();

  size_t lo = interval.a;
  size_t hi = interval.b;
  if (lo == INVALID_INDEX || hi == INVALID_INDEX)
    return "";

  sync(hi);
  if (hi >= _tokens.size())
    hi = _tokens.size() - 1;

  std::string text;
  for (size_t idx = lo; idx <= hi; ++idx) {
    Token *tok = _tokens[idx].get();
    if (tok->getType() == Token::EOF)
      break;
    text += tok->getText();
  }
  return text;
}
|
|
|
|
/// Text covered by a rule context's source interval.
std::string BufferedTokenStream::getText(RuleContext *ctx) {
  return getText(ctx->getSourceInterval());
}
|
|
|
|
/// Text between two tokens, inclusive; "" when either endpoint is null.
std::string BufferedTokenStream::getText(Token *start, Token *stop) {
  if (start == nullptr || stop == nullptr)
    return "";
  return getText(misc::Interval(start->getTokenIndex(), stop->getTokenIndex()));
}
|
|
|
|
void BufferedTokenStream::fill() {
|
|
lazyInit();
|
|
const size_t blockSize = 1000;
|
|
while (true) {
|
|
size_t fetched = fetch(blockSize);
|
|
if (fetched < blockSize) {
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Set the mutable flags to their pristine state (generated-style helper
/// shared by the constructors).
void BufferedTokenStream::InitializeInstanceFields() {
  _needSetup = true;
  _fetchedEOF = false;
}
|