build(antlr): 引入第三方 ANTLR4 runtime/tool 并接入构建

This commit is contained in:
Lane0218
2025-12-27 17:01:37 +08:00
parent 34b4484709
commit 90dc2ff8de
312 changed files with 29417 additions and 16 deletions

View File

@@ -0,0 +1,159 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "atn/LL1Analyzer.h"
#include "Token.h"
#include "atn/RuleTransition.h"
#include "misc/IntervalSet.h"
#include "RuleContext.h"
#include "atn/DecisionState.h"
#include "Recognizer.h"
#include "atn/ATNType.h"
#include "Exceptions.h"
#include "support/CPPUtils.h"
#include "atn/ATN.h"
using namespace antlr4;
using namespace antlr4::atn;
using namespace antlr4::internal;
using namespace antlrcpp;
ATN::ATN() : ATN(ATNType::LEXER, 0) {}
ATN::ATN(ATNType grammarType_, size_t maxTokenType_) : grammarType(grammarType_), maxTokenType(maxTokenType_) {}
/// Frees every state held in `states`. Slots that were nulled out by
/// removeState() are harmless: deleting a null pointer does nothing.
ATN::~ATN() {
  for (size_t i = 0; i < states.size(); ++i) {
    delete states[i];
  }
}
/// Computes the tokens that can follow state `s` in context `ctx` by running
/// a fresh LL1Analyzer over this ATN. Nothing is cached by this overload.
misc::IntervalSet ATN::nextTokens(ATNState *s, RuleContext *ctx) const {
  return LL1Analyzer(*this).LOOK(s, ctx);
}
/// Returns the set of tokens that can follow `s`, as computed by
/// nextTokens(s, nullptr). The result is stored on the state itself
/// (`_nextTokenWithinRule`) and marked valid through `_nextTokenUpdated`, so
/// later calls return the stored set.
///
/// The flag is tested once without the lock and a second time after `_mutex`
/// has been acquired; only a caller that still sees the flag unset under the
/// lock performs the computation.
/// NOTE(review): the unlocked first read is only race-free if
/// `_nextTokenUpdated` is an atomic type - its declaration is not visible
/// here; confirm.
misc::IntervalSet const& ATN::nextTokens(ATNState *s) const {
if (!s->_nextTokenUpdated) {
UniqueLock<Mutex> lock(_mutex);
if (!s->_nextTokenUpdated) {
s->_nextTokenWithinRule = nextTokens(s, nullptr);
s->_nextTokenUpdated = true;
}
}
return s->_nextTokenWithinRule;
}
/// Appends `state` to `states`. A non-null state is given the index it will
/// occupy as its state number; a null pointer is appended as a placeholder.
void ATN::addState(ATNState *state) {
  const auto nextIndex = states.size();
  if (state) {
    state->stateNumber = static_cast<int>(nextIndex);
  }
  states.push_back(state);
}
/// Destroys the state registered under `state->stateNumber` and leaves a null
/// entry in its slot, so the numbering of all other states is unchanged.
///
/// The state number is copied out *before* the delete: `state` is normally the
/// very object stored in the slot, so reading `state->stateNumber` after the
/// delete (as the previous implementation did) was a use-after-free.
///
/// @param state the state to remove; must not be null.
/// @throws std::out_of_range (from std::vector::at) if the state number does
///         not index into `states`.
void ATN::removeState(ATNState *state) {
  const auto index = state->stateNumber;
  delete states.at(index);  // just free mem, don't shift states in list
  states.at(index) = nullptr;
}
/// Registers `s` as the next decision: appends it to `decisionToState`,
/// records its index in `s->decision`, and returns that decision number.
int ATN::defineDecisionState(DecisionState *s) {
  const int decisionNumber = static_cast<int>(decisionToState.size());
  decisionToState.push_back(s);
  s->decision = decisionNumber;
  return s->decision;
}
/// Looks up the decision state registered under `decision`.
///
/// @param decision decision number as returned by defineDecisionState().
/// @return the matching state, or nullptr when `decision` does not refer to a
///         registered decision (this includes the case where no decisions
///         have been defined at all).
///
/// The previous implementation only guarded against an empty table and then
/// indexed the vector unchecked, which is undefined behaviour for any
/// `decision >= decisionToState.size()`.
DecisionState *ATN::getDecisionState(size_t decision) const {
  if (decision < decisionToState.size()) {
    return decisionToState[decision];
  }
  return nullptr;
}
/// Returns how many decision states have been registered, i.e. the size of
/// `decisionToState`.
size_t ATN::getNumberOfDecisions() const {
return decisionToState.size();
}
/// Computes the tokens that may follow state `stateNumber` given the full
/// invocation chain `context`.
///
/// Starts with the tokens reachable inside the state's own rule. While that
/// set still contains Token::EPSILON (the end of the rule is reachable), the
/// walk moves up one invoking frame, takes the follow state of that frame's
/// first transition (treated as a RuleTransition) and merges its tokens in.
/// If EPSILON is still present once the walk ends, Token::EOF is added.
///
/// @throws IllegalArgumentException if `stateNumber` is the invalid-state
///         sentinel or not smaller than `states.size()`.
misc::IntervalSet ATN::getExpectedTokens(size_t stateNumber, RuleContext *context) const {
  const bool knownState = stateNumber != ATNState::INVALID_STATE_NUMBER && stateNumber < states.size();
  if (!knownState) {
    throw IllegalArgumentException("Invalid state number.");
  }

  misc::IntervalSet following = nextTokens(states.at(stateNumber));
  if (!following.contains(Token::EPSILON)) {
    return following;
  }

  misc::IntervalSet expected;
  expected.addAll(following);
  expected.remove(Token::EPSILON);

  for (RuleContext *frame = context;
       frame != nullptr && frame->invokingState != ATNState::INVALID_STATE_NUMBER && following.contains(Token::EPSILON);
       frame = static_cast<RuleContext *>(frame->parent)) {
    ATNState *caller = states.at(frame->invokingState);
    const auto *ruleCall = static_cast<const RuleTransition*>(caller->transitions[0].get());
    following = nextTokens(ruleCall->followState);
    expected.addAll(following);
    expected.remove(Token::EPSILON);

    // Outermost frame reached: stop here without stepping to a null parent.
    if (frame->parent == nullptr) {
      break;
    }
  }

  if (following.contains(Token::EPSILON)) {
    expected.add(Token::EOF);
  }
  return expected;
}
std::string ATN::toString() const {
std::stringstream ss;
std::string type;
switch (grammarType) {
case ATNType::LEXER:
type = "LEXER ";
break;
case ATNType::PARSER:
type = "PARSER ";
break;
default:
break;
}
ss << "(" << type << "ATN " << std::hex << this << std::dec << ") maxTokenType: " << maxTokenType << std::endl;
ss << "states (" << states.size() << ") {" << std::endl;
size_t index = 0;
for (auto *state : states) {
if (state == nullptr) {
ss << " " << index++ << ": nul" << std::endl;
} else {
std::string text = state->toString();
ss << " " << index++ << ": " << indent(text, " ", false) << std::endl;
}
}
index = 0;
for (auto *state : decisionToState) {
if (state == nullptr) {
ss << " " << index++ << ": nul" << std::endl;
} else {
std::string text = state->toString();
ss << " " << index++ << ": " << indent(text, " ", false) << std::endl;
}
}
ss << "}";
return ss.str();
}

View File

@@ -0,0 +1,133 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "RuleContext.h"
#include "internal/Synchronization.h"
// GCC generates a warning when forward-declaring ATN if ATN has already been
// declared due to the attributes added by ANTLR4CPP_PUBLIC.
// See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=39159
// Add constant that can be checked so forward-declarations can be omitted.
#define ANTLR4CPP_ATN_DECLARED
namespace antlr4 {
namespace atn {
class LexerATNSimulator;
class ParserATNSimulator;
/// The ATN object: holds the list of ATN states together with the lookup
/// tables indexed by decision, rule and lexer mode. The states are owned by
/// this object (the destructor deletes every entry of `states`). Copying and
/// moving are disabled.
class ANTLR4CPP_PUBLIC ATN {
public:
/// Sentinel alternative number (0) that denotes an invalid alternative.
static constexpr size_t INVALID_ALT_NUMBER = 0;
/// Used for runtime deserialization of ATNs from strings.
/// Equivalent to ATN(ATNType::LEXER, 0).
ATN();
/// Creates an empty ATN with the given grammar type and maximum token type.
ATN(ATNType grammarType, size_t maxTokenType);
ATN(const ATN&) = delete;
ATN(ATN&&) = delete;
/// Deletes all states stored in `states`.
~ATN();
ATN& operator=(const ATN&) = delete;
ATN& operator=(ATN&&) = delete;
/// All states, indexed by state number. An entry is null after the
/// corresponding state has been removed with removeState().
std::vector<ATNState *> states;
/// Each subrule/rule is a decision point and we must track them so we
/// can go back later and build DFA predictors for them. This includes
/// all the rules, subrules, optional blocks, ()+, ()* etc...
std::vector<DecisionState *> decisionToState;
/// Maps from rule index to the rule's start state.
std::vector<RuleStartState *> ruleToStartState;
/// Maps from rule index to the rule's stop state.
std::vector<RuleStopState *> ruleToStopState;
/// The type of the ATN.
ATNType grammarType;
/// The maximum value for any symbol recognized by a transition in the ATN.
size_t maxTokenType;
/// <summary>
/// For lexer ATNs, this maps the rule index to the resulting token type.
/// For parser ATNs, this maps the rule index to the generated bypass token
/// type if the
/// <seealso cref="ATNDeserializationOptions#isGenerateRuleBypassTransitions"/>
/// deserialization option was specified; otherwise, this is {@code null}.
/// </summary>
std::vector<size_t> ruleToTokenType;
/// For lexer ATNs, this is an array of {@link LexerAction} objects which may
/// be referenced by action transitions in the ATN.
std::vector<Ref<const LexerAction>> lexerActions;
/// Start states indexed by mode (presumably lexer modes, judging by the
/// element type - confirm against the deserializer).
std::vector<TokensStartState *> modeToStartState;
/// <summary>
/// Compute the set of valid tokens that can occur starting in state {@code s}.
/// If {@code ctx} is null, the set of tokens will not include what can follow
/// the rule surrounding {@code s}. In other words, the set will be
/// restricted to tokens reachable staying within {@code s}'s rule.
/// </summary>
misc::IntervalSet nextTokens(ATNState *s, RuleContext *ctx) const;
/// <summary>
/// Compute the set of valid tokens that can occur starting in {@code s} and
/// staying in same rule. <seealso cref="Token#EPSILON"/> is in set if we reach end of
/// rule.
/// </summary>
/// The result is computed once per state and stored on the state; the
/// returned reference refers to that stored set.
misc::IntervalSet const& nextTokens(ATNState *s) const;
/// Appends {@code state} to {@code states}; a non-null state receives its
/// index in the list as state number.
void addState(ATNState *state);
/// Deletes the state stored at {@code state->stateNumber} and leaves a null
/// entry in its place, so the numbers of the remaining states do not change.
void removeState(ATNState *state);
/// Appends {@code s} to {@code decisionToState}, stores the resulting index
/// in {@code s->decision} and returns it.
int defineDecisionState(DecisionState *s);
/// Returns the decision state registered under {@code decision}; returns
/// nullptr when no decision states have been defined.
DecisionState *getDecisionState(size_t decision) const;
/// Returns the number of entries in {@code decisionToState}.
size_t getNumberOfDecisions() const;
/// <summary>
/// Computes the set of input symbols which could follow ATN state number
/// {@code stateNumber} in the specified full {@code context}. This method
/// considers the complete parser context, but does not evaluate semantic
/// predicates (i.e. all predicates encountered during the calculation are
/// assumed true). If a path in the ATN exists from the starting state to the
/// <seealso cref="RuleStopState"/> of the outermost context without matching any
/// symbols, <seealso cref="Token#EOF"/> is added to the returned set.
/// <p/>
/// If {@code context} is {@code null}, it is treated as
/// <seealso cref="ParserRuleContext#EMPTY"/>.
/// </summary>
/// <param name="stateNumber"> the ATN state number </param>
/// <param name="context"> the full parse context </param>
/// <returns> The set of potentially valid input symbols which could follow the
/// specified state in the specified context. </returns>
/// <exception cref="IllegalArgumentException"> if the ATN does not contain a state with
/// number {@code stateNumber} </exception>
misc::IntervalSet getExpectedTokens(size_t stateNumber, RuleContext *context) const;
/// Returns a multi-line debug dump listing the states and decision states.
std::string toString() const;
private:
friend class LexerATNSimulator;
friend class ParserATNSimulator;
/// Guards the lazy per-state computation performed by nextTokens(ATNState*).
mutable internal::Mutex _mutex;
/// NOTE(review): not referenced by ATN's own member functions; presumably
/// used by the friend simulator classes - confirm.
mutable internal::SharedMutex _stateMutex;
/// NOTE(review): not referenced by ATN's own member functions; presumably
/// used by the friend simulator classes - confirm.
mutable internal::SharedMutex _edgeMutex;
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,106 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "misc/MurmurHash.h"
#include "atn/PredictionContext.h"
#include "SemanticContext.h"
#include "atn/ATNConfig.h"
using namespace antlr4::atn;
namespace {
/**
 * Bit mask (0x40000000, i.e. bit 30) under which the "precedence filter
 * suppressed" flag is stored inside ATNConfig::reachesIntoOuterContext.
 * ATNConfig::isPrecedenceFilterSuppressed() and
 * ATNConfig::setPrecedenceFilterSuppressed() read and write this bit, and
 * ATNConfig::getOuterContextDepth() masks it out of the returned value.
 */
inline constexpr size_t SUPPRESS_PRECEDENCE_FILTER = 0x40000000;
}
// All public constructors delegate to the private five-argument constructor
// at the end of this group.

/// New config with an outer-context value of 0 and the empty semantic context.
ATNConfig::ATNConfig(ATNState *state, size_t alt, Ref<const PredictionContext> context)
: ATNConfig(state, alt, std::move(context), 0, SemanticContext::Empty::Instance) {}
/// New config with an outer-context value of 0 and the given semantic context.
ATNConfig::ATNConfig(ATNState *state, size_t alt, Ref<const PredictionContext> context, Ref<const SemanticContext> semanticContext)
: ATNConfig(state, alt, std::move(context), 0, std::move(semanticContext)) {}
/// Copy of `other` with the semantic context replaced.
ATNConfig::ATNConfig(ATNConfig const& other, Ref<const SemanticContext> semanticContext)
: ATNConfig(other.state, other.alt, other.context, other.reachesIntoOuterContext, std::move(semanticContext)) {}
/// Copy of `other` with the state replaced.
ATNConfig::ATNConfig(ATNConfig const& other, ATNState *state)
: ATNConfig(state, other.alt, other.context, other.reachesIntoOuterContext, other.semanticContext) {}
/// Copy of `other` with the state and semantic context replaced.
ATNConfig::ATNConfig(ATNConfig const& other, ATNState *state, Ref<const SemanticContext> semanticContext)
: ATNConfig(state, other.alt, other.context, other.reachesIntoOuterContext, std::move(semanticContext)) {}
/// Copy of `other` with the state and prediction context replaced.
ATNConfig::ATNConfig(ATNConfig const& other, ATNState *state, Ref<const PredictionContext> context)
: ATNConfig(state, other.alt, std::move(context), other.reachesIntoOuterContext, other.semanticContext) {}
/// Copy of `other` with the state, prediction context and semantic context
/// replaced; only `alt` and `reachesIntoOuterContext` are taken from `other`.
ATNConfig::ATNConfig(ATNConfig const& other, ATNState *state, Ref<const PredictionContext> context, Ref<const SemanticContext> semanticContext)
: ATNConfig(state, other.alt, std::move(context), other.reachesIntoOuterContext, std::move(semanticContext)) {}
/// Designated constructor: stores every field as given. The raw
/// `reachesIntoOuterContext` value is copied unchanged, including the
/// precedence-filter bit it may carry.
ATNConfig::ATNConfig(ATNState *state, size_t alt, Ref<const PredictionContext> context, size_t reachesIntoOuterContext, Ref<const SemanticContext> semanticContext)
: state(state), alt(alt), context(std::move(context)), reachesIntoOuterContext(reachesIntoOuterContext), semanticContext(std::move(semanticContext)) {}
size_t ATNConfig::hashCode() const {
size_t hashCode = misc::MurmurHash::initialize(7);
hashCode = misc::MurmurHash::update(hashCode, state->stateNumber);
hashCode = misc::MurmurHash::update(hashCode, alt);
hashCode = misc::MurmurHash::update(hashCode, context);
hashCode = misc::MurmurHash::update(hashCode, semanticContext);
hashCode = misc::MurmurHash::finish(hashCode, 4);
return hashCode;
}
/// Returns `reachesIntoOuterContext` with the precedence-filter flag bit
/// cleared, i.e. only the depth part of the field.
size_t ATNConfig::getOuterContextDepth() const {
  const size_t depthMask = ~SUPPRESS_PRECEDENCE_FILTER;
  return reachesIntoOuterContext & depthMask;
}
/// True when the precedence-filter flag bit is set in
/// `reachesIntoOuterContext`.
bool ATNConfig::isPrecedenceFilterSuppressed() const {
  const size_t flagBit = reachesIntoOuterContext & SUPPRESS_PRECEDENCE_FILTER;
  return flagBit != 0;
}
/// Sets (`value == true`) or clears (`value == false`) the precedence-filter
/// flag bit in `reachesIntoOuterContext`; the other bits are left untouched.
void ATNConfig::setPrecedenceFilterSuppressed(bool value) {
  reachesIntoOuterContext = value
      ? (reachesIntoOuterContext | SUPPRESS_PRECEDENCE_FILTER)
      : (reachesIntoOuterContext & ~SUPPRESS_PRECEDENCE_FILTER);
}
/// Two configurations are equal when they have the same state number, the
/// same alternative, equal prediction contexts, equal semantic contexts and
/// the same precedence-filter flag.
///
/// Contexts are compared by pointer identity first and by value only when
/// both pointers are non-null. toString() already treats `context` and
/// `semanticContext` as nullable; the previous implementation dereferenced
/// them unconditionally, which is undefined behaviour as soon as exactly one
/// side is null (and, for `semanticContext`, when both are). A null context
/// now simply compares unequal to a non-null one and equal to another null.
bool ATNConfig::operator==(const ATNConfig &other) const {
  if (state->stateNumber != other.state->stateNumber || alt != other.alt) {
    return false;
  }

  const bool sameContext =
      (context == other.context) ||
      (context && other.context && *context == *other.context);
  if (!sameContext) {
    return false;
  }

  // Identical pointers refer to the same object, which equals itself.
  const bool sameSemanticContext =
      (semanticContext == other.semanticContext) ||
      (semanticContext && other.semanticContext && *semanticContext == *other.semanticContext);
  if (!sameSemanticContext) {
    return false;
  }

  return isPrecedenceFilterSuppressed() == other.isPrecedenceFilterSuppressed();
}
/// Textual form of this configuration including the alternative number;
/// same as toString(true).
std::string ATNConfig::toString() const {
return toString(true);
}
/// Formats this configuration as "(state[,alt][,[context]][,semctx][,up=N])".
///
/// @param showAlt whether the alternative number is included.
/// The context is printed only when non-null, the semantic context only when
/// it is non-null and not the shared empty instance, and "up=N" only when the
/// outer-context depth is greater than zero.
std::string ATNConfig::toString(bool showAlt) const {
  std::ostringstream out;
  out << "(" << state->toString();

  if (showAlt) {
    out << "," << alt;
  }
  if (context) {
    out << ",[" << context->toString() << "]";
  }

  const bool hasPredicate = semanticContext != nullptr && semanticContext != SemanticContext::Empty::Instance;
  if (hasPredicate) {
    out << "," << semanticContext->toString();
  }

  const size_t outerDepth = getOuterContextDepth();
  if (outerDepth > 0) {
    out << ",up=" << outerDepth;
  }

  out << ")";
  return out.str();
}

View File

@@ -0,0 +1,157 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include <cassert>
#include "antlr4-common.h"
#include "atn/SemanticContext.h"
namespace antlr4 {
namespace atn {
/// <summary>
/// A tuple: (ATN state, predicted alt, syntactic, semantic context).
/// The syntactic context is a graph-structured stack node whose
/// path(s) to the root is the rule invocation(s)
/// chain used to arrive at the state. The semantic context is
/// the tree of semantic predicates encountered before reaching
/// an ATN state.
/// </summary>
class ANTLR4CPP_PUBLIC ATNConfig {
public:
/// Hash functor for containers of ATNConfig (by shared pointer or by value);
/// forwards to ATNConfig::hashCode(). The pointer overload dereferences its
/// argument, so it must not be null.
struct Hasher
{
size_t operator()(Ref<ATNConfig> const& k) const {
return k->hashCode();
}
size_t operator()(ATNConfig const& k) const {
return k.hashCode();
}
};
/// Equality functor matching Hasher: identical objects compare equal
/// immediately, otherwise ATNConfig::operator== decides.
struct Comparer {
bool operator()(Ref<ATNConfig> const& lhs, Ref<ATNConfig> const& rhs) const {
return (lhs == rhs) || (*lhs == *rhs);
}
bool operator()(ATNConfig const& lhs, ATNConfig const& rhs) const {
return (&lhs == &rhs) || (lhs == rhs);
}
};
/// Unordered set of shared ATNConfig pointers keyed by value (Hasher/Comparer).
using Set = std::unordered_set<Ref<ATNConfig>, Hasher, Comparer>;
/// The ATN state associated with this configuration.
ATNState *state = nullptr;
/// What alt (or lexer rule) is predicted by this configuration.
const size_t alt = 0;
/// The stack of invoking states leading to the rule/states associated
/// with this config. We track only those contexts pushed during
/// execution of the ATN simulator.
///
/// Can be shared between multiple ATNConfig instances.
Ref<const PredictionContext> context;
/**
* We cannot execute predicates dependent upon local context unless
* we know for sure we are in the correct context. Because there is
* no way to do this efficiently, we simply cannot evaluate
* dependent predicates unless we are in the rule that initially
* invokes the ATN simulator.
*
* <p>
* closure() tracks the depth of how far we dip into the outer context:
* depth > 0. Note that it may not be totally accurate depth since I
* don't ever decrement. TODO: make it a boolean then</p>
*
* <p>
* For memory efficiency, the {@link #isPrecedenceFilterSuppressed} method
* is also backed by this field. Since the field is publicly accessible, the
* highest bit which would not cause the value to become negative is used to
* store this field. This choice minimizes the risk that code which only
* compares this value to 0 would be affected by the new purpose of the
* flag. It also ensures the performance of the existing {@link ATNConfig}
* constructors as well as certain operations like
* {@link ATNConfigSet#add(ATNConfig, DoubleKeyMap)} method are
* <em>completely</em> unaffected by the change.</p>
*/
size_t reachesIntoOuterContext = 0;
/// Can be shared between multiple ATNConfig instances.
Ref<const SemanticContext> semanticContext;
/// New config with an outer-context value of 0 and the empty semantic context.
ATNConfig(ATNState *state, size_t alt, Ref<const PredictionContext> context);
/// New config with an outer-context value of 0 and the given semantic context.
ATNConfig(ATNState *state, size_t alt, Ref<const PredictionContext> context, Ref<const SemanticContext> semanticContext);
// The following constructors copy `other` and replace only the fields that
// are passed explicitly.
ATNConfig(ATNConfig const& other, Ref<const SemanticContext> semanticContext);
ATNConfig(ATNConfig const& other, ATNState *state);
ATNConfig(ATNConfig const& other, ATNState *state, Ref<const SemanticContext> semanticContext);
ATNConfig(ATNConfig const& other, ATNState *state, Ref<const PredictionContext> context);
ATNConfig(ATNConfig const& other, ATNState *state, Ref<const PredictionContext> context, Ref<const SemanticContext> semanticContext);
ATNConfig(ATNConfig const&) = default;
ATNConfig(ATNConfig&&) = default;
virtual ~ATNConfig() = default;
/// Hash over state number, alt, prediction context and semantic context.
virtual size_t hashCode() const;
/**
* This method gets the value of the {@link #reachesIntoOuterContext} field
* as it existed prior to the introduction of the
* {@link #isPrecedenceFilterSuppressed} method.
*/
size_t getOuterContextDepth() const;
/// Reads the flag bit stored inside {@link #reachesIntoOuterContext}.
bool isPrecedenceFilterSuppressed() const;
/// Sets or clears the flag bit stored inside {@link #reachesIntoOuterContext}.
void setPrecedenceFilterSuppressed(bool value);
/// An ATN configuration is equal to another if both have
/// the same state, they predict the same alternative, and
/// syntactic/semantic contexts are the same.
/// The precedence-filter flag must match as well.
bool operator==(const ATNConfig &other) const;
bool operator!=(const ATNConfig &other) const;
/// Same as toString(true).
virtual std::string toString() const;
/// Textual form of the configuration; {@code showAlt} controls whether the
/// alternative number is printed.
std::string toString(bool showAlt) const;
private:
/// Designated constructor that all public constructors delegate to; stores
/// every field exactly as given.
ATNConfig(ATNState *state, size_t alt, Ref<const PredictionContext> context, size_t reachesIntoOuterContext, Ref<const SemanticContext> semanticContext);
};
} // namespace atn
} // namespace antlr4
// std::hash support for ATNConfig and for vectors of shared ATNConfig pointers.
namespace std {
  using antlr4::atn::ATNConfig;

  /// Hashes a single configuration through ATNConfig::hashCode().
  template <> struct hash<ATNConfig>
  {
    size_t operator() (const ATNConfig &x) const
    {
      return x.hashCode();
    }
  };

  /// Order-dependent combination of the hash codes of all configurations in
  /// the vector (each element is mixed into the running seed together with the
  /// constant 0x9e3779b9 and two shifted copies of the seed). Elements are
  /// dereferenced, so they must not be null.
  template <> struct hash<std::vector<Ref<ATNConfig>>>
  {
    size_t operator() (const std::vector<Ref<ATNConfig>> &vector) const
    {
      std::size_t seed = 0;
      for (auto it = vector.begin(); it != vector.end(); ++it) {
        const std::size_t mixed = (*it)->hashCode() + 0x9e3779b9 + (seed << 6) + (seed >> 2);
        seed ^= mixed;
      }
      return seed;
    }
  };
}

View File

@@ -0,0 +1,233 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "atn/PredictionContext.h"
#include "atn/ATNConfig.h"
#include "atn/ATNSimulator.h"
#include "Exceptions.h"
#include "atn/SemanticContext.h"
#include "support/Arrays.h"
#include "atn/ATNConfigSet.h"
using namespace antlr4::atn;
using namespace antlrcpp;
// Empty anonymous namespace: it currently declares no file-local helpers.
namespace {
}
/// Default: a full-context configuration set.
ATNConfigSet::ATNConfigSet() : ATNConfigSet(true) {}

/// Copy constructor. The lookup table is created with the same bucket count
/// as `other`'s but bound to *this* set's hasher/comparer, then every config
/// of `other` is re-added through add(). The summary fields are copied from
/// `other` afterwards, so they override whatever add() derived. The read-only
/// flag is not copied.
ATNConfigSet::ATNConfigSet(const ATNConfigSet &other)
    : fullCtx(other.fullCtx),
      _configLookup(other._configLookup.bucket_count(), ATNConfigHasher{this}, ATNConfigComparer{this}) {
  addAll(other);
  dipsIntoOuterContext = other.dipsIntoOuterContext;
  hasSemanticContext = other.hasSemanticContext;
  conflictingAlts = other.conflictingAlts;
  uniqueAlt = other.uniqueAlt;
}

/// Creates an empty set for full-context (`true`) or SLL (`false`) prediction.
ATNConfigSet::ATNConfigSet(bool fullCtx)
    : fullCtx(fullCtx),
      _configLookup(0, ATNConfigHasher{this}, ATNConfigComparer{this}) {}
/// Adds `config` without a merge cache; see the two-argument overload.
bool ATNConfigSet::add(const Ref<ATNConfig> &config) {
return add(config, nullptr);
}
bool ATNConfigSet::add(const Ref<ATNConfig> &config, PredictionContextMergeCache *mergeCache) {
assert(config);
if (_readonly) {
throw IllegalStateException("This set is readonly");
}
if (config->semanticContext != SemanticContext::Empty::Instance) {
hasSemanticContext = true;
}
if (config->getOuterContextDepth() > 0) {
dipsIntoOuterContext = true;
}
auto existing = _configLookup.find(config.get());
if (existing == _configLookup.end()) {
_configLookup.insert(config.get());
_cachedHashCode = 0;
configs.push_back(config); // track order here
return true;
}
// a previous (s,i,pi,_), merge with it and save result
bool rootIsWildcard = !fullCtx;
Ref<const PredictionContext> merged = PredictionContext::merge((*existing)->context, config->context, rootIsWildcard, mergeCache);
// no need to check for existing.context, config.context in cache
// since only way to create new graphs is "call rule" and here. We
// cache at both places.
(*existing)->reachesIntoOuterContext = std::max((*existing)->reachesIntoOuterContext, config->reachesIntoOuterContext);
// make sure to preserve the precedence filter suppression during the merge
if (config->isPrecedenceFilterSuppressed()) {
(*existing)->setPrecedenceFilterSuppressed(true);
}
(*existing)->context = std::move(merged); // replace context; no need to alt mapping
return true;
}
/// Adds every configuration of `other` to this set via add().
/// @return always false.
bool ATNConfigSet::addAll(const ATNConfigSet &other) {
  const auto last = other.configs.end();
  for (auto it = other.configs.begin(); it != last; ++it) {
    add(*it);
  }
  return false;
}
std::vector<ATNState*> ATNConfigSet::getStates() const {
std::vector<ATNState*> states;
states.reserve(configs.size());
for (const auto &c : configs) {
states.push_back(c->state);
}
return states;
}
/**
 * Collects the alternatives represented in this configuration set.
 *
 * @return a bit set with bit {@code alt} set for every configuration's
 * alternative number
 *
 * @since 4.3
 */
BitSet ATNConfigSet::getAlts() const {
  BitSet represented;
  for (size_t i = 0; i < configs.size(); ++i) {
    represented.set(configs[i]->alt);
  }
  return represented;
}
std::vector<Ref<const SemanticContext>> ATNConfigSet::getPredicates() const {
std::vector<Ref<const SemanticContext>> preds;
preds.reserve(configs.size());
for (const auto &c : configs) {
if (c->semanticContext != SemanticContext::Empty::Instance) {
preds.push_back(c->semanticContext);
}
}
return preds;
}
/// Returns the i-th configuration in insertion order. The index is not
/// range-checked.
const Ref<ATNConfig>& ATNConfigSet::get(size_t i) const {
return configs[i];
}
/// Replaces each configuration's prediction context with the instance
/// returned by `interpreter->getCachedContext()`.
/// Does nothing when the lookup table is empty.
///
/// @throws IllegalStateException if the set is read-only.
void ATNConfigSet::optimizeConfigs(ATNSimulator *interpreter) {
  assert(interpreter);

  if (_readonly) {
    throw IllegalStateException("This set is readonly");
  }

  if (!_configLookup.empty()) {
    for (size_t i = 0; i < configs.size(); ++i) {
      const auto &entry = configs[i];
      entry->context = interpreter->getCachedContext(entry->context);
    }
  }
}
/// Two sets are equal when they are the same object, or when all summary
/// fields (fullCtx, uniqueAlt, conflictingAlts, hasSemanticContext,
/// dipsIntoOuterContext) match and Arrays::equals reports their config lists
/// as equal (the size is compared first as a cheap early exit).
bool ATNConfigSet::equals(const ATNConfigSet &other) const {
  if (this == &other) {
    return true;
  }

  const bool sameSummary =
      configs.size() == other.configs.size() &&
      fullCtx == other.fullCtx &&
      uniqueAlt == other.uniqueAlt &&
      conflictingAlts == other.conflictingAlts &&
      hasSemanticContext == other.hasSemanticContext &&
      dipsIntoOuterContext == other.dipsIntoOuterContext; // includes stack context

  return sameSummary && Arrays::equals(configs, other.configs);
}
size_t ATNConfigSet::hashCode() const {
size_t cachedHashCode = _cachedHashCode.load(std::memory_order_relaxed);
if (!isReadonly() || cachedHashCode == 0) {
cachedHashCode = 1;
for (const auto &i : configs) {
cachedHashCode = 31 * cachedHashCode + i->hashCode(); // Same as Java's list hashCode impl.
}
_cachedHashCode.store(cachedHashCode, std::memory_order_relaxed);
}
return cachedHashCode;
}
/// Number of configurations currently held in `configs`.
size_t ATNConfigSet::size() const {
return configs.size();
}
/// True when `configs` holds no configuration.
bool ATNConfigSet::isEmpty() const {
return configs.empty();
}
/// Removes all configurations, empties the lookup table and resets the cached
/// hash code. The summary fields (uniqueAlt, conflictingAlts, ...) are left
/// as they are.
///
/// @throws IllegalStateException if the set is read-only.
void ATNConfigSet::clear() {
  if (!_readonly) {
    configs.clear();
    _cachedHashCode = 0;
    _configLookup.clear();
    return;
  }
  throw IllegalStateException("This set is readonly");
}
/// Returns the read-only flag; add(), optimizeConfigs() and clear() throw
/// while it is set.
bool ATNConfigSet::isReadonly() const {
return _readonly;
}
void ATNConfigSet::setReadonly(bool readonly) {
_readonly = readonly;
LookupContainer(0, ATNConfigHasher{this}, ATNConfigComparer{this}).swap(_configLookup);
}
std::string ATNConfigSet::toString() const {
std::stringstream ss;
ss << "[";
for (size_t i = 0; i < configs.size(); i++) {
if ( i>0 ) ss << ", ";
ss << configs[i]->toString();
}
ss << "]";
if (hasSemanticContext) {
ss << ",hasSemanticContext=" << (hasSemanticContext?"true":"false");
}
if (uniqueAlt != ATN::INVALID_ALT_NUMBER) {
ss << ",uniqueAlt=" << uniqueAlt;
}
if (conflictingAlts.count() > 0) {
ss << ",conflictingAlts=";
ss << conflictingAlts.toString();
}
if (dipsIntoOuterContext) {
ss << ",dipsIntoOuterContext";
}
return ss.str();
}
/// Lookup-table hash of a single configuration: combines state number, alt
/// and the semantic context's hash (seed 7, multiplier 31). The prediction
/// context is deliberately not part of this key.
size_t ATNConfigSet::hashCode(const ATNConfig &other) const {
  const size_t seed = 7;
  const size_t withState = 31 * seed + other.state->stateNumber;
  const size_t withAlt = 31 * withState + other.alt;
  return 31 * withAlt + other.semanticContext->hashCode();
}
/// Lookup-table equality of two configurations: same state number, same alt
/// and equal semantic contexts. The prediction context is not compared.
bool ATNConfigSet::equals(const ATNConfig &lhs, const ATNConfig &rhs) const {
  if (lhs.state->stateNumber != rhs.state->stateNumber) {
    return false;
  }
  if (lhs.alt != rhs.alt) {
    return false;
  }
  return *lhs.semanticContext == *rhs.semanticContext;
}

View File

@@ -0,0 +1,157 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include <cassert>
#include "support/BitSet.h"
#include "atn/PredictionContext.h"
#include "atn/ATNConfig.h"
#include "FlatHashSet.h"
namespace antlr4 {
namespace atn {
/// Specialized set that can track info about the set, with support for combining similar configurations using a
/// graph-structured stack.
///
/// Configurations are kept in insertion order in {@code configs}; a separate
/// lookup table keyed by (state number, alt, semantic context) is used by
/// add() to find the entry a new configuration has to be merged into.
class ANTLR4CPP_PUBLIC ATNConfigSet {
public:
/// Track the elements as they are added to the set; supports get(i)
std::vector<Ref<ATNConfig>> configs;
// TODO: these fields make me pretty uncomfortable but nice to pack up info together, saves recomputation
// TODO: can we track conflicts as they are added to save scanning configs later?
/// Starts at 0, which equals ATN::INVALID_ALT_NUMBER. Not modified by the
/// member functions of this class other than the copy constructor.
size_t uniqueAlt = 0;
/** Currently this is only used when we detect SLL conflict; this does
* not necessarily represent the ambiguous alternatives. In fact,
* I should also point out that this seems to include predicated alternatives
* that have predicates that evaluate to false. Computed in computeTargetState().
*/
antlrcpp::BitSet conflictingAlts;
// Used in parser and lexer. In lexer, it indicates we hit a pred
// while computing a closure operation. Don't make a DFA state from this.
bool hasSemanticContext = false;
/// Set by add() as soon as a configuration with an outer-context depth
/// greater than zero is added.
bool dipsIntoOuterContext = false;
/// Indicates that this configuration set is part of a full context
/// LL prediction. It will be used to determine how to merge $. With SLL
/// it's a wildcard whereas it is not for LL context merge.
const bool fullCtx = true;
/// Creates an empty full-context set.
ATNConfigSet();
/// Copies {@code other} by re-adding its configurations and then copying its
/// summary fields; the read-only flag is not copied.
ATNConfigSet(const ATNConfigSet &other);
ATNConfigSet(ATNConfigSet&&) = delete;
explicit ATNConfigSet(bool fullCtx);
virtual ~ATNConfigSet() = default;
/// Same as add(config, nullptr).
bool add(const Ref<ATNConfig> &config);
/// <summary>
/// Adding a new config means merging contexts with existing configs for
/// {@code (s, i, pi, _)}, where {@code s} is the
/// <seealso cref="ATNConfig#state"/>, {@code i} is the <seealso cref="ATNConfig#alt"/>, and
/// {@code pi} is the <seealso cref="ATNConfig#semanticContext"/>. We use
/// {@code (s,i,pi)} as key.
/// <p/>
/// This method updates <seealso cref="#dipsIntoOuterContext"/> and
/// <seealso cref="#hasSemanticContext"/> when necessary.
/// </summary>
/// Returns true in all cases; throws IllegalStateException when the set is
/// read-only.
bool add(const Ref<ATNConfig> &config, PredictionContextMergeCache *mergeCache);
/// Adds every configuration of {@code other} via add(); always returns false.
bool addAll(const ATNConfigSet &other);
/// Returns the state of every configuration, in insertion order.
std::vector<ATNState*> getStates() const;
/**
* Gets the complete set of represented alternatives for the configuration
* set.
*
* @return the set of represented alternatives in this configuration set
*
* @since 4.3
*/
antlrcpp::BitSet getAlts() const;
/// Returns the semantic contexts of all configurations whose semantic context
/// is not the shared empty instance.
std::vector<Ref<const SemanticContext>> getPredicates() const;
/// Returns the i-th configuration in insertion order (no range check).
const Ref<ATNConfig>& get(size_t i) const;
/// Replaces each configuration's prediction context with the instance
/// returned by the interpreter's context cache; throws IllegalStateException
/// when the set is read-only.
void optimizeConfigs(ATNSimulator *interpreter);
size_t size() const;
bool isEmpty() const;
/// Removes all configurations; throws IllegalStateException when read-only.
void clear();
bool isReadonly() const;
/// Stores the flag and discards the lookup table (for either value).
void setReadonly(bool readonly);
/// Order-dependent hash over all configurations; the value is cached.
virtual size_t hashCode() const;
/// Compares the summary fields and the configuration lists.
virtual bool equals(const ATNConfigSet &other) const;
virtual std::string toString() const;
private:
/// Hash functor for the lookup table; forwards to the owning set's
/// hashCode(const ATNConfig&) so that subclasses can change the key.
struct ATNConfigHasher final {
const ATNConfigSet* atnConfigSet;
size_t operator()(const ATNConfig *other) const {
assert(other != nullptr);
return atnConfigSet->hashCode(*other);
}
};
/// Equality functor for the lookup table; forwards to the owning set's
/// equals(const ATNConfig&, const ATNConfig&).
struct ATNConfigComparer final {
const ATNConfigSet* atnConfigSet;
bool operator()(const ATNConfig *lhs, const ATNConfig *rhs) const {
assert(lhs != nullptr);
assert(rhs != nullptr);
return atnConfigSet->equals(*lhs, *rhs);
}
};
/// Last value computed by hashCode(); reset to 0 whenever a configuration is
/// inserted or the set is cleared.
mutable std::atomic<size_t> _cachedHashCode = 0;
/// Indicates that the set of configurations is read-only. Do not
/// allow any code to manipulate the set; DFA states will point at
/// the sets and they must not change. This does not protect the other
/// fields; in particular, conflictingAlts is set after
/// we've made this readonly.
bool _readonly = false;
/// Lookup key hash: state number, alt and semantic context.
virtual size_t hashCode(const ATNConfig &atnConfig) const;
/// Lookup key equality: state number, alt and semantic context.
virtual bool equals(const ATNConfig &lhs, const ATNConfig &rhs) const;
using LookupContainer = FlatHashSet<ATNConfig*, ATNConfigHasher, ATNConfigComparer>;
/// All configs but hashed by (s, i, _, pi) not including context. Wiped out
/// when we go readonly as this set becomes a DFA state.
LookupContainer _configLookup;
};
/// Value equality for configuration sets; delegates to ATNConfigSet::equals().
inline bool operator==(const ATNConfigSet &lhs, const ATNConfigSet &rhs) {
  return lhs.equals(rhs);
}

/// Negation of operator== for configuration sets.
inline bool operator!=(const ATNConfigSet &lhs, const ATNConfigSet &rhs) {
  return !lhs.equals(rhs);
}
} // namespace atn
} // namespace antlr4
namespace std {
  /// std::hash support for ATNConfigSet; forwards to ATNConfigSet::hashCode().
  template <>
  struct hash<::antlr4::atn::ATNConfigSet> {
    size_t operator()(const ::antlr4::atn::ATNConfigSet &atnConfigSet) const {
      const size_t code = atnConfigSet.hashCode();
      return code;
    }
  };
} // namespace std

View File

@@ -0,0 +1,39 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "atn/ATNDeserializationOptions.h"
#include "Exceptions.h"
using namespace antlr4;
using namespace antlr4::atn;
ATNDeserializationOptions::ATNDeserializationOptions(ATNDeserializationOptions *options)
: _readOnly(false), _verifyATN(options->_verifyATN),
_generateRuleBypassTransitions(options->_generateRuleBypassTransitions) {}
/// Returns a process-wide, default-constructed options object. It is created
/// on first use and, being const, cannot be modified through the returned
/// reference.
const ATNDeserializationOptions& ATNDeserializationOptions::getDefaultOptions() {
  static const ATNDeserializationOptions defaultOptions;
  return defaultOptions;
}
/// Marks this options object as read-only; the setters throw afterwards.
/// There is no member function that clears the flag again.
void ATNDeserializationOptions::makeReadOnly() {
_readOnly = true;
}
/// Sets the "verify ATN" option.
/// Throws IllegalStateException if the options object is read-only.
void ATNDeserializationOptions::setVerifyATN(bool verify) {
throwIfReadOnly();
_verifyATN = verify;
}
/// Sets the "generate rule bypass transitions" option.
/// Throws IllegalStateException if the options object is read-only.
void ATNDeserializationOptions::setGenerateRuleBypassTransitions(bool generate) {
throwIfReadOnly();
_generateRuleBypassTransitions = generate;
}
/// Guard used by the setters: throws IllegalStateException when the options
/// object has been made read-only, otherwise does nothing.
void ATNDeserializationOptions::throwIfReadOnly() const {
  if (!isReadOnly()) {
    return;
  }
  throw IllegalStateException("ATNDeserializationOptions is read only.");
}

View File

@@ -0,0 +1,48 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "antlr4-common.h"
namespace antlr4 {
namespace atn {
/// Options controlling ATN deserialization: whether the ATN is verified and
/// whether rule bypass transitions are generated. An instance can be frozen
/// with makeReadOnly(), after which the setters throw IllegalStateException.
class ANTLR4CPP_PUBLIC ATNDeserializationOptions final {
public:
/// Defaults: writable, verification enabled, no bypass transitions.
ATNDeserializationOptions()
: _readOnly(false), _verifyATN(true), _generateRuleBypassTransitions(false) {}
// TODO: Is this useful? If so we should mark it as explicit, otherwise remove it.
/// Copies the two option values from {@code *options}; the new object is
/// always writable. {@code options} must not be null.
ATNDeserializationOptions(ATNDeserializationOptions *options);
/// Defaulted copy operations copy all three members, including the
/// read-only flag.
ATNDeserializationOptions(const ATNDeserializationOptions&) = default;
ATNDeserializationOptions& operator=(const ATNDeserializationOptions&) = default;
/// Returns a shared, default-constructed, const options object.
static const ATNDeserializationOptions& getDefaultOptions();
bool isReadOnly() const { return _readOnly; }
/// Freezes this object; there is no way to make it writable again.
void makeReadOnly();
bool isVerifyATN() const { return _verifyATN; }
/// Throws IllegalStateException when read-only.
void setVerifyATN(bool verify);
bool isGenerateRuleBypassTransitions() const { return _generateRuleBypassTransitions; }
/// Throws IllegalStateException when read-only.
void setGenerateRuleBypassTransitions(bool generate);
private:
/// Throws IllegalStateException if isReadOnly() is true.
void throwIfReadOnly() const;
bool _readOnly;
bool _verifyATN;
bool _generateRuleBypassTransitions;
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,628 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "atn/ATNDeserializationOptions.h"
#include "atn/ATNType.h"
#include "atn/ATNState.h"
#include "atn/ATN.h"
#include "atn/LoopEndState.h"
#include "atn/DecisionState.h"
#include "atn/RuleStartState.h"
#include "atn/RuleStopState.h"
#include "atn/TokensStartState.h"
#include "atn/RuleTransition.h"
#include "atn/EpsilonTransition.h"
#include "atn/PlusLoopbackState.h"
#include "atn/PlusBlockStartState.h"
#include "atn/StarLoopbackState.h"
#include "atn/BasicBlockStartState.h"
#include "atn/BasicState.h"
#include "atn/BlockEndState.h"
#include "atn/StarLoopEntryState.h"
#include "atn/AtomTransition.h"
#include "atn/StarBlockStartState.h"
#include "atn/RangeTransition.h"
#include "atn/PredicateTransition.h"
#include "atn/PrecedencePredicateTransition.h"
#include "atn/ActionTransition.h"
#include "atn/SetTransition.h"
#include "atn/NotSetTransition.h"
#include "atn/WildcardTransition.h"
#include "atn/TransitionType.h"
#include "Token.h"
#include "misc/IntervalSet.h"
#include "Exceptions.h"
#include "support/CPPUtils.h"
#include "support/Casts.h"
#include "atn/LexerCustomAction.h"
#include "atn/LexerChannelAction.h"
#include "atn/LexerModeAction.h"
#include "atn/LexerMoreAction.h"
#include "atn/LexerPopModeAction.h"
#include "atn/LexerPushModeAction.h"
#include "atn/LexerSkipAction.h"
#include "atn/LexerTypeAction.h"
#include "atn/ATNDeserializer.h"
#include <cassert>
#include <string>
#include <vector>
using namespace antlr4;
using namespace antlr4::atn;
using namespace antlrcpp;
namespace {
// Throws IllegalStateException carrying `message` unless `condition` holds.
void checkCondition(bool condition, std::string_view message) {
  if (condition) {
    return;
  }
  throw IllegalStateException(std::string(message));
}
// Convenience overload: same check as above, with an empty exception message.
void checkCondition(bool condition) {
  constexpr std::string_view noMessage = "";
  checkCondition(condition, noMessage);
}
/**
* Analyze the {@link StarLoopEntryState} states in the specified ATN to set
* the {@link StarLoopEntryState#isPrecedenceDecision} field to the
* correct value.
*
* @param atn The ATN.
*/
void markPrecedenceDecisions(const ATN &atn) {
for (ATNState *state : atn.states) {
if (!StarLoopEntryState::is(state)) {
continue;
}
/* We analyze the ATN to determine if this ATN decision state is the
* decision for the closure block that determines whether a
* precedence rule should continue or complete.
*/
if (atn.ruleToStartState[state->ruleIndex]->isLeftRecursiveRule) {
ATNState *maybeLoopEndState = state->transitions[state->transitions.size() - 1]->target;
if (LoopEndState::is(maybeLoopEndState)) {
if (maybeLoopEndState->epsilonOnlyTransitions && RuleStopState::is(maybeLoopEndState->transitions[0]->target)) {
downCast<StarLoopEntryState*>(state)->isPrecedenceDecision = true;
}
}
}
}
}
// Creates the lexer action matching the serialized action type. `data1`/`data2` are the
// two serialized operands; each action type consumes zero, one or both of them.
// Throws IllegalArgumentException for an unknown action type.
Ref<const LexerAction> lexerActionFactory(LexerActionType type, int data1, int data2) {
  Ref<const LexerAction> action;
  switch (type) {
    case LexerActionType::CHANNEL:
      action = std::make_shared<LexerChannelAction>(data1);
      break;
    case LexerActionType::CUSTOM:
      action = std::make_shared<LexerCustomAction>(data1, data2);
      break;
    case LexerActionType::MODE:
      action = std::make_shared<LexerModeAction>(data1);
      break;
    case LexerActionType::MORE:
      action = LexerMoreAction::getInstance();
      break;
    case LexerActionType::POP_MODE:
      action = LexerPopModeAction::getInstance();
      break;
    case LexerActionType::PUSH_MODE:
      action = std::make_shared<LexerPushModeAction>(data1);
      break;
    case LexerActionType::SKIP:
      action = LexerSkipAction::getInstance();
      break;
    case LexerActionType::TYPE:
      action = std::make_shared<LexerTypeAction>(data1);
      break;
    default: {
      const std::string typeText = std::to_string(static_cast<size_t>(type));
      throw IllegalArgumentException("The specified lexer action type " + typeText + " is not valid.");
    }
  }
  return action;
}
// Builds the transition described by one serialized edge record.
//   trg        - state number of the transition target
//   arg1..arg3 - the three serialized operands; their meaning depends on `type`
//   sets       - the deserialized interval sets, indexed by arg1 for SET / NOT_SET
// Throws IllegalArgumentException if `type` is not a known transition type.
ConstTransitionPtr edgeFactory(const ATN &atn, TransitionType type, size_t trg, size_t arg1, size_t arg2,
                               size_t arg3, const std::vector<misc::IntervalSet> &sets) {
  ATNState *target = atn.states[trg];
  // For RANGE and ATOM a non-zero arg3 selects Token::EOF in place of arg1;
  // for PREDICATE and ACTION it is the context-dependence flag.
  const bool flag = arg3 != 0;

  switch (type) {
    case TransitionType::EPSILON:
      return std::make_unique<EpsilonTransition>(target);

    case TransitionType::RANGE: {
      if (!flag) {
        return std::make_unique<RangeTransition>(target, arg1, arg2);
      }
      return std::make_unique<RangeTransition>(target, Token::EOF, arg2);
    }

    case TransitionType::RULE: {
      RuleStartState *ruleStart = downCast<RuleStartState*>(atn.states[arg1]);
      return std::make_unique<RuleTransition>(ruleStart, arg2, static_cast<int>(arg3), target);
    }

    case TransitionType::PREDICATE:
      return std::make_unique<PredicateTransition>(target, arg1, arg2, flag);

    case TransitionType::PRECEDENCE:
      return std::make_unique<PrecedencePredicateTransition>(target, static_cast<int>(arg1));

    case TransitionType::ATOM: {
      if (!flag) {
        return std::make_unique<AtomTransition>(target, arg1);
      }
      return std::make_unique<AtomTransition>(target, Token::EOF);
    }

    case TransitionType::ACTION:
      return std::make_unique<ActionTransition>(target, arg1, arg2, flag);

    case TransitionType::SET:
      return std::make_unique<SetTransition>(target, sets[arg1]);

    case TransitionType::NOT_SET:
      return std::make_unique<NotSetTransition>(target, sets[arg1]);

    case TransitionType::WILDCARD:
      return std::make_unique<WildcardTransition>(target);
  }

  // Reached only for a value that is not one of the enumerators above.
  throw IllegalArgumentException("The specified transition type is not valid.");
}
/* mem check: all created instances are freed in the d-tor of the ATN. */
// Allocates the ATNState subclass for a serialized state type and stores `ruleIndex` in it.
// Returns nullptr for ATNStateType::INVALID and throws IllegalArgumentException for any
// type code that is not known.
ATNState* stateFactory(ATNStateType type, size_t ruleIndex) {
  if (type == ATNStateType::INVALID) {
    return nullptr;
  }

  ATNState *created = nullptr;
  switch (type) {
    case ATNStateType::BASIC:            created = new BasicState();           break;
    case ATNStateType::RULE_START:       created = new RuleStartState();       break;
    case ATNStateType::BLOCK_START:      created = new BasicBlockStartState(); break;
    case ATNStateType::PLUS_BLOCK_START: created = new PlusBlockStartState();  break;
    case ATNStateType::STAR_BLOCK_START: created = new StarBlockStartState();  break;
    case ATNStateType::TOKEN_START:      created = new TokensStartState();     break;
    case ATNStateType::RULE_STOP:        created = new RuleStopState();        break;
    case ATNStateType::BLOCK_END:        created = new BlockEndState();        break;
    case ATNStateType::STAR_LOOP_BACK:   created = new StarLoopbackState();    break;
    case ATNStateType::STAR_LOOP_ENTRY:  created = new StarLoopEntryState();   break;
    case ATNStateType::PLUS_LOOP_BACK:   created = new PlusLoopbackState();    break;
    case ATNStateType::LOOP_END:         created = new LoopEndState();         break;
    default:
      throw IllegalArgumentException("The specified state type " + std::to_string(static_cast<size_t>(type)) +
                                     " is not valid.");
  }

  // Each concrete state class must report the type it was created for.
  assert(created->getStateType() == type);
  created->ruleIndex = ruleIndex;
  return created;
}
// Reads the element at position `p` of the serialized data and advances `p` by one.
ssize_t readUnicodeInt32(SerializedATNView data, int& p) {
  const auto raw = data[p];
  ++p;
  return static_cast<ssize_t>(raw);
}
// Reads the SETS section of a serialized ATN and appends the decoded interval sets to `sets`.
//   data - the serialized ATN
//   p    - read position; on return it points at the first element after the section
//   sets - receives one IntervalSet per serialized set (existing content is kept)
// Layout per set: interval count, a "contains EOF" flag, then (from, to) pairs.
void deserializeSets(
    SerializedATNView data,
    int& p,
    std::vector<misc::IntervalSet>& sets) {
  size_t nsets = data[p++];
  sets.reserve(sets.size() + nsets);
  for (size_t i = 0; i < nsets; i++) {
    size_t nintervals = data[p++];
    misc::IntervalSet set;
    bool containsEof = data[p++] != 0;
    if (containsEof) {
      // EOF is stored out of band as a flag and is represented as -1 in the set.
      set.add(-1);
    }
    for (size_t j = 0; j < nintervals; j++) {
      auto a = readUnicodeInt32(data, p);
      auto b = readUnicodeInt32(data, p);
      set.add(a, b);
    }
    // The local set is not used again, so hand it over instead of copying it.
    sets.push_back(std::move(set));
  }
}
}
// Creates a deserializer that works with the default deserialization options.
ATNDeserializer::ATNDeserializer()
    : ATNDeserializer(ATNDeserializationOptions::getDefaultOptions()) {
}
// Creates a deserializer using the given options; the by-value argument is moved into the member.
ATNDeserializer::ATNDeserializer(ATNDeserializationOptions deserializationOptions)
    : _deserializationOptions(std::move(deserializationOptions)) {
}
/**
 * Rebuilds an ATN from its serialized form.
 *
 * The sections are consumed strictly in this order: version, grammar type, maximum token
 * type, states, non-greedy states, precedence states, rules, modes, sets, edges, decisions
 * and (lexer grammars only) lexer actions. Afterwards derived information is computed
 * (return edges of rule stop states, back links between block/loop states, precedence
 * decisions). Depending on the deserialization options the ATN is verified and, for parser
 * grammars, rule bypass transitions are generated.
 *
 * States serialized with type INVALID are stored as nullptr in ATN::states; loops that
 * dereference every state therefore skip empty slots.
 *
 * @param data The serialized ATN.
 * @return The newly created ATN. All states allocated here are added to it.
 * @throws UnsupportedOperationException if the serialized version differs from
 *   SERIALIZED_VERSION, or if bypass transitions are requested and the end of the prefix
 *   section of a left-recursive rule cannot be identified.
 * @throws IllegalStateException if a block start state has no end state, a block end state
 *   is shared by several block start states, a decision refers to an empty state slot, or
 *   verification fails.
 * @throws IllegalArgumentException propagated from the state/edge/lexer action factories
 *   for unknown type codes.
 */
std::unique_ptr<ATN> ATNDeserializer::deserialize(SerializedATNView data) const {
  int p = 0;
  int version = data[p++];
  if (version != SERIALIZED_VERSION) {
    std::string reason = "Could not deserialize ATN with version " + std::to_string(version) +
                         " (expected " + std::to_string(SERIALIZED_VERSION) + ").";
    throw UnsupportedOperationException(reason);
  }

  ATNType grammarType = static_cast<ATNType>(data[p++]);
  size_t maxTokenType = data[p++];
  auto atn = std::make_unique<ATN>(grammarType, maxTokenType);

  //
  // STATES
  //
  {
    std::vector<std::pair<LoopEndState*, size_t>> loopBackStateNumbers;
    std::vector<std::pair<BlockStartState*, size_t>> endStateNumbers;

    size_t nstates = data[p++];
    atn->states.reserve(nstates);
    loopBackStateNumbers.reserve(nstates); // Reserve worst case size, it's short lived.
    endStateNumbers.reserve(nstates); // Reserve worst case size, it's short lived.
    for (size_t i = 0; i < nstates; i++) {
      ATNStateType stype = static_cast<ATNStateType>(data[p++]);
      // ignore bad type of states
      if (stype == ATNStateType::INVALID) {
        atn->addState(nullptr);
        continue;
      }

      size_t ruleIndex = data[p++];
      ATNState *s = stateFactory(stype, ruleIndex);
      if (stype == ATNStateType::LOOP_END) { // special case
        int loopBackStateNumber = data[p++];
        loopBackStateNumbers.push_back({ downCast<LoopEndState*>(s), loopBackStateNumber });
      } else if (BlockStartState::is(s)) {
        int endStateNumber = data[p++];
        endStateNumbers.push_back({ downCast<BlockStartState*>(s), endStateNumber });
      }
      atn->addState(s);
    }

    // delay the assignment of loop back and end states until we know all the state instances have been initialized
    for (auto &pair : loopBackStateNumbers) {
      pair.first->loopBackState = atn->states[pair.second];
    }

    for (auto &pair : endStateNumbers) {
      pair.first->endState = downCast<BlockEndState*>(atn->states[pair.second]);
    }
  }

  size_t numNonGreedyStates = data[p++];
  for (size_t i = 0; i < numNonGreedyStates; i++) {
    size_t stateNumber = data[p++];
    // The serialized ATN must be specifying the right states, so that the
    // cast below is correct.
    downCast<DecisionState*>(atn->states[stateNumber])->nonGreedy = true;
  }

  size_t numPrecedenceStates = data[p++];
  for (size_t i = 0; i < numPrecedenceStates; i++) {
    size_t stateNumber = data[p++];
    downCast<RuleStartState*>(atn->states[stateNumber])->isLeftRecursiveRule = true;
  }

  //
  // RULES
  //
  size_t nrules = data[p++];
  atn->ruleToStartState.reserve(nrules);
  for (size_t i = 0; i < nrules; i++) {
    size_t s = data[p++];
    // Also here, the serialized atn must ensure to point to the correct class type.
    RuleStartState *startState = downCast<RuleStartState*>(atn->states[s]);
    atn->ruleToStartState.push_back(startState);
    if (atn->grammarType == ATNType::LEXER) {
      size_t tokenType = data[p++];
      atn->ruleToTokenType.push_back(tokenType);
    }
  }

  atn->ruleToStopState.resize(nrules);
  for (ATNState *state : atn->states) {
    if (!RuleStopState::is(state)) {
      continue;
    }

    RuleStopState *stopState = downCast<RuleStopState*>(state);
    atn->ruleToStopState[state->ruleIndex] = stopState;
    atn->ruleToStartState[state->ruleIndex]->stopState = stopState;
  }

  //
  // MODES
  //
  size_t nmodes = data[p++];
  atn->modeToStartState.reserve(nmodes);
  for (size_t i = 0; i < nmodes; i++) {
    size_t s = data[p++];
    atn->modeToStartState.push_back(downCast<TokensStartState*>(atn->states[s]));
  }

  //
  // SETS
  //
  {
    std::vector<misc::IntervalSet> sets;

    deserializeSets(data, p, sets);
    sets.shrink_to_fit();

    //
    // EDGES
    //
    int nedges = data[p++];
    for (int i = 0; i < nedges; i++) {
      // Every edge record consists of six values: source, target, type and three operands.
      size_t src = data[p];
      size_t trg = data[p + 1];
      TransitionType ttype = static_cast<TransitionType>(data[p + 2]);
      size_t arg1 = data[p + 3];
      size_t arg2 = data[p + 4];
      size_t arg3 = data[p + 5];
      ConstTransitionPtr trans = edgeFactory(*atn, ttype, trg, arg1, arg2, arg3, sets);
      ATNState *srcState = atn->states[src];
      srcState->addTransition(std::move(trans));
      p += 6;
    }
  }

  // edges for rule stop states can be derived, so they aren't serialized
  for (ATNState *state : atn->states) {
    // Slots of INVALID states are empty.
    if (state == nullptr) {
      continue;
    }

    for (size_t i = 0; i < state->transitions.size(); i++) {
      const Transition *t = state->transitions[i].get();
      if (!RuleTransition::is(t)) {
        continue;
      }

      const RuleTransition *ruleTransition = downCast<const RuleTransition*>(t);
      size_t outermostPrecedenceReturn = INVALID_INDEX;
      if (atn->ruleToStartState[ruleTransition->target->ruleIndex]->isLeftRecursiveRule) {
        if (ruleTransition->precedence == 0) {
          outermostPrecedenceReturn = ruleTransition->target->ruleIndex;
        }
      }

      ConstTransitionPtr returnTransition = std::make_unique<EpsilonTransition>(ruleTransition->followState, outermostPrecedenceReturn);
      atn->ruleToStopState[ruleTransition->target->ruleIndex]->addTransition(std::move(returnTransition));
    }
  }

  for (ATNState *state : atn->states) {
    if (BlockStartState::is(state)) {
      BlockStartState *startState = downCast<BlockStartState*>(state);

      // we need to know the end state to set its start state
      if (startState->endState == nullptr) {
        throw IllegalStateException();
      }

      // block end states can only be associated to a single block start state
      if (startState->endState->startState != nullptr) {
        throw IllegalStateException();
      }

      startState->endState->startState = downCast<BlockStartState*>(state);
    }

    if (PlusLoopbackState::is(state)) {
      PlusLoopbackState *loopbackState = downCast<PlusLoopbackState*>(state);
      for (size_t i = 0; i < loopbackState->transitions.size(); i++) {
        ATNState *target = loopbackState->transitions[i]->target;
        if (PlusBlockStartState::is(target)) {
          (downCast<PlusBlockStartState*>(target))->loopBackState = loopbackState;
        }
      }
    } else if (StarLoopbackState::is(state)) {
      StarLoopbackState *loopbackState = downCast<StarLoopbackState*>(state);
      for (size_t i = 0; i < loopbackState->transitions.size(); i++) {
        ATNState *target = loopbackState->transitions[i]->target;
        if (StarLoopEntryState::is(target)) {
          downCast<StarLoopEntryState*>(target)->loopBackState = loopbackState;
        }
      }
    }
  }

  //
  // DECISIONS
  //
  size_t ndecisions = data[p++];
  atn->decisionToState.reserve(ndecisions);
  for (size_t i = 0; i < ndecisions; i++) {
    size_t s = data[p++];
    DecisionState *decState = downCast<DecisionState*>(atn->states[s]);
    if (decState == nullptr)
      throw IllegalStateException();

    atn->decisionToState.push_back(decState);
    decState->decision = static_cast<int>(i);
  }

  //
  // LEXER ACTIONS
  //
  if (atn->grammarType == ATNType::LEXER) {
    atn->lexerActions.resize(data[p++]);
    for (size_t i = 0; i < atn->lexerActions.size(); i++) {
      LexerActionType actionType = static_cast<LexerActionType>(data[p++]);
      int data1 = data[p++];
      int data2 = data[p++];
      atn->lexerActions[i] = lexerActionFactory(actionType, data1, data2);
    }
  }

  markPrecedenceDecisions(*atn);

  if (_deserializationOptions.isVerifyATN()) {
    verifyATN(*atn);
  }

  if (_deserializationOptions.isGenerateRuleBypassTransitions() && atn->grammarType == ATNType::PARSER) {
    // Each rule gets its own token type, located right after the regular token types.
    atn->ruleToTokenType.resize(atn->ruleToStartState.size());
    for (size_t i = 0; i < atn->ruleToStartState.size(); i++) {
      atn->ruleToTokenType[i] = static_cast<int>(atn->maxTokenType + i + 1);
    }

    for (std::vector<RuleStartState*>::size_type i = 0; i < atn->ruleToStartState.size(); i++) {
      BasicBlockStartState *bypassStart = new BasicBlockStartState(); /* mem check: freed in ATN d-tor */
      bypassStart->ruleIndex = static_cast<int>(i);
      atn->addState(bypassStart);

      BlockEndState *bypassStop = new BlockEndState(); /* mem check: freed in ATN d-tor */
      bypassStop->ruleIndex = static_cast<int>(i);
      atn->addState(bypassStop);

      bypassStart->endState = bypassStop;
      atn->defineDecisionState(bypassStart);

      bypassStop->startState = bypassStart;

      ATNState *endState;
      const Transition *excludeTransition = nullptr;
      if (atn->ruleToStartState[i]->isLeftRecursiveRule) {
        // wrap from the beginning of the rule to the StarLoopEntryState
        endState = nullptr;
        for (ATNState *state : atn->states) {
          // Slots of INVALID states are empty.
          if (state == nullptr) {
            continue;
          }

          if (state->ruleIndex != i) {
            continue;
          }

          if (!StarLoopEntryState::is(state)) {
            continue;
          }

          ATNState *maybeLoopEndState = state->transitions[state->transitions.size() - 1]->target;
          if (!LoopEndState::is(maybeLoopEndState)) {
            continue;
          }

          if (maybeLoopEndState->epsilonOnlyTransitions && RuleStopState::is(maybeLoopEndState->transitions[0]->target)) {
            endState = state;
            break;
          }
        }

        if (endState == nullptr) {
          throw UnsupportedOperationException("Couldn't identify final state of the precedence rule prefix section.");
        }

        excludeTransition = (static_cast<StarLoopEntryState*>(endState))->loopBackState->transitions[0].get();
      } else {
        endState = atn->ruleToStopState[i];
      }

      // all non-excluded transitions that currently target end state need to target blockEnd instead
      for (ATNState *state : atn->states) {
        // Slots of INVALID states are empty.
        if (state == nullptr) {
          continue;
        }

        for (auto &transition : state->transitions) {
          if (transition.get() == excludeTransition) {
            continue;
          }

          if (transition->target == endState) {
            const_cast<Transition*>(transition.get())->target = bypassStop;
          }
        }
      }

      // all transitions leaving the rule start state need to leave blockStart instead
      while (atn->ruleToStartState[i]->transitions.size() > 0) {
        ConstTransitionPtr transition = atn->ruleToStartState[i]->removeTransition(atn->ruleToStartState[i]->transitions.size() - 1);
        bypassStart->addTransition(std::move(transition));
      }

      // link the new states
      atn->ruleToStartState[i]->addTransition(std::make_unique<EpsilonTransition>(bypassStart));
      bypassStop->addTransition(std::make_unique<EpsilonTransition>(endState));

      ATNState *matchState = new BasicState(); /* mem check: freed in ATN d-tor */
      atn->addState(matchState);
      matchState->addTransition(std::make_unique<AtomTransition>(bypassStop, atn->ruleToTokenType[i]));
      bypassStart->addTransition(std::make_unique<EpsilonTransition>(matchState));
    }

    if (_deserializationOptions.isVerifyATN()) {
      // reverify after modification
      verifyATN(*atn);
    }
  }

  return atn;
}
void ATNDeserializer::verifyATN(const ATN &atn) const {
// verify assumptions
for (ATNState *state : atn.states) {
if (state == nullptr) {
continue;
}
checkCondition(state->epsilonOnlyTransitions || state->transitions.size() <= 1);
if (PlusBlockStartState::is(state)) {
checkCondition((downCast<PlusBlockStartState*>(state))->loopBackState != nullptr);
}
if (StarLoopEntryState::is(state)) {
StarLoopEntryState *starLoopEntryState = downCast<StarLoopEntryState*>(state);
checkCondition(starLoopEntryState->loopBackState != nullptr);
checkCondition(starLoopEntryState->transitions.size() == 2);
if (StarBlockStartState::is(starLoopEntryState->transitions[0]->target)) {
checkCondition(downCast<LoopEndState*>(starLoopEntryState->transitions[1]->target) != nullptr);
checkCondition(!starLoopEntryState->nonGreedy);
} else if (LoopEndState::is(starLoopEntryState->transitions[0]->target)) {
checkCondition(StarBlockStartState::is(starLoopEntryState->transitions[1]->target));
checkCondition(starLoopEntryState->nonGreedy);
} else {
throw IllegalStateException();
}
}
if (StarLoopbackState::is(state)) {
checkCondition(state->transitions.size() == 1);
checkCondition(StarLoopEntryState::is(state->transitions[0]->target));
}
if (LoopEndState::is(state)) {
checkCondition((downCast<LoopEndState*>(state))->loopBackState != nullptr);
}
if (RuleStartState::is(state)) {
checkCondition((downCast<RuleStartState*>(state))->stopState != nullptr);
}
if (BlockStartState::is(state)) {
checkCondition((downCast<BlockStartState*>(state))->endState != nullptr);
}
if (BlockEndState::is(state)) {
checkCondition((downCast<BlockEndState*>(state))->startState != nullptr);
}
if (DecisionState::is(state)) {
DecisionState *decisionState = downCast<DecisionState*>(state);
checkCondition(decisionState->transitions.size() <= 1 || decisionState->decision >= 0);
} else {
checkCondition(state->transitions.size() <= 1 || RuleStopState::is(state));
}
}
}

View File

@@ -0,0 +1,32 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "atn/ATNDeserializationOptions.h"
#include "atn/SerializedATNView.h"
#include "atn/LexerAction.h"
#include "atn/Transition.h"
namespace antlr4 {
namespace atn {
// Turns a serialized ATN (SerializedATNView) back into an ATN object graph.
class ANTLR4CPP_PUBLIC ATNDeserializer final {
public:
// Version of the serialization format this deserializer accepts.
static constexpr size_t SERIALIZED_VERSION = 4;
// Uses the default deserialization options.
ATNDeserializer();
// Uses the given deserialization options.
explicit ATNDeserializer(ATNDeserializationOptions deserializationOptions);
// Creates a new ATN from `input`; ownership of the result passes to the caller.
std::unique_ptr<ATN> deserialize(SerializedATNView input) const;
// Checks structural assumptions about the states of `atn`.
void verifyATN(const ATN &atn) const;
private:
// Options fixed at construction time.
const ATNDeserializationOptions _deserializationOptions;
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,33 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "atn/ATNSimulator.h"
#include "atn/ATNConfigSet.h"
#include "atn/ATNDeserializer.h"
#include "atn/ATNType.h"
#include "dfa/DFAState.h"
using namespace antlr4;
using namespace antlr4::dfa;
using namespace antlr4::atn;
// Shared sentinel DFA state, constructed with the largest int value as its argument.
const Ref<DFAState> ATNSimulator::ERROR = std::make_shared<DFAState>(std::numeric_limits<int>::max());
// Binds the simulator to an ATN and to a prediction context cache. Both are held by
// reference, so they have to outlive this simulator.
ATNSimulator::ATNSimulator(const ATN &atn, PredictionContextCache &sharedContextCache)
    : atn(atn),
      _sharedContextCache(sharedContextCache) {
}
// Base implementation: clearing the DFA is not supported here, so this always throws
// UnsupportedOperationException. The method is virtual; subclasses may override it.
void ATNSimulator::clearDFA() {
  const char *const reason = "This ATN simulator does not support clearing the DFA.";
  throw UnsupportedOperationException(reason);
}
// Gives access to the prediction context cache this simulator was created with.
PredictionContextCache& ATNSimulator::getSharedContextCache() const {
  PredictionContextCache &cache = _sharedContextCache;
  return cache;
}
// Looks up `context` via PredictionContext::getCachedContext using the shared context cache.
Ref<const PredictionContext> ATNSimulator::getCachedContext(const Ref<const PredictionContext> &context) {
  // This function must only be called with an active state lock, as we are going to change a shared structure.
  PredictionContextCache &cache = getSharedContextCache();
  return PredictionContext::getCachedContext(context, cache);
}

View File

@@ -0,0 +1,71 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "atn/ATN.h"
#include "atn/PredictionContext.h"
#include "atn/PredictionContextCache.h"
#include "misc/IntervalSet.h"
#include "support/CPPUtils.h"
namespace antlr4 {
namespace atn {
// Abstract base class of ATN simulators (reset() is pure virtual). It holds a reference
// to the ATN and to a prediction context cache, both supplied at construction.
class ANTLR4CPP_PUBLIC ATNSimulator {
public:
/// Must distinguish between missing edge and edge we know leads nowhere.
static const Ref<dfa::DFAState> ERROR;
// The ATN this simulator works on; stored by reference, not owned.
const ATN &atn;
ATNSimulator(const ATN &atn, PredictionContextCache &sharedContextCache);
virtual ~ATNSimulator() = default;
// Resets the simulator; the exact effect is defined by the subclass.
virtual void reset() = 0;
/**
* Clear the DFA cache used by the current instance. Since the DFA cache may
* be shared by multiple ATN simulators, this method may affect the
* performance (but not accuracy) of other parsers which are being used
* concurrently.
*
* @throws UnsupportedOperationException if the current instance does not
* support clearing the DFA.
*
* @since 4.3
*/
virtual void clearDFA();
// Returns the context cache passed to the constructor.
PredictionContextCache& getSharedContextCache() const;
// Returns the cached representative of `context` from the shared context cache.
Ref<const PredictionContext> getCachedContext(const Ref<const PredictionContext> &context);
protected:
/// <summary>
/// The context cache maps all PredictionContext objects that are equals()
/// to a single cached copy. This cache is shared across all contexts
/// in all ATNConfigs in all DFA states. We rebuild each ATNConfigSet
/// to use only cached nodes/graphs in addDFAState(). We don't want to
/// fill this during closure() since there are lots of contexts that
/// pop up but are not used ever again. It also greatly slows down closure().
/// <p/>
/// This cache makes a huge difference in memory and a little bit in speed.
/// For the Java grammar on java.*, it dropped the memory requirements
/// at the end from 25M to 16M. We don't store any of the full context
/// graphs in the DFA because they are limited to local context only,
/// but apparently there's a lot of repetition there as well. We optimize
/// the config contexts before storing the config set in the DFA states
/// by literally rebuilding them with cached subgraphs only.
/// <p/>
/// I tried a cache for use during closure operations, that was
/// whacked after each adaptivePredict(). It cost a little bit
/// more time I think and doesn't save on the overall footprint
/// so it's not worth the complexity.
/// </summary>
PredictionContextCache &_sharedContextCache;
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,56 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "atn/ATN.h"
#include "atn/Transition.h"
#include "misc/IntervalSet.h"
#include "support/CPPUtils.h"
#include "atn/ATNState.h"
using namespace antlr4::atn;
using namespace antlrcpp;
// The state number doubles as the hash value of a state.
size_t ATNState::hashCode() const {
  const size_t hash = stateNumber;
  return hash;
}
bool ATNState::equals(const ATNState &other) const {
return stateNumber == other.stateNumber;
}
// Base implementation always answers "no"; the method is virtual so subclasses can override it.
bool ATNState::isNonGreedyExitState() const
{
  return false;
}
// Textual form of a state: its state number in decimal.
std::string ATNState::toString() const {
  std::string text = std::to_string(stateNumber);
  return text;
}
// Appends a transition by delegating to the indexed overload with the end position.
void ATNState::addTransition(ConstTransitionPtr e) {
  const size_t endPosition = transitions.size();
  addTransition(endPosition, std::move(e));
}
// Inserts transition `e` at position `index` of this state's transition list.
//
// If there already is a transition to the same target state (compared by state number),
// `e` is dropped and nothing changes. The first transition added determines
// epsilonOnlyTransitions; adding a transition of the other kind afterwards resets the
// flag to false and reports the mix on std::cerr.
void ATNState::addTransition(size_t index, ConstTransitionPtr e) {
  for (const auto &transition : transitions) {
    if (transition->target->stateNumber == e->target->stateNumber) {
      return;
    }
  }

  if (transitions.empty()) {
    epsilonOnlyTransitions = e->isEpsilon();
  } else if (epsilonOnlyTransitions != e->isEpsilon()) {
    // Streams do not interpret printf-style placeholders, so the number is streamed explicitly.
    std::cerr << "ATN state " << stateNumber << " has both epsilon and non-epsilon transitions.\n";
    epsilonOnlyTransitions = false;
  }

  transitions.insert(transitions.begin() + index, std::move(e));
}
// Detaches the transition at `index` from this state and hands it to the caller.
// `index` is not range-checked.
ConstTransitionPtr ATNState::removeTransition(size_t index) {
  const auto position = transitions.begin() + index;
  ConstTransitionPtr removed = std::move(*position);
  transitions.erase(position);
  return removed;
}

View File

@@ -0,0 +1,139 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "misc/IntervalSet.h"
#include "atn/Transition.h"
#include "atn/ATNStateType.h"
namespace antlr4 {
namespace atn {
/// <summary>
/// The following images show the relation of states and
/// <seealso cref="ATNState#transitions"/> for various grammar constructs.
///
/// <ul>
///
/// <li>Solid edges marked with an &#0949; indicate a required
/// <seealso cref="EpsilonTransition"/>.</li>
///
/// <li>Dashed edges indicate locations where any transition derived from
/// <seealso cref="Transition"/> might appear.</li>
///
/// <li>Dashed nodes are place holders for either a sequence of linked
/// <seealso cref="BasicState"/> states or the inclusion of a block representing a nested
/// construct in one of the forms below.</li>
///
/// <li>Nodes showing multiple outgoing alternatives with a {@code ...} support
/// any number of alternatives (one or more). Nodes without the {@code ...} only
/// support the exact number of alternatives shown in the diagram.</li>
///
/// </ul>
///
/// <h2>Basic Blocks</h2>
///
/// <h3>Rule</h3>
///
/// <embed src="images/Rule.svg" type="image/svg+xml"/>
///
/// <h3>Block of 1 or more alternatives</h3>
///
/// <embed src="images/Block.svg" type="image/svg+xml"/>
///
/// <h2>Greedy Loops</h2>
///
/// <h3>Greedy Closure: {@code (...)*}</h3>
///
/// <embed src="images/ClosureGreedy.svg" type="image/svg+xml"/>
///
/// <h3>Greedy Positive Closure: {@code (...)+}</h3>
///
/// <embed src="images/PositiveClosureGreedy.svg" type="image/svg+xml"/>
///
/// <h3>Greedy Optional: {@code (...)?}</h3>
///
/// <embed src="images/OptionalGreedy.svg" type="image/svg+xml"/>
///
/// <h2>Non-Greedy Loops</h2>
///
/// <h3>Non-Greedy Closure: {@code (...)*?}</h3>
///
/// <embed src="images/ClosureNonGreedy.svg" type="image/svg+xml"/>
///
/// <h3>Non-Greedy Positive Closure: {@code (...)+?}</h3>
///
/// <embed src="images/PositiveClosureNonGreedy.svg" type="image/svg+xml"/>
///
/// <h3>Non-Greedy Optional: {@code (...)??}</h3>
///
/// <embed src="images/OptionalNonGreedy.svg" type="image/svg+xml"/>
/// </summary>
// GCC generates a warning here if ATN has already been declared due to the
// attributes added by ANTLR4CPP_PUBLIC.
// See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=39159
// Only forward-declare if it hasn't already been declared.
#ifndef ANTLR4CPP_ATN_DECLARED
class ANTLR4CPP_PUBLIC ATN;
#endif
// Base class of all ATN states. It is neither copyable nor movable and can only be
// constructed by subclasses, which pass their ATNStateType to the protected constructor.
class ANTLR4CPP_PUBLIC ATNState {
public:
static constexpr size_t INITIAL_NUM_TRANSITIONS = 4;
// Value of stateNumber for a state that has not been given a number yet.
static constexpr size_t INVALID_STATE_NUMBER = std::numeric_limits<size_t>::max();
size_t stateNumber = INVALID_STATE_NUMBER;
size_t ruleIndex = 0; // at runtime, we don't have Rule objects
// Maintained by addTransition(): follows the kind (epsilon or not) of the first
// transition added and becomes false once both kinds are present.
bool epsilonOnlyTransitions = false;
/// Track the transitions emanating from this ATN state.
std::vector<ConstTransitionPtr> transitions;
ATNState() = delete;
ATNState(ATNState const&) = delete;
ATNState(ATNState&&) = delete;
virtual ~ATNState() = default;
ATNState& operator=(ATNState const&) = delete;
ATNState& operator=(ATNState&&) = delete;
// Appends `e`, unless a transition to the same target state already exists.
void addTransition(ConstTransitionPtr e);
// Inserts `e` at `index`, unless a transition to the same target state already exists.
void addTransition(size_t index, ConstTransitionPtr e);
// Removes the transition at `index` and returns it.
ConstTransitionPtr removeTransition(size_t index);
// Hash value and equality are both based on stateNumber.
virtual size_t hashCode() const;
virtual bool equals(const ATNState &other) const;
// Returns false in this base class.
virtual bool isNonGreedyExitState() const;
// Returns the state number as text.
virtual std::string toString() const;
// The type tag handed to the constructor by the concrete subclass.
ATNStateType getStateType() const { return _stateType; }
protected:
explicit ATNState(ATNStateType stateType) : _stateType(stateType) {}
private:
/// Used to cache lookahead during parsing, not used during construction.
misc::IntervalSet _nextTokenWithinRule;
// Set once _nextTokenWithinRule has been computed (see ATN::nextTokens).
std::atomic<bool> _nextTokenUpdated { false };
const ATNStateType _stateType;
// ATN reads and writes the lookahead cache members above.
friend class ATN;
};
// Equality of states is defined by the virtual ATNState::equals of the left operand.
inline bool operator==(const ATNState &lhs, const ATNState &rhs) {
  const bool same = lhs.equals(rhs);
  return same;
}
// Inequality is the negation of ATNState::equals of the left operand.
inline bool operator!=(const ATNState &lhs, const ATNState &rhs) {
  return !lhs.equals(rhs);
}
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,33 @@
#include "atn/ATNStateType.h"
// Maps an ATNStateType to the spelling of its enumerator; any value that is not a named
// enumerator yields "UNKNOWN".
std::string antlr4::atn::atnStateTypeName(ATNStateType atnStateType) {
  const char *label = "UNKNOWN";
  switch (atnStateType) {
    case ATNStateType::INVALID:          label = "INVALID";          break;
    case ATNStateType::BASIC:            label = "BASIC";            break;
    case ATNStateType::RULE_START:       label = "RULE_START";       break;
    case ATNStateType::BLOCK_START:      label = "BLOCK_START";      break;
    case ATNStateType::PLUS_BLOCK_START: label = "PLUS_BLOCK_START"; break;
    case ATNStateType::STAR_BLOCK_START: label = "STAR_BLOCK_START"; break;
    case ATNStateType::TOKEN_START:      label = "TOKEN_START";      break;
    case ATNStateType::RULE_STOP:        label = "RULE_STOP";        break;
    case ATNStateType::BLOCK_END:        label = "BLOCK_END";        break;
    case ATNStateType::STAR_LOOP_BACK:   label = "STAR_LOOP_BACK";   break;
    case ATNStateType::STAR_LOOP_ENTRY:  label = "STAR_LOOP_ENTRY";  break;
    case ATNStateType::PLUS_LOOP_BACK:   label = "PLUS_LOOP_BACK";   break;
    case ATNStateType::LOOP_END:         label = "LOOP_END";         break;
  }
  return label;
}

View File

@@ -0,0 +1,36 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include <cstddef>
#include <string>
#include "antlr4-common.h"
namespace antlr4 {
namespace atn {
// Constants for ATNState serialization.
// Type tags of ATN states. The numeric values are the codes read from a serialized ATN,
// so they must not be renumbered.
enum class ATNStateType : size_t {
// Marks a state slot without a state; the deserializer stores nullptr for it.
INVALID = 0,
BASIC = 1,
RULE_START = 2,
BLOCK_START = 3,
PLUS_BLOCK_START = 4,
STAR_BLOCK_START = 5,
TOKEN_START = 6,
RULE_STOP = 7,
BLOCK_END = 8,
STAR_LOOP_BACK = 9,
STAR_LOOP_ENTRY = 10,
PLUS_LOOP_BACK = 11,
LOOP_END = 12,
};
ANTLR4CPP_PUBLIC std::string atnStateTypeName(ATNStateType atnStateType);
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,20 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "antlr4-common.h"
namespace antlr4 {
namespace atn {
/// Represents the type of recognizer an ATN applies to.
// The numeric values are the grammar type codes read from a serialized ATN.
enum class ATNType {
// ATN of a lexer grammar.
LEXER = 0,
// ATN of a parser grammar.
PARSER = 1,
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,29 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "atn/ActionTransition.h"
using namespace antlr4::atn;
// Creates an action transition without an action index (INVALID_INDEX) that is not
// context dependent; delegates to the full constructor.
ActionTransition::ActionTransition(ATNState *target, size_t ruleIndex)
    : ActionTransition(target, ruleIndex, INVALID_INDEX, false) {
}
// Creates an action transition to `target` for the given rule and action, recording
// whether the action depends on the context.
ActionTransition::ActionTransition(ATNState *target, size_t ruleIndex, size_t actionIndex, bool isCtxDependent)
    : Transition(TransitionType::ACTION, target),
      ruleIndex(ruleIndex),
      actionIndex(actionIndex),
      isCtxDependent(isCtxDependent) {
}
// Action transitions count as epsilon transitions.
bool ActionTransition::isEpsilon() const
{
  // we are to be ignored by analysis 'cept for predicates
  return true;
}
// An action transition never matches an input symbol; all arguments are ignored.
bool ActionTransition::matches(size_t /*symbol*/, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const
{
  return false;
}
// Renders the transition as " ACTION <base text> { ruleIndex: .., actionIndex: .., isCtxDependent: .. }".
// The boolean flag goes through std::to_string as well.
std::string ActionTransition::toString() const {
  std::string text = " ACTION ";
  text += Transition::toString();
  text += " { ruleIndex: ";
  text += std::to_string(ruleIndex);
  text += ", actionIndex: ";
  text += std::to_string(actionIndex);
  text += ", isCtxDependent: ";
  text += std::to_string(isCtxDependent);
  text += " }";
  return text;
}

View File

@@ -0,0 +1,35 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "atn/Transition.h"
namespace antlr4 {
namespace atn {
// Transition of type TransitionType::ACTION. It is treated as an epsilon transition and
// never matches an input symbol.
class ANTLR4CPP_PUBLIC ActionTransition final : public Transition {
public:
// Type tests based on the transition's type tag; the pointer overload accepts nullptr.
static bool is(const Transition &transition) { return transition.getTransitionType() == TransitionType::ACTION; }
static bool is(const Transition *transition) { return transition != nullptr && is(*transition); }
const size_t ruleIndex;
// INVALID_INDEX when the transition was created without an action index.
const size_t actionIndex;
const bool isCtxDependent; // e.g., $i ref in action
// Leaves actionIndex at INVALID_INDEX and isCtxDependent at false.
ActionTransition(ATNState *target, size_t ruleIndex);
ActionTransition(ATNState *target, size_t ruleIndex, size_t actionIndex, bool isCtxDependent);
// Always true.
virtual bool isEpsilon() const override;
// Always false.
virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override;
virtual std::string toString() const override;
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,16 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "atn/AmbiguityInfo.h"
using namespace antlr4;
using namespace antlr4::atn;
/// Constructs the ambiguity event.
///
/// All common event data (decision, configs, input, start/stop index and the
/// full-context flag) is forwarded unchanged to DecisionEventInfo; a copy of
/// `ambigAlts` is stored in the member of the same name.
///
/// @param decision   The decision number.
/// @param configs    The configuration set describing the ambiguous alternatives.
/// @param ambigAlts  The set of ambiguous alternatives; copied into this object.
/// @param input      The input token stream.
/// @param startIndex The index at which the prediction started.
/// @param stopIndex  The index at which the ambiguity was identified.
/// @param fullCtx    Whether the ambiguity was found during full-context (LL) prediction.
AmbiguityInfo::AmbiguityInfo(size_t decision, ATNConfigSet *configs, const antlrcpp::BitSet &ambigAlts,
                             TokenStream *input, size_t startIndex, size_t stopIndex, bool fullCtx)
    : DecisionEventInfo(decision, configs, input, startIndex, stopIndex, fullCtx),
      // Copy-construct the member directly instead of default-constructing it
      // and assigning in the body. Inside the initializer the parameter name
      // shadows the member, so this copies the argument into the member.
      ambigAlts(ambigAlts) {
}

View File

@@ -0,0 +1,68 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "atn/DecisionEventInfo.h"
#include "support/BitSet.h"
namespace antlr4 {
namespace atn {
/// <summary>
/// This class represents profiling event information for an ambiguity.
/// Ambiguities are decisions where a particular input resulted in an SLL
/// conflict, followed by LL prediction also reaching a conflict state
/// (indicating a true ambiguity in the grammar).
///
/// <para>
/// This event may be reported during SLL prediction in cases where the
/// conflicting SLL configuration set provides sufficient information to
/// determine that the SLL conflict is truly an ambiguity. For example, if none
/// of the ATN configurations in the conflicting SLL configuration set have
/// traversed a global follow transition (i.e.
/// <seealso cref="ATNConfig#reachesIntoOuterContext"/> is 0 for all configurations), then
/// the result of SLL prediction for that input is known to be equivalent to the
/// result of LL prediction for that input.</para>
///
/// <para>
/// In some cases, the minimum represented alternative in the conflicting LL
/// configuration set is not equal to the minimum represented alternative in the
/// conflicting SLL configuration set. Grammars and inputs which result in this
/// scenario are unable to use <seealso cref="PredictionMode#SLL"/>, which in turn means
/// they cannot use the two-stage parsing strategy to improve parsing performance
/// for that input.</para>
/// </summary>
/// <seealso cref="ParserATNSimulator#reportAmbiguity"/>
/// <seealso cref="ANTLRErrorListener#reportAmbiguity"/>
///
/// @since 4.3
class ANTLR4CPP_PUBLIC AmbiguityInfo : public DecisionEventInfo {
public:
/// The set of alternative numbers for this decision event that lead to a valid parse.
/// Stored by value: the constructor copies the set it is given.
antlrcpp::BitSet ambigAlts;
/// <summary>
/// Constructs a new instance of the <seealso cref="AmbiguityInfo"/> class with the
/// specified detailed ambiguity information.
/// </summary>
/// <param name="decision"> The decision number </param>
/// <param name="configs"> The final configuration set identifying the ambiguous
/// alternatives for the current input </param>
/// <param name="ambigAlts"> The set of alternatives in the decision that lead to a valid parse.
/// The predicted alt is the min(ambigAlts) </param>
/// <param name="input"> The input token stream </param>
/// <param name="startIndex"> The start index for the current prediction </param>
/// <param name="stopIndex"> The index at which the ambiguity was identified during
/// prediction </param>
/// <param name="fullCtx"> {@code true} if the ambiguity was identified during LL
/// prediction; otherwise, {@code false} if the ambiguity was identified
/// during SLL prediction </param>
AmbiguityInfo(size_t decision, ATNConfigSet *configs, const antlrcpp::BitSet &ambigAlts, TokenStream *input,
size_t startIndex, size_t stopIndex, bool fullCtx);
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,129 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "atn/ArrayPredictionContext.h"
#include <cstring>
#include "atn/SingletonPredictionContext.h"
#include "atn/HashUtils.h"
#include "misc/MurmurHash.h"
#include "support/Casts.h"
using namespace antlr4::atn;
using namespace antlr4::misc;
using namespace antlrcpp;
namespace {

  // Null-tolerant equality for parent contexts.
  //
  // Parent pointers may be null in full-context mode when the contexts live in
  // an ArrayPredictionContext. Two null parents compare equal, a null and a
  // non-null parent compare unequal, and two non-null parents are compared by
  // value through PredictionContext's operator==.
  bool predictionContextEqual(const Ref<const PredictionContext> &lhs, const Ref<const PredictionContext> &rhs) {
    const bool lhsMissing = lhs == nullptr;
    const bool rhsMissing = rhs == nullptr;
    if (lhsMissing || rhsMissing) {
      // Equal only when both sides are missing.
      return lhsMissing && rhsMissing;
    }
    return *lhs == *rhs;
  }

}
ArrayPredictionContext::ArrayPredictionContext(const SingletonPredictionContext &predictionContext)
: ArrayPredictionContext({ predictionContext.parent }, { predictionContext.returnState }) {}
// Builds an array context that takes over the given parallel vectors.
// Both vectors are moved into the members, so the assertions below must (and
// do) inspect the members via `this->`, not the moved-from parameters.
// Debug builds check that the vectors are non-empty and of equal length.
ArrayPredictionContext::ArrayPredictionContext(std::vector<Ref<const PredictionContext>> parents,
                                               std::vector<size_t> returnStates)
    : PredictionContext(PredictionContextType::ARRAY),
      parents(std::move(parents)),
      returnStates(std::move(returnStates)) {
  assert(!this->parents.empty());
  assert(!this->returnStates.empty());
  assert(this->returnStates.size() == this->parents.size());
}
// Reports whether this context is the empty context.
// EMPTY_RETURN_STATE can only occupy the last slot, so finding it in the first
// slot already implies there is exactly one entry; no separate size check is needed.
bool ArrayPredictionContext::isEmpty() const {
  return returnStates.front() == EMPTY_RETURN_STATE;
}
// Number of entries in this context, taken from the return-state vector.
size_t ArrayPredictionContext::size() const {
  const size_t entryCount = returnStates.size();
  return entryCount;
}
// Returns a reference to the parent stored at `index` (may hold a null pointer).
// The index is not range-checked.
const Ref<const PredictionContext>& ArrayPredictionContext::getParent(size_t index) const {
  const Ref<const PredictionContext> &parent = parents[index];
  return parent;
}
// Returns the return state stored at `index`. The index is not range-checked.
size_t ArrayPredictionContext::getReturnState(size_t index) const {
  const size_t state = returnStates[index];
  return state;
}
// Computes the MurmurHash of this context. The hash is fed, in order, with
// the context type, every parent and every return state, and is finished with
// the number of values that were mixed in (1 + parents + return states).
size_t ArrayPredictionContext::hashCodeImpl() const {
  size_t digest = MurmurHash::initialize();
  digest = MurmurHash::update(digest, static_cast<size_t>(getContextType()));
  for (size_t i = 0; i < parents.size(); ++i) {
    digest = MurmurHash::update(digest, parents[i]);
  }
  for (size_t i = 0; i < returnStates.size(); ++i) {
    digest = MurmurHash::update(digest, returnStates[i]);
  }
  const size_t valueCount = 1 + parents.size() + returnStates.size();
  return MurmurHash::finish(digest, valueCount);
}
// Structural equality against another prediction context.
//
// The checks run from cheapest to most expensive, bailing out on the first
// mismatch: identity, context type, vector sizes, cached hash codes, a raw
// byte comparison of the return states, and finally an element-wise,
// null-tolerant comparison of the parents.
bool ArrayPredictionContext::equals(const PredictionContext &other) const {
  if (this == std::addressof(other)) {
    return true;
  }
  if (getContextType() != other.getContextType()) {
    return false;
  }

  // Safe after the type check above.
  const auto &that = downCast<const ArrayPredictionContext&>(other);

  if (returnStates.size() != that.returnStates.size() || parents.size() != that.parents.size()) {
    return false;
  }
  if (!cachedHashCodeEqual(cachedHashCode(), that.cachedHashCode())) {
    return false;
  }

  // Return states are plain integers, so a byte-wise comparison is sufficient.
  const size_t byteCount = returnStates.size() * sizeof(decltype(returnStates)::value_type);
  if (std::memcmp(returnStates.data(), that.returnStates.data(), byteCount) != 0) {
    return false;
  }

  // Same return states: the contexts are equal iff the parent stacks match.
  return std::equal(parents.begin(), parents.end(), that.parents.begin(), predictionContextEqual);
}
std::string ArrayPredictionContext::toString() const {
if (isEmpty()) {
return "[]";
}
std::stringstream ss;
ss << "[";
for (size_t i = 0; i < returnStates.size(); i++) {
if (i > 0) {
ss << ", ";
}
if (returnStates[i] == EMPTY_RETURN_STATE) {
ss << "$";
continue;
}
ss << returnStates[i];
if (parents[i] != nullptr) {
ss << " " << parents[i]->toString();
} else {
ss << "nul";
}
}
ss << "]";
return ss.str();
}

View File

@@ -0,0 +1,51 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "atn/PredictionContext.h"
namespace antlr4 {
namespace atn {
class SingletonPredictionContext;
/// Prediction context holding several (parent, return state) pairs in two
/// parallel vectors. Always tagged PredictionContextType::ARRAY.
class ANTLR4CPP_PUBLIC ArrayPredictionContext final : public PredictionContext {
public:
  /// Returns true when `predictionContext` reports PredictionContextType::ARRAY.
  static bool is(const PredictionContext &predictionContext) {
    return predictionContext.getContextType() == PredictionContextType::ARRAY;
  }

  /// Pointer overload; a null pointer is never an array context.
  static bool is(const PredictionContext *predictionContext) {
    if (predictionContext == nullptr) {
      return false;
    }
    return is(*predictionContext);
  }

  /// Parent can be empty only if full ctx mode and we make an array
  /// from EMPTY and non-empty. We merge EMPTY by using null parent and
  /// returnState == EMPTY_RETURN_STATE.
  // Strong references are held to the parents so they are not freed prematurely.
  // See also SingletonPredictionContext.
  std::vector<Ref<const PredictionContext>> parents;

  /// Sorted for merge, no duplicates; if present, EMPTY_RETURN_STATE is always last.
  std::vector<size_t> returnStates;

  /// Builds a one-entry array context from a singleton context.
  explicit ArrayPredictionContext(const SingletonPredictionContext &predictionContext);

  /// Builds an array context from parallel vectors of parents and return states.
  ArrayPredictionContext(std::vector<Ref<const PredictionContext>> parents, std::vector<size_t> returnStates);

  ArrayPredictionContext(ArrayPredictionContext&&) = default;

  bool isEmpty() const override;
  size_t size() const override;
  const Ref<const PredictionContext>& getParent(size_t index) const override;
  size_t getReturnState(size_t index) const override;
  bool equals(const PredictionContext &other) const override;
  std::string toString() const override;

protected:
  size_t hashCodeImpl() const override;
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,27 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "misc/IntervalSet.h"
#include "atn/Transition.h"
#include "atn/AtomTransition.h"
using namespace antlr4::misc;
using namespace antlr4::atn;
// Creates an atom transition to `target` that is labelled with the single
// value `label`. The transition is tagged TransitionType::ATOM.
AtomTransition::AtomTransition(ATNState *target, size_t label)
    : Transition(TransitionType::ATOM, target),
      _label(label) {}
// Returns the label of this transition as an interval set built from the
// single stored value.
// The stored label is a size_t; it is narrowed to int explicitly with a named
// cast (instead of a C-style cast) before being handed to IntervalSet::of.
IntervalSet AtomTransition::label() const {
  return IntervalSet::of(static_cast<int>(_label));
}
// Matches exactly one symbol: the result is true only when `symbol` equals the
// stored label. The vocabulary bounds are ignored.
bool AtomTransition::matches(size_t symbol, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const {
  const bool sameSymbol = (symbol == _label);
  return sameSymbol;
}
// Renders the transition as "ATOM <base text> { label: N }", where <base text>
// comes from Transition::toString() and N is the numeric label.
std::string AtomTransition::toString() const {
  std::string text = "ATOM ";
  text += Transition::toString();
  text += " { label: ";
  text += std::to_string(_label);
  text += " }";
  return text;
}

View File

@@ -0,0 +1,33 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "atn/Transition.h"
namespace antlr4 {
namespace atn {
/// Transition labelled with a single value. Always tagged TransitionType::ATOM.
/// TODO: make all transitions sets? no, should remove set edges.
class ANTLR4CPP_PUBLIC AtomTransition final : public Transition {
public:
  /// Returns true when `transition` reports TransitionType::ATOM.
  static bool is(const Transition &transition) {
    return transition.getTransitionType() == TransitionType::ATOM;
  }

  /// Pointer overload; a null pointer is never an atom transition.
  static bool is(const Transition *transition) {
    if (transition == nullptr) {
      return false;
    }
    return is(*transition);
  }

  /// The token type or character value; or, signifies special label.
  /// TODO: rename this to label
  const size_t _label;

  AtomTransition(ATNState *target, size_t label);

  misc::IntervalSet label() const override;
  bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override;
  std::string toString() const override;
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,24 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "antlr4-common.h"
#include "atn/BlockStartState.h"
namespace antlr4 {
namespace atn {
/// Block start state whose state type is ATNStateType::BLOCK_START.
class ANTLR4CPP_PUBLIC BasicBlockStartState final : public BlockStartState {
public:
  /// Returns true when `atnState` reports ATNStateType::BLOCK_START.
  static bool is(const ATNState &atnState) {
    return atnState.getStateType() == ATNStateType::BLOCK_START;
  }

  /// Pointer overload; a null pointer never qualifies.
  static bool is(const ATNState *atnState) {
    if (atnState == nullptr) {
      return false;
    }
    return is(*atnState);
  }

  BasicBlockStartState() : BlockStartState(ATNStateType::BLOCK_START) {}
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,23 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "atn/ATNState.h"
namespace antlr4 {
namespace atn {
/// ATN state whose state type is ATNStateType::BASIC.
class ANTLR4CPP_PUBLIC BasicState final : public ATNState {
public:
  /// Returns true when `atnState` reports ATNStateType::BASIC.
  static bool is(const ATNState &atnState) {
    return atnState.getStateType() == ATNStateType::BASIC;
  }

  /// Pointer overload; a null pointer never qualifies.
  static bool is(const ATNState *atnState) {
    if (atnState == nullptr) {
      return false;
    }
    return is(*atnState);
  }

  BasicState() : ATNState(ATNStateType::BASIC) {}
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,26 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "atn/ATNState.h"
namespace antlr4 {
namespace atn {
/// Terminal node of a simple {@code (a|b|c)} block.
/// Its state type is ATNStateType::BLOCK_END.
class ANTLR4CPP_PUBLIC BlockEndState final : public ATNState {
public:
  /// Returns true when `atnState` reports ATNStateType::BLOCK_END.
  static bool is(const ATNState &atnState) {
    return atnState.getStateType() == ATNStateType::BLOCK_END;
  }

  /// Pointer overload; a null pointer never qualifies.
  static bool is(const ATNState *atnState) {
    if (atnState == nullptr) {
      return false;
    }
    return is(*atnState);
  }

  /// The matching block start state; null until it is assigned.
  BlockStartState *startState = nullptr;

  BlockEndState() : ATNState(ATNStateType::BLOCK_END) {}
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,30 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "atn/DecisionState.h"
namespace antlr4 {
namespace atn {
/// The start of a regular {@code (...)} block.
class ANTLR4CPP_PUBLIC BlockStartState : public DecisionState {
public:
  /// Returns true when the state type of `atnState` lies in the inclusive
  /// range [BLOCK_START, STAR_BLOCK_START]. This relies on the declaration
  /// order of the ATNStateType enumerators.
  static bool is(const ATNState &atnState) {
    const auto kind = atnState.getStateType();
    const bool outsideRange = kind < ATNStateType::BLOCK_START || kind > ATNStateType::STAR_BLOCK_START;
    return !outsideRange;
  }

  /// Pointer overload; a null pointer never qualifies.
  static bool is(const ATNState *atnState) {
    if (atnState == nullptr) {
      return false;
    }
    return is(*atnState);
  }

  /// The matching block end state; null until it is assigned.
  BlockEndState *endState = nullptr;

protected:
  // Inherit the DecisionState constructors; only subclasses may construct.
  using DecisionState::DecisionState;
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,14 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "atn/ContextSensitivityInfo.h"
using namespace antlr4;
using namespace antlr4::atn;
// Constructs the context-sensitivity event. Every argument is forwarded to
// DecisionEventInfo; the full-context flag is always passed as true.
ContextSensitivityInfo::ContextSensitivityInfo(size_t decision, ATNConfigSet *configs, TokenStream *input,
                                               size_t startIndex, size_t stopIndex)
    : DecisionEventInfo(decision, configs, input, startIndex, stopIndex, /*fullCtx=*/true) {}

View File

@@ -0,0 +1,47 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "atn/DecisionEventInfo.h"
namespace antlr4 {
namespace atn {
/// <summary>
/// This class represents profiling event information for a context sensitivity.
/// Context sensitivities are decisions where a particular input resulted in an
/// SLL conflict, but LL prediction produced a single unique alternative.
///
/// <para>
/// In some cases, the unique alternative identified by LL prediction is not
/// equal to the minimum represented alternative in the conflicting SLL
/// configuration set. Grammars and inputs which result in this scenario are
/// unable to use <seealso cref="PredictionMode#SLL"/>, which in turn means they cannot use
/// the two-stage parsing strategy to improve parsing performance for that
/// input.</para>
/// </summary>
/// <seealso cref="ParserATNSimulator#reportContextSensitivity"/>
/// <seealso cref="ANTLRErrorListener#reportContextSensitivity"/>
///
/// @since 4.3
class ANTLR4CPP_PUBLIC ContextSensitivityInfo : public DecisionEventInfo {
public:
/// <summary>
/// Constructs a new instance of the <seealso cref="ContextSensitivityInfo"/> class
/// with the specified detailed context sensitivity information.
/// No full-context flag is taken here; the class describes events found
/// during full-context (LL) prediction.
/// </summary>
/// <param name="decision"> The decision number </param>
/// <param name="configs"> The final configuration set containing the unique
/// alternative identified by full-context prediction </param>
/// <param name="input"> The input token stream </param>
/// <param name="startIndex"> The start index for the current prediction </param>
/// <param name="stopIndex"> The index at which the context sensitivity was
/// identified during full-context prediction </param>
ContextSensitivityInfo(size_t decision, ATNConfigSet *configs, TokenStream *input, size_t startIndex, size_t stopIndex);
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,14 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "atn/DecisionEventInfo.h"
using namespace antlr4;
using namespace antlr4::atn;
// Stores every argument in the member of the same name; no other work is done.
DecisionEventInfo::DecisionEventInfo(size_t decision, ATNConfigSet *configs, TokenStream *input, size_t startIndex,
                                     size_t stopIndex, bool fullCtx)
    : decision(decision),
      configs(configs),
      input(input),
      startIndex(startIndex),
      stopIndex(stopIndex),
      fullCtx(fullCtx) {}

View File

@@ -0,0 +1,70 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "antlr4-common.h"
namespace antlr4 {
namespace atn {
/// <summary>
/// This is the base class for gathering detailed information about prediction
/// events which occur during parsing.
///
/// Note that we could record the parser call stack at the time this event
/// occurred but in the presence of left recursive rules, the stack is kind of
/// meaningless. It's better to look at the individual configurations for their
/// individual stacks. Of course that is a <seealso cref="PredictionContext"/> object
/// not a parse tree node and so it does not have information about the extent
/// (start...stop) of the various subtrees. Examining the stack tops of all
/// configurations provides the return states for the rule invocations.
/// From there you can get the enclosing rule.
///
/// @since 4.3
/// </summary>
class ANTLR4CPP_PUBLIC DecisionEventInfo {
public:
/// <summary>
/// The invoked decision number which this event is related to.
/// </summary>
/// <seealso cref="ATN#decisionToState"/>
const size_t decision;
/// <summary>
/// The configuration set containing additional information relevant to the
/// prediction state when the current event occurred, or {@code null} if no
/// additional information is relevant or available.
/// Held as a raw pointer to const (presumably non-owning — TODO confirm against callers).
/// </summary>
const ATNConfigSet *configs;
/// <summary>
/// The input token stream which is being parsed.
/// Held as a raw pointer to const (presumably non-owning — TODO confirm against callers).
/// </summary>
const TokenStream *input;
/// <summary>
/// The token index in the input stream at which the current prediction was
/// originally invoked.
/// </summary>
const size_t startIndex;
/// <summary>
/// The token index in the input stream at which the current event occurred.
/// </summary>
const size_t stopIndex;
/// <summary>
/// {@code true} if the current event occurred during LL prediction;
/// otherwise, {@code false} if the input occurred during SLL prediction.
/// </summary>
const bool fullCtx;
/// <summary>
/// Constructs the event; each argument is stored in the member of the same name.
/// </summary>
DecisionEventInfo(size_t decision, ATNConfigSet *configs, TokenStream *input, size_t startIndex,
size_t stopIndex, bool fullCtx);
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,25 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "atn/ErrorInfo.h"
#include "atn/LookaheadEventInfo.h"
#include "atn/DecisionInfo.h"
using namespace antlr4::atn;
// Records the decision number; all counters keep their in-class default values.
DecisionInfo::DecisionInfo(size_t decision)
    : decision(decision) {}
// Renders a one-line summary of the form "{decision=..., ...}".
// The event collections are summarised by their sizes. Only the fields listed
// below are part of the summary.
std::string DecisionInfo::toString() const {
  std::stringstream out;
  out << "{decision=" << decision;
  out << ", contextSensitivities=" << contextSensitivities.size();
  out << ", errors=" << errors.size();
  out << ", ambiguities=" << ambiguities.size();
  out << ", SLL_lookahead=" << SLL_TotalLook;
  out << ", SLL_ATNTransitions=" << SLL_ATNTransitions;
  out << ", SLL_DFATransitions=" << SLL_DFATransitions;
  out << ", LL_Fallback=" << LL_Fallback;
  out << ", LL_lookahead=" << LL_TotalLook;
  out << ", LL_ATNTransitions=" << LL_ATNTransitions;
  out << '}';
  return out.str();
}

View File

@@ -0,0 +1,227 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "atn/ContextSensitivityInfo.h"
#include "atn/AmbiguityInfo.h"
#include "atn/PredicateEvalInfo.h"
#include "atn/ErrorInfo.h"
namespace antlr4 {
namespace atn {
class LookaheadEventInfo;
/// <summary>
/// This class contains profiling gathered for a particular decision.
///
/// <para>
/// Parsing performance in ANTLR 4 is heavily influenced by both static factors
/// (e.g. the form of the rules in the grammar) and dynamic factors (e.g. the
/// choice of input and the state of the DFA cache at the time profiling
/// operations are started). For best results, gather and use aggregate
/// statistics from a large sample of inputs representing the inputs expected in
/// production before using the results to make changes in the grammar.</para>
///
/// @since 4.3
/// </summary>
class ANTLR4CPP_PUBLIC DecisionInfo {
public:
/// <summary>
/// The decision number, which is an index into <seealso cref="ATN#decisionToState"/>.
/// </summary>
const size_t decision;
/// <summary>
/// The total number of times <seealso cref="ParserATNSimulator#adaptivePredict"/> was
/// invoked for this decision.
/// </summary>
long long invocations = 0;
/// <summary>
/// The total time spent in <seealso cref="ParserATNSimulator#adaptivePredict"/> for
/// this decision, in nanoseconds.
///
/// <para>
/// The value of this field contains the sum of differential results obtained
/// by <seealso cref="System#nanoTime()"/>, and is not adjusted to compensate for JIT
/// and/or garbage collection overhead. For best accuracy, use a modern JVM
/// implementation that provides precise results from
/// <seealso cref="System#nanoTime()"/>, and perform profiling in a separate process
/// which is warmed up by parsing the input prior to profiling. If desired,
/// call <seealso cref="ATNSimulator#clearDFA"/> to reset the DFA cache to its initial
/// state before starting the profiling measurement pass.</para>
///
/// NOTE(review): the paragraph above talks about System#nanoTime, JIT and the
/// JVM, i.e. it describes a Java environment. How this C++ runtime fills the
/// field is not visible in this header — confirm before relying on it.
/// </summary>
long long timeInPrediction = 0;
/// <summary>
/// The sum of the lookahead required for SLL prediction for this decision.
/// Note that SLL prediction is used before LL prediction for performance
/// reasons even when <seealso cref="PredictionMode#LL"/> or
/// <seealso cref="PredictionMode#LL_EXACT_AMBIG_DETECTION"/> is used.
/// </summary>
long long SLL_TotalLook = 0;
/// <summary>
/// Gets the minimum lookahead required for any single SLL prediction to
/// complete for this decision, by reaching a unique prediction, reaching an
/// SLL conflict state, or encountering a syntax error.
/// </summary>
long long SLL_MinLook = 0;
/// <summary>
/// Gets the maximum lookahead required for any single SLL prediction to
/// complete for this decision, by reaching a unique prediction, reaching an
/// SLL conflict state, or encountering a syntax error.
/// </summary>
long long SLL_MaxLook = 0;
/// <summary>
/// Gets the <seealso cref="LookaheadEventInfo"/> associated with the event where the
/// <seealso cref="#SLL_MaxLook"/> value was set.
/// </summary>
Ref<LookaheadEventInfo> SLL_MaxLookEvent;
/// <summary>
/// The sum of the lookahead required for LL prediction for this decision.
/// Note that LL prediction is only used when SLL prediction reaches a
/// conflict state.
/// </summary>
long long LL_TotalLook = 0;
/// <summary>
/// Gets the minimum lookahead required for any single LL prediction to
/// complete for this decision. An LL prediction completes when the algorithm
/// reaches a unique prediction, a conflict state (for
/// <seealso cref="PredictionMode#LL"/>), an ambiguity state (for
/// <seealso cref="PredictionMode#LL_EXACT_AMBIG_DETECTION"/>), or a syntax error.
/// </summary>
long long LL_MinLook = 0;
/// <summary>
/// Gets the maximum lookahead required for any single LL prediction to
/// complete for this decision. An LL prediction completes when the algorithm
/// reaches a unique prediction, a conflict state (for
/// <seealso cref="PredictionMode#LL"/>), an ambiguity state (for
/// <seealso cref="PredictionMode#LL_EXACT_AMBIG_DETECTION"/>), or a syntax error.
/// </summary>
long long LL_MaxLook = 0;
/// <summary>
/// Gets the <seealso cref="LookaheadEventInfo"/> associated with the event where the
/// <seealso cref="#LL_MaxLook"/> value was set.
/// </summary>
Ref<LookaheadEventInfo> LL_MaxLookEvent;
/// <summary>
/// A collection of <seealso cref="ContextSensitivityInfo"/> instances describing the
/// context sensitivities encountered during LL prediction for this decision.
/// </summary>
/// <seealso cref="ContextSensitivityInfo"/>
std::vector<ContextSensitivityInfo> contextSensitivities;
/// <summary>
/// A collection of <seealso cref="ErrorInfo"/> instances describing the parse errors
/// identified during calls to <seealso cref="ParserATNSimulator#adaptivePredict"/> for
/// this decision.
/// </summary>
/// <seealso cref="ErrorInfo"/>
std::vector<ErrorInfo> errors;
/// <summary>
/// A collection of <seealso cref="AmbiguityInfo"/> instances describing the
/// ambiguities encountered during LL prediction for this decision.
/// </summary>
/// <seealso cref="AmbiguityInfo"/>
std::vector<AmbiguityInfo> ambiguities;
/// <summary>
/// A collection of <seealso cref="PredicateEvalInfo"/> instances describing the
/// results of evaluating individual predicates during prediction for this
/// decision.
/// </summary>
/// <seealso cref="PredicateEvalInfo"/>
std::vector<PredicateEvalInfo> predicateEvals;
/// <summary>
/// The total number of ATN transitions required during SLL prediction for
/// this decision. An ATN transition is determined by the number of times the
/// DFA does not contain an edge that is required for prediction, resulting
/// in on-the-fly computation of that edge.
///
/// <para>
/// If DFA caching of SLL transitions is employed by the implementation, ATN
/// computation may cache the computed edge for efficient lookup during
/// future parsing of this decision. Otherwise, the SLL parsing algorithm
/// will use ATN transitions exclusively.</para>
/// </summary>
/// <seealso cref="#SLL_DFATransitions"/>
/// <seealso cref="ParserATNSimulator#computeTargetState"/>
/// <seealso cref="LexerATNSimulator#computeTargetState"/>
long long SLL_ATNTransitions = 0;
/// <summary>
/// The total number of DFA transitions required during SLL prediction for
/// this decision.
///
/// <para>If the ATN simulator implementation does not use DFA caching for SLL
/// transitions, this value will be 0.</para>
/// </summary>
/// <seealso cref="ParserATNSimulator#getExistingTargetState"/>
/// <seealso cref="LexerATNSimulator#getExistingTargetState"/>
long long SLL_DFATransitions = 0;
/// <summary>
/// Gets the total number of times SLL prediction completed in a conflict
/// state, resulting in fallback to LL prediction.
///
/// <para>Note that this value is not related to whether or not
/// <seealso cref="PredictionMode#SLL"/> may be used successfully with a particular
/// grammar. If the ambiguity resolution algorithm applied to the SLL
/// conflicts for this decision produces the same result as LL prediction for
/// this decision, <seealso cref="PredictionMode#SLL"/> would produce the same overall
/// parsing result as <seealso cref="PredictionMode#LL"/>.</para>
/// </summary>
long long LL_Fallback = 0;
/// <summary>
/// The total number of ATN transitions required during LL prediction for
/// this decision. An ATN transition is determined by the number of times the
/// DFA does not contain an edge that is required for prediction, resulting
/// in on-the-fly computation of that edge.
///
/// <para>
/// If DFA caching of LL transitions is employed by the implementation, ATN
/// computation may cache the computed edge for efficient lookup during
/// future parsing of this decision. Otherwise, the LL parsing algorithm will
/// use ATN transitions exclusively.</para>
/// </summary>
/// <seealso cref="#LL_DFATransitions"/>
/// <seealso cref="ParserATNSimulator#computeTargetState"/>
/// <seealso cref="LexerATNSimulator#computeTargetState"/>
long long LL_ATNTransitions = 0;
/// <summary>
/// The total number of DFA transitions required during LL prediction for
/// this decision.
///
/// <para>If the ATN simulator implementation does not use DFA caching for LL
/// transitions, this value will be 0.</para>
/// </summary>
/// <seealso cref="ParserATNSimulator#getExistingTargetState"/>
/// <seealso cref="LexerATNSimulator#getExistingTargetState"/>
long long LL_DFATransitions = 0;
/// <summary>
/// Constructs a new instance of the <seealso cref="DecisionInfo"/> class to contain
/// statistics for a particular decision.
/// </summary>
/// <param name="decision"> The decision number </param>
explicit DecisionInfo(size_t decision);
/// <summary>
/// Returns a one-line textual summary of the statistics held by this object.
/// </summary>
std::string toString() const;
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,12 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "atn/DecisionState.h"
using namespace antlr4::atn;
// A decision state adds nothing to the textual form: the result is exactly
// what the ATNState base implementation produces.
std::string DecisionState::toString() const {
  std::string text = ATNState::toString();
  return text;
}

View File

@@ -0,0 +1,34 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "atn/ATNState.h"
namespace antlr4 {
namespace atn {
/// Base class for ATN states that carry a decision number.
class ANTLR4CPP_PUBLIC DecisionState : public ATNState {
public:
  /// Returns true when the state type of `atnState` is PLUS_LOOP_BACK,
  /// STAR_LOOP_ENTRY, or lies in the inclusive range [BLOCK_START, TOKEN_START].
  /// The range test relies on the declaration order of the ATNStateType enumerators.
  static bool is(const ATNState &atnState) {
    const auto kind = atnState.getStateType();
    if (kind == ATNStateType::PLUS_LOOP_BACK || kind == ATNStateType::STAR_LOOP_ENTRY) {
      return true;
    }
    return kind >= ATNStateType::BLOCK_START && kind <= ATNStateType::TOKEN_START;
  }

  /// Pointer overload; a null pointer never qualifies.
  static bool is(const ATNState *atnState) {
    if (atnState == nullptr) {
      return false;
    }
    return is(*atnState);
  }

  /// Decision number of this state; -1 until one is assigned.
  int decision = -1;

  /// Non-greedy flag; false by default.
  bool nonGreedy = false;

  std::string toString() const override;

protected:
  // Inherit the ATNState constructors; only subclasses may construct.
  using ATNState::ATNState;
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,31 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "atn/EpsilonTransition.h"
using namespace antlr4::atn;
// Creates an epsilon transition to `target` that is not an outermost
// precedence return: the stored rule index is INVALID_INDEX.
EpsilonTransition::EpsilonTransition(ATNState *target)
    : Transition(TransitionType::EPSILON, target),
      _outermostPrecedenceReturn(INVALID_INDEX) {}
// Creates an epsilon transition to `target` and records
// `outermostPrecedenceReturn` (a rule index). The transition is tagged
// TransitionType::EPSILON.
EpsilonTransition::EpsilonTransition(ATNState *target, size_t outermostPrecedenceReturn)
    : Transition(TransitionType::EPSILON, target),
      _outermostPrecedenceReturn(outermostPrecedenceReturn) {}
// Returns the rule index given at construction, or INVALID_INDEX when the
// single-argument constructor was used.
size_t EpsilonTransition::outermostPrecedenceReturn() const {
  const size_t ruleIndex = _outermostPrecedenceReturn;
  return ruleIndex;
}
// An epsilon transition always reports itself as epsilon.
bool EpsilonTransition::isEpsilon() const {
  constexpr bool alwaysEpsilon = true;
  return alwaysEpsilon;
}
// No symbol ever matches an epsilon transition, so all three arguments are
// ignored and the answer is unconditionally false.
bool EpsilonTransition::matches(size_t /*symbol*/,
                                size_t /*minVocabSymbol*/,
                                size_t /*maxVocabSymbol*/) const {
  constexpr bool neverMatches = false;
  return neverMatches;
}
// Text form: the literal "EPSILON ", the base Transition text, then " {}".
std::string EpsilonTransition::toString() const {
  std::string text = "EPSILON ";
  text += Transition::toString();
  text += " {}";
  return text;
}

View File

@@ -0,0 +1,42 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "atn/Transition.h"
namespace antlr4 {
namespace atn {
/// Transition whose type tag is TransitionType::EPSILON.
class ANTLR4CPP_PUBLIC EpsilonTransition final : public Transition {
public:
  /// Type test based on the transition's type tag.
  static bool is(const Transition &transition) {
    return TransitionType::EPSILON == transition.getTransitionType();
  }

  /// Pointer flavour of the test above; a null pointer never qualifies.
  static bool is(const Transition *transition) {
    if (transition == nullptr) {
      return false;
    }
    return is(*transition);
  }

  explicit EpsilonTransition(ATNState *target);

  EpsilonTransition(ATNState *target, size_t outermostPrecedenceReturn);

  /**
   * @return the rule index of a precedence rule this transition is returning
   *  from, where the precedence value is 0; otherwise, INVALID_INDEX.
   *
   * @see ATNConfig#isPrecedenceFilterSuppressed()
   * @see ParserATNSimulator#applyPrecedenceFilter(ATNConfigSet)
   * @since 4.4.1
   */
  size_t outermostPrecedenceReturn() const;

  bool isEpsilon() const override;
  bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override;
  std::string toString() const override;

private:
  // Rule index fixed at construction.
  const size_t _outermostPrecedenceReturn;
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,15 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "atn/ATNConfigSet.h"
#include "atn/ErrorInfo.h"
using namespace antlr4;
using namespace antlr4::atn;
// ErrorInfo has no state of its own: every argument is forwarded unchanged to
// the DecisionEventInfo base.
ErrorInfo::ErrorInfo(size_t decision,
                     ATNConfigSet *configs,
                     TokenStream *input,
                     size_t startIndex,
                     size_t stopIndex,
                     bool fullCtx)
    : DecisionEventInfo(decision, configs, input, startIndex, stopIndex, fullCtx) {}

View File

@@ -0,0 +1,43 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "atn/DecisionEventInfo.h"
namespace antlr4 {
namespace atn {
/// <summary>
/// Profiling event record for a syntax error identified during prediction,
/// i.e. the prediction algorithm could not identify an alternative that would
/// lead to a successful parse.
/// </summary>
/// <seealso cref="Parser#notifyErrorListeners(Token, String, RecognitionException)"/>
/// <seealso cref="ANTLRErrorListener#syntaxError"/>
///
/// @since 4.3
class ANTLR4CPP_PUBLIC ErrorInfo : public DecisionEventInfo {
public:
  /// <summary>
  /// Builds an <seealso cref="ErrorInfo"/> from the detailed syntax error
  /// information.
  /// </summary>
  /// <param name="decision"> The decision number </param>
  /// <param name="configs"> The final configuration set reached during
  /// prediction prior to reaching the <seealso cref="ATNSimulator#ERROR"/>
  /// state </param>
  /// <param name="input"> The input token stream </param>
  /// <param name="startIndex"> The start index for the current prediction </param>
  /// <param name="stopIndex"> The index at which the syntax error was
  /// identified </param>
  /// <param name="fullCtx"> {@code true} if the syntax error was identified
  /// during LL prediction; {@code false} if it was identified during SLL
  /// prediction </param>
  ErrorInfo(size_t decision,
            ATNConfigSet *configs,
            TokenStream *input,
            size_t startIndex,
            size_t stopIndex,
            bool fullCtx);
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,18 @@
/* Copyright (c) 2022 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include <cstddef>
namespace antlr4 {
namespace atn {
/// Compares two cached hash codes leniently: the result is true when either
/// value is 0 or when both values are identical, and false otherwise.
/// NOTE(review): 0 presumably stands for "hash not computed yet" - confirm
/// against the callers.
inline bool cachedHashCodeEqual(size_t lhs, size_t rhs) {
  if (lhs == 0 || rhs == 0) {
    return true;
  }
  return lhs == rhs;
}
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,189 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "atn/RuleStopState.h"
#include "atn/Transition.h"
#include "atn/RuleTransition.h"
#include "atn/SingletonPredictionContext.h"
#include "atn/WildcardTransition.h"
#include "atn/NotSetTransition.h"
#include "misc/IntervalSet.h"
#include "atn/ATNConfig.h"
#include "support/CPPUtils.h"
#include "atn/LL1Analyzer.h"
using namespace antlr4;
using namespace antlr4::atn;
using namespace antlrcpp;
namespace {
struct ATNConfigHasher final {
size_t operator()(const ATNConfig& atn_config) const {
return atn_config.hashCode();
}
};
// Equality functor for containers keyed by ATNConfig values; it delegates to
// the ATNConfig equality operator.
struct ATNConfigComparer final {
  bool operator()(const ATNConfig &lhs, const ATNConfig &rhs) const {
    const bool same = (lhs == rhs);
    return same;
  }
};
// Recursive worker behind LL1Analyzer. One instance serves a single lookahead
// computation: tokens are accumulated into the IntervalSet passed to the
// constructor, while _lookBusy and _calledRuleStack keep the recursion finite.
class LL1AnalyzerImpl final {
public:
// atn: the ATN being walked.
// look: result set; every token found is added to it.
// seeThruPreds: true to treat semantic predicates as implicitly true and
// "see through them"; false to treat them as opaque and add
// LL1Analyzer::HIT_PRED to the result when one is encountered.
// addEOF: add Token::EOF to the result when the end of the outermost context
// is reached. Has no effect when the context passed to LOOK is null.
LL1AnalyzerImpl(const ATN& atn, misc::IntervalSet& look, bool seeThruPreds, bool addEOF) : _atn(atn), _look(look), _seeThruPreds(seeThruPreds), _addEOF(addEOF) {}
/// <summary>
/// Compute set of tokens that can follow {@code s} in the ATN in the
/// specified {@code ctx} and add them to the result set given to the
/// constructor.
/// <p/>
/// If {@code ctx} is {@code null} and {@code stopState} or the end of the
/// rule containing {@code s} is reached, <seealso cref="Token#EPSILON"/> is added to
/// the result set. If {@code ctx} is an empty context and {@code addEOF}
/// (constructor argument) is {@code true} and {@code stopState} or the end
/// of the outermost rule is reached, <seealso cref="Token#EOF"/> is added to the
/// result set.
/// </summary>
/// <param name="s"> the ATN state. </param>
/// <param name="stopState"> the ATN state to stop at. This can be a
/// <seealso cref="BlockEndState"/> to detect epsilon paths through a closure. </param>
/// <param name="ctx"> The outer context, or {@code null} if the outer context should
/// not be used. </param>
///
/// The bookkeeping that the Java runtime passes as arguments lives in
/// members here: {@code _lookBusy} (configurations already visited, which
/// stops epsilon cycles) and {@code _calledRuleStack} (rules currently being
/// expanded, which stops left recursion).
void LOOK(ATNState *s, ATNState *stopState, Ref<const PredictionContext> const& ctx) {
// Each (state, context) pair is expanded at most once.
if (!_lookBusy.insert(ATNConfig(s, 0, ctx)).second) {
return;
}
// ml: s can never be null, hence no need to check if stopState is != null.
if (s == stopState) {
if (ctx == nullptr) {
_look.add(Token::EPSILON);
return;
} else if (ctx->isEmpty() && _addEOF) {
_look.add(Token::EOF);
return;
}
}
if (s->getStateType() == ATNStateType::RULE_STOP) {
if (ctx == nullptr) {
_look.add(Token::EPSILON);
return;
} else if (ctx->isEmpty() && _addEOF) {
_look.add(Token::EOF);
return;
}
if (ctx != PredictionContext::EMPTY) {
// Temporarily clear this rule's bit while following the return states,
// and restore it afterwards if it had been set.
bool removed = _calledRuleStack.test(s->ruleIndex);
_calledRuleStack[s->ruleIndex] = false;
// run thru all possible stack tops in ctx
for (size_t i = 0; i < ctx->size(); i++) {
ATNState *returnState = _atn.states[ctx->getReturnState(i)];
LOOK(returnState, stopState, ctx->getParent(i));
}
if (removed) {
_calledRuleStack.set(s->ruleIndex);
}
return;
}
}
size_t n = s->transitions.size();
for (size_t i = 0; i < n; i++) {
const Transition *t = s->transitions[i].get();
const auto tType = t->getTransitionType();
if (tType == TransitionType::RULE) {
// Skip rules that are already being expanded further up the recursion.
if (_calledRuleStack[(static_cast<const RuleTransition*>(t))->target->ruleIndex]) {
continue;
}
// "Push" the follow state so the rule stop state can return to it.
Ref<const PredictionContext> newContext = SingletonPredictionContext::create(ctx, (static_cast<const RuleTransition*>(t))->followState->stateNumber);
_calledRuleStack.set((static_cast<const RuleTransition*>(t))->target->ruleIndex);
LOOK(t->target, stopState, newContext);
_calledRuleStack[(static_cast<const RuleTransition*>(t))->target->ruleIndex] = false;
} else if (tType == TransitionType::PREDICATE || tType == TransitionType::PRECEDENCE) {
if (_seeThruPreds) {
LOOK(t->target, stopState, ctx);
} else {
_look.add(LL1Analyzer::HIT_PRED);
}
} else if (t->isEpsilon()) {
LOOK(t->target, stopState, ctx);
} else if (tType == TransitionType::WILDCARD) {
// A wildcard contributes every user token type known to the ATN.
_look.addAll(misc::IntervalSet::of(Token::MIN_USER_TOKEN_TYPE, static_cast<ssize_t>(_atn.maxTokenType)));
} else {
misc::IntervalSet set = t->label();
if (!set.isEmpty()) {
if (tType == TransitionType::NOT_SET) {
// Negated sets contribute their complement within the user token range.
set = set.complement(misc::IntervalSet::of(Token::MIN_USER_TOKEN_TYPE, static_cast<ssize_t>(_atn.maxTokenType)));
}
_look.addAll(set);
}
}
}
}
private:
const ATN& _atn;
// Result set owned by the caller.
misc::IntervalSet& _look;
// One bit per rule index; set while that rule is being expanded.
antlrcpp::BitSet _calledRuleStack;
// Configurations already handed to LOOK.
std::unordered_set<ATNConfig, ATNConfigHasher, ATNConfigComparer> _lookBusy;
bool _seeThruPreds;
bool _addEOF;
};
}
// Computes one lookahead set per outgoing transition of s. Predicates are not
// seen through and EOF is not added. A set that ends up empty, or that
// contains HIT_PRED, is returned empty. A null state gives an empty vector.
std::vector<misc::IntervalSet> LL1Analyzer::getDecisionLookahead(ATNState *s) const {
  if (s == nullptr) {
    return {};
  }

  // One default-constructed interval set per alternative.
  std::vector<misc::IntervalSet> perAlternative(s->transitions.size());
  for (size_t alt = 0; alt < s->transitions.size(); ++alt) {
    misc::IntervalSet &altLook = perAlternative[alt];
    LL1AnalyzerImpl worker(_atn, altLook, false, false);
    worker.LOOK(s->transitions[alt]->target, nullptr, PredictionContext::EMPTY);

    // Discard the lookahead for this alternative when nothing was found or a
    // predicate was hit (predicates are opaque in this mode).
    const bool foundNothing = altLook.size() == 0;
    if (foundNothing || altLook.contains(LL1Analyzer::HIT_PRED)) {
      altLook.clear();
    }
  }
  return perAlternative;
}
// Convenience overload: same as the three-argument LOOK with no stop state.
misc::IntervalSet LL1Analyzer::LOOK(ATNState *s, RuleContext *ctx) const {
  ATNState *const noStopState = nullptr;
  return LOOK(s, noStopState, ctx);
}
// Full lookahead computation: predicates are seen through and EOF may be
// added. A null rule context is passed on as a null prediction context.
misc::IntervalSet LL1Analyzer::LOOK(ATNState *s, ATNState *stopState, RuleContext *ctx) const {
  Ref<const PredictionContext> lookContext;
  if (ctx != nullptr) {
    lookContext = PredictionContext::fromRuleContext(_atn, ctx);
  }

  misc::IntervalSet result;
  LL1AnalyzerImpl worker(_atn, result, true, true);
  worker.LOOK(s, stopState, lookContext);
  return result;
}

View File

@@ -0,0 +1,76 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "Token.h"
#include "atn/ATNConfig.h"
#include "atn/PredictionContext.h"
#include "support/BitSet.h"
namespace antlr4 {
namespace atn {
/// Lookahead analysis over a single ATN, which is held by reference.
class ANTLR4CPP_PUBLIC LL1Analyzer final {
public:
  /// Special value added to the lookahead sets to indicate that a predicate
  /// was hit during analysis when {@code seeThruPreds==false}.
  static constexpr size_t HIT_PRED = Token::INVALID_TYPE;

  explicit LL1Analyzer(const atn::ATN &atn)
      : _atn(atn) {}

  /// <summary>
  /// Calculates the SLL(1) expected lookahead set for each outgoing
  /// transition of an <seealso cref="ATNState"/>. The returned vector has one
  /// element per outgoing transition of {@code s}. If the closure from
  /// transition <em>i</em> leads to a semantic predicate before matching a
  /// symbol, or finds nothing at all, element <em>i</em> of the result is an
  /// empty set. A null {@code s} yields an empty vector.
  /// </summary>
  /// <param name="s"> the ATN state </param>
  /// <returns> the expected symbols for each outgoing transition of {@code s}. </returns>
  std::vector<misc::IntervalSet> getDecisionLookahead(ATNState *s) const;

  /// <summary>
  /// Computes the set of tokens that can follow {@code s} in the ATN in the
  /// specified {@code ctx}.
  /// <p/>
  /// If {@code ctx} is {@code null} and the end of the rule containing
  /// {@code s} is reached, <seealso cref="Token#EPSILON"/> is added to the
  /// result set. If {@code ctx} is not {@code null} and the end of the
  /// outermost rule is reached, <seealso cref="Token#EOF"/> is added to the
  /// result set.
  /// </summary>
  /// <param name="s"> the ATN state </param>
  /// <param name="ctx"> the complete parser context, or {@code null} if the
  /// context should be ignored </param>
  /// <returns> The set of tokens that can follow {@code s} in the ATN in the
  /// specified {@code ctx}. </returns>
  misc::IntervalSet LOOK(ATNState *s, RuleContext *ctx) const;

  /// <summary>
  /// Same computation as the two-argument overload, with an additional state
  /// at which the walk stops.
  /// <p/>
  /// If {@code ctx} is {@code null} and {@code stopState} or the end of the
  /// rule containing {@code s} is reached, <seealso cref="Token#EPSILON"/> is
  /// added to the result set. If {@code ctx} is not {@code null} and
  /// {@code stopState} or the end of the outermost rule is reached,
  /// <seealso cref="Token#EOF"/> is added to the result set.
  /// </summary>
  /// <param name="s"> the ATN state </param>
  /// <param name="stopState"> the ATN state to stop at. This can be a
  /// <seealso cref="BlockEndState"/> to detect epsilon paths through a
  /// closure. </param>
  /// <param name="ctx"> the complete parser context, or {@code null} if the
  /// context should be ignored </param>
  /// <returns> The set of tokens that can follow {@code s} in the ATN in the
  /// specified {@code ctx}. </returns>
  misc::IntervalSet LOOK(ATNState *s, ATNState *stopState, RuleContext *ctx) const;

private:
  // The analyzed ATN; not owned.
  const atn::ATN &_atn;
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,67 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "misc/MurmurHash.h"
#include "atn/DecisionState.h"
#include "atn/PredictionContext.h"
#include "SemanticContext.h"
#include "atn/LexerActionExecutor.h"
#include "support/CPPUtils.h"
#include "support/Casts.h"
#include "atn/LexerATNConfig.h"
using namespace antlr4::atn;
using namespace antlrcpp;
// Fresh configuration with no lexer action executor; the non-greedy flag keeps
// its in-class default.
LexerATNConfig::LexerATNConfig(ATNState *state,
                               int alt,
                               Ref<const PredictionContext> context)
    : ATNConfig(state, alt, std::move(context)) {}
// Fresh configuration that starts out with the given lexer action executor;
// the non-greedy flag keeps its in-class default.
LexerATNConfig::LexerATNConfig(ATNState *state,
                               int alt,
                               Ref<const PredictionContext> context,
                               Ref<const LexerActionExecutor> lexerActionExecutor)
    : ATNConfig(state, alt, std::move(context)),
      _lexerActionExecutor(std::move(lexerActionExecutor)) {}
// Derives a configuration from `other` at a new state, keeping its executor.
// The non-greedy flag is recomputed for the new state.
LexerATNConfig::LexerATNConfig(LexerATNConfig const& other, ATNState *state)
    : ATNConfig(other, state),
      _lexerActionExecutor(other._lexerActionExecutor),
      _passedThroughNonGreedyDecision(checkNonGreedyDecision(other, state)) {}
// Derives a configuration from `other` at a new state with a replacement
// executor. The non-greedy flag is recomputed for the new state.
LexerATNConfig::LexerATNConfig(LexerATNConfig const& other,
                               ATNState *state,
                               Ref<const LexerActionExecutor> lexerActionExecutor)
    : ATNConfig(other, state),
      _lexerActionExecutor(std::move(lexerActionExecutor)),
      _passedThroughNonGreedyDecision(checkNonGreedyDecision(other, state)) {}
// Derives a configuration from `other` at a new state with a replacement
// prediction context, keeping the executor. The non-greedy flag is recomputed
// for the new state.
LexerATNConfig::LexerATNConfig(LexerATNConfig const& other,
                               ATNState *state,
                               Ref<const PredictionContext> context)
    : ATNConfig(other, state, std::move(context)),
      _lexerActionExecutor(other._lexerActionExecutor),
      _passedThroughNonGreedyDecision(checkNonGreedyDecision(other, state)) {}
// MurmurHash over six fields, fed in this fixed order: state number, alt,
// context, semantic context, non-greedy flag, lexer action executor.
size_t LexerATNConfig::hashCode() const {
  size_t hash = misc::MurmurHash::initialize(7);

  // Fields shared with the ATNConfig base.
  hash = misc::MurmurHash::update(hash, state->stateNumber);
  hash = misc::MurmurHash::update(hash, alt);
  hash = misc::MurmurHash::update(hash, context);
  hash = misc::MurmurHash::update(hash, semanticContext);

  // Lexer-specific fields.
  hash = misc::MurmurHash::update(hash, _passedThroughNonGreedyDecision ? 1 : 0);
  hash = misc::MurmurHash::update(hash, _lexerActionExecutor);

  // The second argument is the number of update() calls made above.
  return misc::MurmurHash::finish(hash, 6);
}
// Two lexer configurations are equal when they agree on the non-greedy flag,
// on the lexer action executor (both absent, or both present and equal by
// value) and on everything ATNConfig::operator== compares.
//
// The earlier version returned true as soon as both executors were null,
// without comparing the base part, and dereferenced other's executor without
// checking it for null. Both cases are handled explicitly here.
bool LexerATNConfig::operator==(const LexerATNConfig& other) const
{
  if (this == &other) {
    return true;
  }

  if (_passedThroughNonGreedyDecision != other._passedThroughNonGreedyDecision) {
    return false;
  }

  const bool lhsHasExecutor = _lexerActionExecutor != nullptr;
  const bool rhsHasExecutor = other._lexerActionExecutor != nullptr;
  if (lhsHasExecutor != rhsHasExecutor) {
    // Exactly one side carries an executor, so they cannot be equal.
    return false;
  }
  if (lhsHasExecutor && *_lexerActionExecutor != *other._lexerActionExecutor) {
    return false;
  }

  // Executors agree (or are both absent); the base fields decide.
  return ATNConfig::operator==(other);
}
// The flag is sticky: once the source configuration has it, every derived
// configuration has it too. Otherwise it becomes true only when the target is
// a decision state marked non-greedy.
bool LexerATNConfig::checkNonGreedyDecision(LexerATNConfig const& source, ATNState *target) {
  if (source._passedThroughNonGreedyDecision) {
    return true;
  }
  if (!DecisionState::is(target)) {
    return false;
  }
  return downCast<DecisionState*>(target)->nonGreedy;
}

View File

@@ -0,0 +1,44 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "atn/ATNConfig.h"
namespace antlr4 {
namespace atn {
/// ATNConfig extended with an optional lexer action executor and a flag that
/// records whether a non-greedy decision was passed through.
class ANTLR4CPP_PUBLIC LexerATNConfig final : public ATNConfig {
public:
  // Fresh configurations.
  LexerATNConfig(ATNState *state, int alt, Ref<const PredictionContext> context);
  LexerATNConfig(ATNState *state, int alt, Ref<const PredictionContext> context,
                 Ref<const LexerActionExecutor> lexerActionExecutor);

  // Configurations derived from an existing one at a new state.
  LexerATNConfig(LexerATNConfig const& other, ATNState *state);
  LexerATNConfig(LexerATNConfig const& other, ATNState *state,
                 Ref<const LexerActionExecutor> lexerActionExecutor);
  LexerATNConfig(LexerATNConfig const& other, ATNState *state,
                 Ref<const PredictionContext> context);

  /**
   * Gets the {@link LexerActionExecutor} capable of executing the embedded
   * action(s) for the current configuration. May be null.
   */
  const Ref<const LexerActionExecutor>& getLexerActionExecutor() const {
    return _lexerActionExecutor;
  }

  /// Whether this configuration, or one it was derived from, passed through
  /// a non-greedy decision state.
  bool hasPassedThroughNonGreedyDecision() const {
    return _passedThroughNonGreedyDecision;
  }

  size_t hashCode() const override;

  bool operator==(const LexerATNConfig& other) const;

private:
  /**
   * This is the backing field for {@link #getLexerActionExecutor}.
   */
  const Ref<const LexerActionExecutor> _lexerActionExecutor;

  // Fixed at construction; see checkNonGreedyDecision().
  const bool _passedThroughNonGreedyDecision = false;

  static bool checkNonGreedyDecision(LexerATNConfig const& source, ATNState *target);
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,621 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "IntStream.h"
#include "atn/OrderedATNConfigSet.h"
#include "Token.h"
#include "LexerNoViableAltException.h"
#include "atn/RuleStopState.h"
#include "atn/RuleTransition.h"
#include "atn/SingletonPredictionContext.h"
#include "atn/PredicateTransition.h"
#include "atn/ActionTransition.h"
#include "atn/TokensStartState.h"
#include "misc/Interval.h"
#include "dfa/DFA.h"
#include "Lexer.h"
#include "internal/Synchronization.h"
#include "dfa/DFAState.h"
#include "atn/LexerATNConfig.h"
#include "atn/LexerActionExecutor.h"
#include "atn/LexerATNSimulator.h"
// Debug tracing switches. Each one defaults to 0 unless it was already
// defined, e.g. on the compiler command line. The std::cout traces in this
// file are compiled in only when LEXER_DEBUG_ATN == 1.
#ifndef LEXER_DEBUG_ATN
#define LEXER_DEBUG_ATN 0
#endif
// NOTE(review): no use of LEXER_DEBUG_DFA is visible in this part of the
// file; presumably it guards DFA-related tracing further down.
#ifndef LEXER_DEBUG_DFA
#define LEXER_DEBUG_DFA 0
#endif
using namespace antlr4;
using namespace antlr4::atn;
using namespace antlr4::internal;
using namespace antlrcpp;
void LexerATNSimulator::SimState::reset() {
*this = SimState();
}
// Simulator without an attached lexer: delegates with a null recognizer.
LexerATNSimulator::LexerATNSimulator(const ATN &atn,
                                     std::vector<dfa::DFA> &decisionToDFA,
                                     PredictionContextCache &sharedContextCache)
    : LexerATNSimulator(nullptr, atn, decisionToDFA, sharedContextCache) {}
// Main constructor: stores the recognizer and the DFA table reference, then
// lets InitializeInstanceFields() set up the remaining members.
LexerATNSimulator::LexerATNSimulator(Lexer *recog,
                                     const ATN &atn,
                                     std::vector<dfa::DFA> &decisionToDFA,
                                     PredictionContextCache &sharedContextCache)
    : ATNSimulator(atn, sharedContextCache),
      _recog(recog),
      _decisionToDFA(decisionToDFA) {
  InitializeInstanceFields();
}
// Takes over the position-related state (start index, mode, line, column)
// from another simulator. Nothing else is copied.
void LexerATNSimulator::copyState(LexerATNSimulator *simulator) {
  const LexerATNSimulator &source = *simulator;
  _startIndex = source._startIndex;
  _mode = source._mode;
  _line = source._line;
  _charPositionInLine = source._charPositionInLine;
}
// Matches one token starting at the current input position in the given mode
// and returns the value produced by execATN()/matchATN(). The input is marked
// for the duration of the call and the mark is released on every exit path.
size_t LexerATNSimulator::match(CharStream *input, size_t mode) {
  _mode = mode;

  const ssize_t marker = input->mark();
  auto releaseMarker = finally([input, marker] {
    input->release(marker);
  });

  _startIndex = input->index();
  _prevAccept.reset();

  // Read the DFA start state under the shared state lock.
  const dfa::DFA &modeDfa = _decisionToDFA[mode];
  dfa::DFAState *startState = nullptr;
  {
    SharedLock<SharedMutex> stateLock(atn._stateMutex);
    startState = modeDfa.s0;
  }

  // No start state yet: build it from the ATN. Otherwise run from it directly.
  return startState != nullptr ? execATN(input, startState) : matchATN(input);
}
// Puts the simulator back at the beginning: default mode, line 1, column 0,
// start index 0 and no remembered accept state.
void LexerATNSimulator::reset() {
  _mode = Lexer::DEFAULT_MODE;
  _charPositionInLine = 0;
  _line = 1;
  _startIndex = 0;
  _prevAccept.reset();
}
// Throws away all DFAs and recreates the same number of empty ones, one per
// decision index.
void LexerATNSimulator::clearDFA() {
  const size_t decisionCount = _decisionToDFA.size();
  _decisionToDFA.clear();
  for (size_t decision = 0; decision != decisionCount; ++decision) {
    _decisionToDFA.emplace_back(atn.getDecisionState(decision), decision);
  }
}
// Used when the current mode has no DFA start state yet: computes the start
// closure from the ATN, registers it as a DFA state and continues in execATN().
size_t LexerATNSimulator::matchATN(CharStream *input) {
  ATNState *modeStart = atn.modeToStartState[_mode];
  std::unique_ptr<ATNConfigSet> startClosure = computeStartState(input, modeStart);

  // Remember whether a predicate was seen, then clear the flag on the set
  // before handing the set over.
  const bool suppressEdge = startClosure->hasSemanticContext;
  startClosure->hasSemanticContext = false;

  dfa::DFAState *startDfaState = addDFAState(startClosure.release(), suppressEdge);
  return execATN(input, startDfaState);
}
// Main simulation loop. Starting at ds0 it follows one DFA edge per input
// symbol, creating missing edges on demand, until the ERROR state is reached
// or an accept state is hit at EOF. The outcome is decided by failOrAccept().
size_t LexerATNSimulator::execATN(CharStream *input, dfa::DFAState *ds0) {
  if (ds0->isAcceptState) {
    // Allow zero-length tokens.
    // ml: the Java version passes _prevAccept explicitly; here it is a member.
    captureSimState(input, ds0);
  }

  size_t symbol = input->LA(1);
  dfa::DFAState *from = ds0;  // current source state

  for (;;) {
    // Reuse an edge already recorded in the DFA whenever possible. Computing
    // reach/closure sets is the expensive part, and an existing edge means
    // its configuration set is known already. This pays off most for loops
    // in the DFA, where a character leads back to a state that already has
    // many outgoing edges (e.g. ".*" in comments).
    dfa::DFAState *to = getExistingTargetState(from, symbol);
    if (to == nullptr) {
      to = computeTargetState(input, from, symbol);
    }

    if (to == ERROR.get()) {
      break;
    }

    // Consume a consumable symbol before capturing the accept state, so that
    // input index, line and column reflect the position at the end of the
    // token.
    if (symbol != Token::EOF) {
      consume(input);
    }

    if (to->isAcceptState) {
      captureSimState(input, to);
      if (symbol == Token::EOF) {
        break;
      }
    }

    symbol = input->LA(1);
    from = to;  // the target becomes the source of the next step
  }

  return failOrAccept(input, from->configs.get(), symbol);
}
// Looks up, under the shared edge lock, the DFA state already recorded as the
// target of s on symbol t. Returns null when t is above MAX_DFA_EDGE or when
// no such edge exists.
dfa::DFAState *LexerATNSimulator::getExistingTargetState(dfa::DFAState *s, size_t t) {
  SharedLock<SharedMutex> edgeLock(atn._edgeMutex);
  if (t > MAX_DFA_EDGE) {
    return nullptr;
  }

  const auto edge = s->edges.find(t - MIN_DFA_EDGE);
  if (edge == s->edges.end()) {
    return nullptr;
  }

#if LEXER_DEBUG_ATN == 1
  std::cout << std::string("reuse state ") << s->stateNumber << std::string(" edge to ") << edge->second->stateNumber << std::endl;
#endif

  return edge->second;
}
// Computes the DFA state reached from s on symbol t when no edge for it exists
// yet. The new edge is recorded in the DFA, except that an ERROR edge is not
// recorded when semantic predicates were involved.
//
// Returns ERROR.get() when nothing is reachable on t, otherwise the state
// returned by addDFAEdge().
dfa::DFAState *LexerATNSimulator::computeTargetState(CharStream *input, dfa::DFAState *s, size_t t) {
  // The reach set is owned here until it is handed to addDFAEdge(). Holding it
  // in a unique_ptr frees it on the empty-reach path and also when computing
  // the reach set throws, which the closure computation can do.
  auto reach = std::make_unique<OrderedATNConfigSet>();

  // if we don't find an existing DFA state
  // Fill reach starting from closure, following t transitions
  getReachableConfigSet(input, s->configs.get(), reach.get(), t);

  if (reach->isEmpty()) { // we got nowhere on t from s
    if (!reach->hasSemanticContext) {
      // we got nowhere on t, don't throw out this knowledge; it'd
      // cause a failover from DFA later.
      addDFAEdge(s, t, ERROR.get());
    }

    // stop when we can't match any more char
    return ERROR.get();
  }

  // Add an edge from s to target DFA found/created for reach. Ownership of the
  // set passes to the DFA from here on.
  return addDFAEdge(s, t, reach.release());
}
// Final verdict of a match attempt. If an accept state was captured, the input
// is rewound to it and its prediction is returned. Otherwise EOF is returned
// when EOF is the very first symbol, and a LexerNoViableAltException is thrown
// in every other case.
size_t LexerATNSimulator::failOrAccept(CharStream *input, ATNConfigSet *reach, size_t t) {
  if (_prevAccept.dfaState == nullptr) {
    // if no accept and EOF is first char, return EOF
    const bool eofAtTokenStart = t == Token::EOF && input->index() == _startIndex;
    if (eofAtTokenStart) {
      return Token::EOF;
    }
    throw LexerNoViableAltException(_recog, input, _startIndex, reach);
  }

  accept(input, _prevAccept.dfaState->lexerActionExecutor, _startIndex,
         _prevAccept.index, _prevAccept.line, _prevAccept.charPos);
  return _prevAccept.dfaState->prediction;
}
// Fills `reach` with the closure of every configuration reachable from
// `closure_` by matching symbol t.
//
// input:    the character stream; used for predicate evaluation and for
//           computing action offsets.
// closure_: the configurations to start from.
// reach:    receives the reachable configurations.
// t:        the symbol to match (Token::EOF is treated as epsilon when
//           computing the closure).
void LexerATNSimulator::getReachableConfigSet(CharStream *input, ATNConfigSet *closure_, ATNConfigSet *reach, size_t t) {
  // this is used to skip processing for configs which have a lower priority
  // than a config that already reached an accept state for the same rule
  size_t skipAlt = ATN::INVALID_ALT_NUMBER;

  for (const auto &c : closure_->configs) {
    bool currentAltReachedAcceptState = c->alt == skipAlt;
    // Cast the referenced object rather than the shared pointer: this avoids a
    // temporary shared_ptr copy per configuration and matches the casts below.
    if (currentAltReachedAcceptState && downCast<const LexerATNConfig&>(*c).hasPassedThroughNonGreedyDecision()) {
      continue;
    }

#if LEXER_DEBUG_ATN == 1
    std::cout << "testing " << getTokenName((int)t) << " at " << c->toString(true) << std::endl;
#endif

    size_t n = c->state->transitions.size();
    for (size_t ti = 0; ti < n; ti++) { // for each transition
      const Transition *trans = c->state->transitions[ti].get();
      // getReachableTarget() takes a size_t, so t is passed on as is.
      ATNState *target = getReachableTarget(trans, t);
      if (target == nullptr) {
        continue;
      }

      auto lexerActionExecutor = downCast<const LexerATNConfig&>(*c).getLexerActionExecutor();
      if (lexerActionExecutor != nullptr) {
        lexerActionExecutor = lexerActionExecutor->fixOffsetBeforeMatch(
          static_cast<int>(input->index()) - static_cast<int>(_startIndex));
      }

      bool treatEofAsEpsilon = t == Token::EOF;
      Ref<LexerATNConfig> config = std::make_shared<LexerATNConfig>(downCast<const LexerATNConfig&>(*c),
        target, std::move(lexerActionExecutor));

      if (closure(input, config, reach, currentAltReachedAcceptState, true, treatEofAsEpsilon)) {
        // any remaining configs for this alt have a lower priority than
        // the one that just reached an accept state.
        skipAlt = c->alt;
        break;
      }
    }
  }
}
// Commits an accepted token: rewinds the input to just after the token's last
// character, restores line and column, and runs the attached lexer actions
// when both an executor and a recognizer are available.
void LexerATNSimulator::accept(CharStream *input, const Ref<const LexerActionExecutor> &lexerActionExecutor, size_t /*startIndex*/,
                               size_t index, size_t line, size_t charPos) {
#if LEXER_DEBUG_ATN == 1
  std::cout << "ACTION ";
  std::cout << toString(lexerActionExecutor) << std::endl;
#endif

  // seek to after last char in token
  input->seek(index);
  _line = line;
  _charPositionInLine = static_cast<int>(charPos);

  const bool canRunActions = lexerActionExecutor != nullptr && _recog != nullptr;
  if (canRunActions) {
    lexerActionExecutor->execute(_recog, input, _startIndex);
  }
}
// Target of `trans` if it matches symbol t within the lexer's character range;
// null otherwise.
atn::ATNState *LexerATNSimulator::getReachableTarget(const Transition *trans, size_t t) {
  const bool accepted = trans->matches(t, Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE);
  return accepted ? trans->target : nullptr;
}
// Builds the start configuration set for state p: each outgoing transition of
// p becomes alternative i + 1 (1-based), and its non-speculative closure is
// added to an ordered configuration set.
std::unique_ptr<ATNConfigSet> LexerATNSimulator::computeStartState(CharStream *input, ATNState *p) {
  // ml: the purpose of this assignment is unclear
  Ref<const PredictionContext> initialContext = PredictionContext::EMPTY;
  std::unique_ptr<ATNConfigSet> startConfigs(new OrderedATNConfigSet());

  int alt = 0;
  for (const auto &transition : p->transitions) {
    ++alt;
    Ref<LexerATNConfig> seed = std::make_shared<LexerATNConfig>(transition->target, alt, initialContext);
    closure(input, seed, startConfigs.get(), false, false, false);
  }
  return startConfigs;
}
// Recursively adds to `configs` the configurations reachable from `config`
// through epsilon-like transitions (see getEpsilonTarget()).
//
// input: the character stream, passed on for predicate evaluation.
// config: the configuration to expand.
// configs: receives the resulting configurations.
// currentAltReachedAcceptState: whether an accept state was already reached
// for this alternative; threaded through the recursion.
// speculative / treatEofAsEpsilon: passed on unchanged to getEpsilonTarget().
//
// Returns the updated currentAltReachedAcceptState, i.e. true once a rule stop
// state with an empty context path has been reached.
bool LexerATNSimulator::closure(CharStream *input, const Ref<LexerATNConfig> &config, ATNConfigSet *configs,
bool currentAltReachedAcceptState, bool speculative, bool treatEofAsEpsilon) {
#if LEXER_DEBUG_ATN == 1
std::cout << "closure(" << config->toString(true) << ")" << std::endl;
#endif
if (config->state != nullptr && config->state->getStateType() == ATNStateType::RULE_STOP) {
#if LEXER_DEBUG_ATN == 1
if (_recog != nullptr) {
std::cout << "closure at " << _recog->getRuleNames()[config->state->ruleIndex] << " rule stop " << config << std::endl;
} else {
std::cout << "closure at rule stop " << config << std::endl;
}
#endif
if (config->context == nullptr || config->context->hasEmptyPath()) {
if (config->context == nullptr || config->context->isEmpty()) {
// Nothing left to return to: this configuration is an accept.
configs->add(config);
return true;
} else {
// The context has an empty path among others: record an accept with the
// EMPTY context and keep going with the non-empty return states below.
configs->add(std::make_shared<LexerATNConfig>(*config, config->state, PredictionContext::EMPTY));
currentAltReachedAcceptState = true;
}
}
if (config->context != nullptr && !config->context->isEmpty()) {
for (size_t i = 0; i < config->context->size(); i++) {
if (config->context->getReturnState(i) != PredictionContext::EMPTY_RETURN_STATE) {
Ref<const PredictionContext> newContext = config->context->getParent(i); // "pop" return state
ATNState *returnState = atn.states[config->context->getReturnState(i)];
Ref<LexerATNConfig> c = std::make_shared<LexerATNConfig>(*config, returnState, newContext);
currentAltReachedAcceptState = closure(input, c, configs, currentAltReachedAcceptState, speculative, treatEofAsEpsilon);
}
}
}
return currentAltReachedAcceptState;
}
// optimization: states with only epsilon transitions are not added themselves.
// A configuration is also left out when this alternative already reached an
// accept state and the configuration passed through a non-greedy decision.
if (!config->state->epsilonOnlyTransitions) {
if (!currentAltReachedAcceptState || !config->hasPassedThroughNonGreedyDecision()) {
configs->add(config);
}
}
// Expand every transition that getEpsilonTarget() lets us follow.
ATNState *p = config->state;
for (size_t i = 0; i < p->transitions.size(); i++) {
const Transition *t = p->transitions[i].get();
Ref<LexerATNConfig> c = getEpsilonTarget(input, config, t, configs, speculative, treatEofAsEpsilon);
if (c != nullptr) {
currentAltReachedAcceptState = closure(input, c, configs, currentAltReachedAcceptState, speculative, treatEofAsEpsilon);
}
}
return currentAltReachedAcceptState;
}
// Returns the configuration obtained by following transition t from `config`
// without consuming input, or null when t cannot be followed that way.
//
// input: the character stream, used for predicate evaluation.
// config: the configuration being expanded.
// t: the transition to examine.
// configs: the set under construction; only its hasSemanticContext flag is
// written here (set when a predicate transition is seen).
// speculative: passed on to evaluatePredicate().
// treatEofAsEpsilon: when true, ATOM/RANGE/SET transitions that match
// Token::EOF are followed as well.
//
// Throws UnsupportedOperationException for precedence predicate transitions.
Ref<LexerATNConfig> LexerATNSimulator::getEpsilonTarget(CharStream *input, const Ref<LexerATNConfig> &config, const Transition *t,
ATNConfigSet *configs, bool speculative, bool treatEofAsEpsilon) {
Ref<LexerATNConfig> c = nullptr;
switch (t->getTransitionType()) {
case TransitionType::RULE: {
// Entering a rule: "push" the follow state onto the prediction context.
const RuleTransition *ruleTransition = static_cast<const RuleTransition*>(t);
Ref<const PredictionContext> newContext = SingletonPredictionContext::create(config->context, ruleTransition->followState->stateNumber);
c = std::make_shared<LexerATNConfig>(*config, t->target, newContext);
break;
}
case TransitionType::PRECEDENCE:
throw UnsupportedOperationException("Precedence predicates are not supported in lexers.");
case TransitionType::PREDICATE: {
/* Track traversing semantic predicates. If we traverse,
we cannot add a DFA state for this "reach" computation
because the DFA would not test the predicate again in the
future. Rather than creating collections of semantic predicates
like v3 and testing them on prediction, v4 will test them on the
fly all the time using the ATN not the DFA. This is slower but
semantically it's not used that often. One of the key elements to
this predicate mechanism is not adding DFA states that see
predicates immediately afterwards in the ATN. For example,
a : ID {p1}? | ID {p2}? ;
should create the start state for rule 'a' (to save start state
computation), but should not create target of ID state. The
collection of ATN states that follow the ID references includes
states reached by traversing predicates. Since this is when we
test them, we cannot cache the DFA state target of ID.
*/
const PredicateTransition *pt = static_cast<const PredicateTransition*>(t);
#if LEXER_DEBUG_ATN == 1
std::cout << "EVAL rule " << pt->getRuleIndex() << ":" << pt->getPredIndex() << std::endl;
#endif
configs->hasSemanticContext = true;
if (evaluatePredicate(input, pt->getRuleIndex(), pt->getPredIndex(), speculative)) {
c = std::make_shared<LexerATNConfig>(*config, t->target);
}
break;
}
case TransitionType::ACTION:
if (config->context == nullptr|| config->context->hasEmptyPath()) {
// execute actions anywhere in the start rule for a token.
//
// TODO: if the entry rule is invoked recursively, some
// actions may be executed during the recursive call. The
// problem can appear when hasEmptyPath() is true but
// isEmpty() is false. In this case, the config needs to be
// split into two contexts - one with just the empty path
// and another with everything but the empty path.
// Unfortunately, the current algorithm does not allow
// getEpsilonTarget to return two configurations, so
// additional modifications are needed before we can support
// the split operation.
auto lexerActionExecutor = LexerActionExecutor::append(config->getLexerActionExecutor(),
atn.lexerActions[static_cast<const ActionTransition *>(t)->actionIndex]);
c = std::make_shared<LexerATNConfig>(*config, t->target, std::move(lexerActionExecutor));
break;
}
else {
// ignore actions in referenced rules
c = std::make_shared<LexerATNConfig>(*config, t->target);
break;
}
case TransitionType::EPSILON:
c = std::make_shared<LexerATNConfig>(*config, t->target);
break;
case TransitionType::ATOM:
case TransitionType::RANGE:
case TransitionType::SET:
// Symbol-consuming transitions are followed here only for EOF, and only
// when the caller asked for EOF to be treated as epsilon.
if (treatEofAsEpsilon) {
if (t->matches(Token::EOF, Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE)) {
c = std::make_shared<LexerATNConfig>(*config, t->target);
break;
}
}
break;
default: // To silence the compiler. Other transition types are not used here.
break;
}
return c;
}
/// Evaluates the semantic predicate {@code predIndex} of rule {@code ruleIndex}
/// through the attached lexer.
///
/// With no lexer attached the predicate is taken to be true. In speculative
/// mode one character is consumed before the predicate runs, and the line,
/// column and input position are put back once the result has been computed.
bool LexerATNSimulator::evaluatePredicate(CharStream *input, size_t ruleIndex, size_t predIndex, bool speculative) {
  if (_recog == nullptr) {
    // Nobody to ask: assume the predicate holds.
    return true;
  }

  if (speculative) {
    const size_t columnBefore = _charPositionInLine;
    const size_t lineBefore = _line;
    const size_t indexBefore = input->index();
    const ssize_t mark = input->mark();

    // Undo the consume() below when leaving this scope, even on exceptions.
    auto restore = finally([this, input, columnBefore, lineBefore, indexBefore, mark] {
      _charPositionInLine = columnBefore;
      _line = lineBefore;
      input->seek(indexBefore);
      input->release(mark);
    });

    consume(input);
    return _recog->sempred(nullptr, ruleIndex, predIndex);
  }

  return _recog->sempred(nullptr, ruleIndex, predIndex);
}
/// Records the current input index, line, column and {@code dfaState} in
/// {@code _prevAccept}.
void LexerATNSimulator::captureSimState(CharStream *input, dfa::DFAState *dfaState) {
  SimState &snapshot = _prevAccept;
  snapshot.dfaState = dfaState;
  snapshot.charPos = _charPositionInLine;
  snapshot.line = _line;
  snapshot.index = input->index();
}
/// Creates (or finds) the DFA state for configuration set {@code q} and,
/// unless predicates were involved, links it from {@code from} on symbol
/// {@code t}.
///
/// {@code q->hasSemanticContext} acts as a marker that predicate evaluation
/// made this edge depend on the specific input, so the static edge is left
/// out. The flag is cleared and the target state is still created, which lets
/// a later simulation that evaluates the predicate again pick up the existing
/// (dangling) state and continue from there.
dfa::DFAState *LexerATNSimulator::addDFAEdge(dfa::DFAState *from, size_t t, ATNConfigSet *q) {
  const bool sawPredicate = q->hasSemanticContext;
  q->hasSemanticContext = false;

  dfa::DFAState *target = addDFAState(q);
  if (!sawPredicate) {
    addDFAEdge(from, t, target);
  }
  return target;
}
/// Connects DFA state {@code p} to {@code q} on symbol {@code t}.
/// Symbols above MAX_DFA_EDGE are not tracked and the call is a no-op for
/// them (MIN_DFA_EDGE is 0, so no lower-bound check is needed).
void LexerATNSimulator::addDFAEdge(dfa::DFAState *p, size_t t, dfa::DFAState *q) {
  if (t <= MAX_DFA_EDGE) {
    UniqueLock<SharedMutex> guard(atn._edgeMutex);
    p->edges[t - MIN_DFA_EDGE] = q;
  }
}
/// Convenience overload: forwards to the two-argument addDFAState with
/// {@code suppressEdge} set to true.
dfa::DFAState *LexerATNSimulator::addDFAState(ATNConfigSet *configs) {
  constexpr bool suppressEdge = true;
  return addDFAState(configs, suppressEdge);
}
/// Returns the DFA state for {@code configs} in the DFA of the current mode,
/// inserting a new one if no equal state is present yet.
///
/// {@code configs} is wrapped in a std::unique_ptr owned by the proposed
/// state, so the pointer is released either by the DFA or when the duplicate
/// proposal is deleted below. NOTE(review): callers therefore appear to hand
/// over ownership of {@code configs} - confirm at the call sites.
///
/// When {@code suppressEdge} is false the resulting state is also stored as
/// {@code dfa.s0}.
dfa::DFAState *LexerATNSimulator::addDFAState(ATNConfigSet *configs, bool suppressEdge) {
/* the lexer evaluates predicates on-the-fly; by this point configs
* should not contain any configurations with unevaluated predicates.
*/
assert(!configs->hasSemanticContext);
dfa::DFAState *proposed = new dfa::DFAState(std::unique_ptr<ATNConfigSet>(configs)); /* mem-check: managed by the DFA or deleted below */
// The first configuration sitting in a rule stop state decides acceptance.
Ref<ATNConfig> firstConfigWithRuleStopState = nullptr;
for (const auto &c : configs->configs) {
if (RuleStopState::is(c->state)) {
firstConfigWithRuleStopState = c;
break;
}
}
if (firstConfigWithRuleStopState != nullptr) {
proposed->isAcceptState = true;
proposed->lexerActionExecutor = downCast<const LexerATNConfig&>(*firstConfigWithRuleStopState).getLexerActionExecutor();
proposed->prediction = atn.ruleToTokenType[firstConfigWithRuleStopState->state->ruleIndex];
}
dfa::DFA &dfa = _decisionToDFA[_mode];
{
// Insertion and the s0 update happen while holding the state mutex.
UniqueLock<SharedMutex> stateLock(atn._stateMutex);
auto [existing, inserted] = dfa.states.insert(proposed);
if (!inserted) {
// An equal state is already present: drop the proposal and reuse it.
delete proposed;
proposed = *existing;
} else {
// Previously we did a lookup, then set fields, then inserted. It was `dfa.states.size()`,
// since we already inserted we need to subtract one.
proposed->stateNumber = static_cast<int>(dfa.states.size() - 1);
proposed->configs->setReadonly(true);
}
if (!suppressEdge) {
dfa.s0 = proposed;
}
}
return proposed;
}
/// Returns the DFA stored for lexer mode {@code mode}. The index is not
/// range-checked here.
dfa::DFA& LexerATNSimulator::getDFA(size_t mode) {
return _decisionToDFA[mode];
}
/// Returns the text matched so far for the current token: everything from
/// {@code _startIndex} up to, but not including, the current lookahead
/// character.
std::string LexerATNSimulator::getText(CharStream *input) {
  // input->index() points at the first lookahead char, which is excluded.
  const size_t lastMatched = input->index() - 1;
  return input->getText(misc::Interval(_startIndex, lastMatched));
}
/// Returns the simulator's current line number.
size_t LexerATNSimulator::getLine() const {
return _line;
}
/// Overrides the simulator's current line number.
void LexerATNSimulator::setLine(size_t line) {
_line = line;
}
/// Returns the simulator's current character position within the line.
size_t LexerATNSimulator::getCharPositionInLine() {
return _charPositionInLine;
}
/// Overrides the simulator's current character position within the line.
void LexerATNSimulator::setCharPositionInLine(size_t charPositionInLine) {
_charPositionInLine = charPositionInLine;
}
/// Advances {@code input} by one character while keeping the line and column
/// counters in step: a newline starts a new line at column 0, any other
/// character moves one column to the right.
void LexerATNSimulator::consume(CharStream *input) {
  const bool atNewline = input->LA(1) == '\n';
  if (!atNewline) {
    _charPositionInLine++;
  } else {
    _line++;
    _charPositionInLine = 0;
  }
  input->consume();
}
/// Returns a printable name for symbol {@code t}: "EOF" for Token::EOF,
/// otherwise the symbol narrowed to a single char and wrapped in single
/// quotes.
std::string LexerATNSimulator::getTokenName(size_t t) {
  if (t != Token::EOF) {
    std::string quoted("'");
    quoted += static_cast<char>(t);
    quoted += "'";
    return quoted;
  }
  return "EOF";
}
/// Puts the position-tracking fields into their initial state: default mode,
/// line 1, column 0, token start index 0.
void LexerATNSimulator::InitializeInstanceFields() {
  _mode = antlr4::Lexer::DEFAULT_MODE;
  _line = 1;
  _charPositionInLine = 0;
  _startIndex = 0;
}

View File

@@ -0,0 +1,199 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include <atomic>
#include "atn/ATNSimulator.h"
#include "atn/LexerATNConfig.h"
#include "atn/ATNConfigSet.h"
namespace antlr4 {
namespace atn {
/// "dup" of ParserInterpreter
class ANTLR4CPP_PUBLIC LexerATNSimulator : public ATNSimulator {
protected:
/// <summary>
/// When we hit an accept state in either the DFA or the ATN, we
/// have to notify the character stream to start buffering characters
/// via <seealso cref="IntStream#mark"/> and record the current state. The current sim state
/// includes the current index into the input, the current line,
/// and current character position in that line. Note that the Lexer is
/// tracking the starting line and character position of the token. These
/// variables track the "state" of the simulator when it hits an accept state.
/// <p/>
/// We track these variables separately for the DFA and ATN simulation
/// because the DFA simulation often has to fail over to the ATN
/// simulation. If the ATN simulation fails, we need the DFA to fall
/// back to its previously accepted state, if any. If the ATN succeeds,
/// then the ATN does the accept and the DFA simulator that invoked it
/// can simply return the predicted token type.
/// </summary>
struct ANTLR4CPP_PUBLIC SimState final {
size_t index = INVALID_INDEX;
size_t line = 0;
size_t charPos = INVALID_INDEX;
dfa::DFAState *dfaState = nullptr;
void reset();
};
public:
/// Smallest and largest input symbol for which DFA edges are stored.
static constexpr size_t MIN_DFA_EDGE = 0;
static constexpr size_t MAX_DFA_EDGE = 127; // forces unicode to stay in ATN
protected:
/// The lexer consulted when evaluating semantic predicates. May be null, in
/// which case predicates are assumed to be true.
Lexer *const _recog;
/// The current token's starting index into the character stream.
/// Shared across DFA to ATN simulation in case the ATN fails and the
/// DFA did not have a previous accept state. In this case, we use the
/// ATN-generated exception object.
size_t _startIndex;
/// line number 1..n within the input.
size_t _line;
/// The index of the character relative to the beginning of the line 0..n-1.
size_t _charPositionInLine;
public:
/// DFAs indexed by lexer mode; held by reference, so the vector is owned elsewhere.
std::vector<dfa::DFA> &_decisionToDFA;
protected:
/// The lexer mode used to select the DFA in _decisionToDFA.
size_t _mode;
/// Used during DFA/ATN exec to record the most recent accept configuration info.
SimState _prevAccept;
public:
LexerATNSimulator(const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, PredictionContextCache &sharedContextCache);
LexerATNSimulator(Lexer *recog, const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, PredictionContextCache &sharedContextCache);
virtual ~LexerATNSimulator() = default;
virtual void copyState(LexerATNSimulator *simulator);
virtual size_t match(CharStream *input, size_t mode);
virtual void reset() override;
virtual void clearDFA() override;
protected:
virtual size_t matchATN(CharStream *input);
virtual size_t execATN(CharStream *input, dfa::DFAState *ds0);
/// <summary>
/// Get an existing target state for an edge in the DFA. If the target state
/// for the edge has not yet been computed or is otherwise not available,
/// this method returns {@code null}.
/// </summary>
/// <param name="s"> The current DFA state </param>
/// <param name="t"> The next input symbol </param>
/// <returns> The existing target DFA state for the given input symbol
/// {@code t}, or {@code null} if the target state for this edge is not
/// already cached </returns>
virtual dfa::DFAState *getExistingTargetState(dfa::DFAState *s, size_t t);
/// <summary>
/// Compute a target state for an edge in the DFA, and attempt to add the
/// computed state and corresponding edge to the DFA.
/// </summary>
/// <param name="input"> The input stream </param>
/// <param name="s"> The current DFA state </param>
/// <param name="t"> The next input symbol
/// </param>
/// <returns> The computed target DFA state for the given input symbol
/// {@code t}. If {@code t} does not lead to a valid DFA state, this method
/// returns <seealso cref="#ERROR"/>. </returns>
virtual dfa::DFAState *computeTargetState(CharStream *input, dfa::DFAState *s, size_t t);
virtual size_t failOrAccept(CharStream *input, ATNConfigSet *reach, size_t t);
/// <summary>
/// Given a starting configuration set, figure out all ATN configurations
/// we can reach upon input {@code t}. Parameter {@code reach} is a return
/// parameter.
/// </summary>
void getReachableConfigSet(CharStream *input, ATNConfigSet *closure_, // closure_ as we have a closure() already
ATNConfigSet *reach, size_t t);
virtual void accept(CharStream *input, const Ref<const LexerActionExecutor> &lexerActionExecutor, size_t startIndex, size_t index,
size_t line, size_t charPos);
virtual ATNState *getReachableTarget(const Transition *trans, size_t t);
virtual std::unique_ptr<ATNConfigSet> computeStartState(CharStream *input, ATNState *p);
/// <summary>
/// Since the alternatives within any lexer decision are ordered by
/// preference, this method stops pursuing the closure as soon as an accept
/// state is reached. After the first accept state is reached by depth-first
/// search from {@code config}, all other (potentially reachable) states for
/// this rule would have a lower priority.
/// </summary>
/// <returns> {@code true} if an accept state is reached, otherwise
/// {@code false}. </returns>
virtual bool closure(CharStream *input, const Ref<LexerATNConfig> &config, ATNConfigSet *configs,
bool currentAltReachedAcceptState, bool speculative, bool treatEofAsEpsilon);
// side-effect: can alter configs.hasSemanticContext
virtual Ref<LexerATNConfig> getEpsilonTarget(CharStream *input, const Ref<LexerATNConfig> &config, const Transition *t,
ATNConfigSet *configs, bool speculative, bool treatEofAsEpsilon);
/// <summary>
/// Evaluate a predicate specified in the lexer.
/// <p/>
/// If {@code speculative} is {@code true}, this method was called before
/// <seealso cref="#consume"/> for the matched character. This method should call
/// <seealso cref="#consume"/> before evaluating the predicate to ensure position
/// sensitive values, including <seealso cref="Lexer#getText"/>, <seealso cref="Lexer#getLine"/>,
/// and <seealso cref="Lexer#getCharPositionInLine"/>, properly reflect the current
/// lexer state. This method should restore {@code input} and the simulator
/// to the original state before returning (i.e. undo the actions made by the
/// call to <seealso cref="#consume"/>).
/// </summary>
/// <param name="input"> The input stream. </param>
/// <param name="ruleIndex"> The rule containing the predicate. </param>
/// <param name="predIndex"> The index of the predicate within the rule. </param>
/// <param name="speculative"> {@code true} if the current index in {@code input} is
/// one character before the predicate's location.
/// </param>
/// <returns> {@code true} if the specified predicate evaluates to
/// {@code true}. </returns>
virtual bool evaluatePredicate(CharStream *input, size_t ruleIndex, size_t predIndex, bool speculative);
/// Stores the current input index, line, column and {@code dfaState} in _prevAccept.
virtual void captureSimState(CharStream *input, dfa::DFAState *dfaState);
/// Creates or finds the DFA state for {@code q} and links it from {@code from}
/// on {@code t}, unless {@code q} was marked as having a semantic context.
virtual dfa::DFAState* addDFAEdge(dfa::DFAState *from, size_t t, ATNConfigSet *q);
/// Links {@code p} to {@code q} on {@code t}; symbols above MAX_DFA_EDGE are ignored.
virtual void addDFAEdge(dfa::DFAState *p, size_t t, dfa::DFAState *q);
/// <summary>
/// Add a new DFA state if there isn't one with this set of
/// configurations already. This method also detects the first
/// configuration containing an ATN rule stop state. Later, when
/// traversing the DFA, we will know which rule to accept.
/// </summary>
virtual dfa::DFAState *addDFAState(ATNConfigSet *configs);
virtual dfa::DFAState *addDFAState(ATNConfigSet *configs, bool suppressEdge);
public:
/// Returns the DFA stored for the given lexer mode.
dfa::DFA& getDFA(size_t mode);
/// Get the text matched so far for the current token.
virtual std::string getText(CharStream *input);
virtual size_t getLine() const;
virtual void setLine(size_t line);
virtual size_t getCharPositionInLine();
virtual void setCharPositionInLine(size_t charPositionInLine);
/// Consumes one character from {@code input}, updating line and column.
virtual void consume(CharStream *input);
/// Returns "EOF" for Token::EOF, otherwise the symbol as a quoted character.
virtual std::string getTokenName(size_t t);
private:
/// Resets start index, line, column and mode to their initial values.
void InitializeInstanceFields();
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,15 @@
#include "LexerAction.h"
using namespace antlr4::atn;
/// Returns this action's hash code, computing and caching it on first use.
///
/// A cached value of 0 means "not computed yet", so a computed hash of 0 is
/// replaced by the largest size_t value before it is stored.
size_t LexerAction::hashCode() const {
  const size_t cached = cachedHashCode();
  if (cached != 0) {
    return cached;
  }

  size_t computed = hashCodeImpl();
  if (computed == 0) {
    computed = std::numeric_limits<size_t>::max();
  }
  _hashCode.store(computed, std::memory_order_relaxed);
  return computed;
}

View File

@@ -0,0 +1,100 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "atn/LexerActionType.h"
#include "antlr4-common.h"
namespace antlr4 {
namespace atn {
/// <summary>
/// Represents a single action which can be executed following the successful
/// match of a lexer rule. Lexer actions are used for both embedded action syntax
/// and ANTLR 4's new lexer command syntax.
///
/// @author Sam Harwell
/// @since 4.2
/// </summary>
class ANTLR4CPP_PUBLIC LexerAction {
public:
virtual ~LexerAction() = default;
/// <summary>
/// Gets the serialization type of the lexer action.
/// </summary>
/// <returns> The serialization type of the lexer action. </returns>
///
/// IMPORTANT: Unlike Java, this returns LexerActionType::INDEXED_CUSTOM for instances of
/// LexerIndexedCustomAction. If you need the wrapped action type, use
/// LexerIndexedCustomAction::getAction()->getActionType().
LexerActionType getActionType() const { return _actionType; }
/// <summary>
/// Gets whether the lexer action is position-dependent. Position-dependent
/// actions may have different semantics depending on the <seealso cref="CharStream"/>
/// index at the time the action is executed.
///
/// <para>Many lexer commands, including {@code type}, {@code skip}, and
/// {@code more}, do not check the input index during their execution.
/// Actions like this are position-independent, and may be stored more
/// efficiently as part of the <seealso cref="LexerATNConfig#lexerActionExecutor"/>.</para>
/// </summary>
/// <returns> {@code true} if the lexer action semantics can be affected by the
/// position of the input <seealso cref="CharStream"/> at the time it is executed;
/// otherwise, {@code false}. </returns>
bool isPositionDependent() const { return _positionDependent; }
/// <summary>
/// Execute the lexer action in the context of the specified <seealso cref="Lexer"/>.
///
/// <para>For position-dependent actions, the input stream must already be
/// positioned correctly prior to calling this method.</para>
/// </summary>
/// <param name="lexer"> The lexer instance. </param>
virtual void execute(Lexer *lexer) const = 0;
/// Returns the hash code of this action. The value is computed through
/// hashCodeImpl() on first use and cached afterwards; it is never 0.
size_t hashCode() const;
/// Returns whether this action is equal to {@code other}.
virtual bool equals(const LexerAction &other) const = 0;
/// Returns a textual representation of this action.
virtual std::string toString() const = 0;
protected:
/// Stores the action type and position-dependence flag; the hash cache
/// starts at 0, which hashCode() treats as "not computed yet".
LexerAction(LexerActionType actionType, bool positionDependent)
: _actionType(actionType), _hashCode(0), _positionDependent(positionDependent) {}
/// Computes the hash code of the concrete action; called by hashCode()
/// when no value has been cached.
virtual size_t hashCodeImpl() const = 0;
/// Returns the cached hash code, or 0 if none has been stored yet.
size_t cachedHashCode() const { return _hashCode.load(std::memory_order_relaxed); }
private:
const LexerActionType _actionType;
// Lazily filled by hashCode(); mutable so it can be set from const methods.
mutable std::atomic<size_t> _hashCode;
const bool _positionDependent;
};
/// Equality of lexer actions, delegating to LexerAction::equals.
inline bool operator==(const LexerAction &lhs, const LexerAction &rhs) {
return lhs.equals(rhs);
}
/// Inequality of lexer actions; the negation of operator==.
inline bool operator!=(const LexerAction &lhs, const LexerAction &rhs) {
return !operator==(lhs, rhs);
}
} // namespace atn
} // namespace antlr4
namespace std {
/// std::hash specialization for LexerAction that forwards to
/// LexerAction::hashCode().
template <>
struct hash<::antlr4::atn::LexerAction> {
size_t operator()(const ::antlr4::atn::LexerAction &lexerAction) const {
return lexerAction.hashCode();
}
};
} // namespace std

View File

@@ -0,0 +1,108 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "misc/MurmurHash.h"
#include "atn/LexerIndexedCustomAction.h"
#include "atn/HashUtils.h"
#include "support/CPPUtils.h"
#include "support/Arrays.h"
#include "support/Casts.h"
#include "atn/LexerActionExecutor.h"
using namespace antlr4;
using namespace antlr4::atn;
using namespace antlr4::misc;
using namespace antlrcpp;
namespace {
bool lexerActionEqual(const Ref<const LexerAction> &lhs, const Ref<const LexerAction> &rhs) {
return *lhs == *rhs;
}
}
/// Takes ownership of the given action list by moving it into the executor.
/// The hash cache starts at 0, which hashCode() treats as "not computed yet".
LexerActionExecutor::LexerActionExecutor(std::vector<Ref<const LexerAction>> lexerActions)
: _lexerActions(std::move(lexerActions)), _hashCode(0) {}
/// Builds a new executor that runs the actions of {@code lexerActionExecutor}
/// (if any) followed by {@code lexerAction}. A null executor is treated as an
/// empty one. The input executor is not modified.
Ref<const LexerActionExecutor> LexerActionExecutor::append(const Ref<const LexerActionExecutor> &lexerActionExecutor,
                                                           Ref<const LexerAction> lexerAction) {
  std::vector<Ref<const LexerAction>> combined;
  if (lexerActionExecutor != nullptr) {
    const auto &existing = lexerActionExecutor->_lexerActions;
    combined.reserve(existing.size() + 1);
    combined.insert(combined.end(), existing.begin(), existing.end());
  }
  combined.push_back(std::move(lexerAction));
  return std::make_shared<LexerActionExecutor>(std::move(combined));
}
/// Returns an executor in which every position-dependent action that is not
/// already a LexerIndexedCustomAction is wrapped in one carrying
/// {@code offset}. If nothing needs wrapping, this executor itself is
/// returned.
Ref<const LexerActionExecutor> LexerActionExecutor::fixOffsetBeforeMatch(int offset) const {
  // Stays empty until the first action that needs rewriting is found; only
  // then is the action list copied.
  std::vector<Ref<const LexerAction>> rewritten;
  size_t position = 0;
  for (const auto &action : _lexerActions) {
    const bool needsOffset = action->isPositionDependent() && !LexerIndexedCustomAction::is(*action);
    if (needsOffset) {
      if (rewritten.empty()) {
        rewritten = _lexerActions;
      }
      rewritten[position] = std::make_shared<LexerIndexedCustomAction>(offset, action);
    }
    ++position;
  }

  if (!rewritten.empty()) {
    return std::make_shared<LexerActionExecutor>(std::move(rewritten));
  }
  return shared_from_this();
}
/// Returns a reference to the actions held by this executor, in execution order.
const std::vector<Ref<const LexerAction>>& LexerActionExecutor::getLexerActions() const {
return _lexerActions;
}
/// Executes every action held by this executor, in order, against
/// {@code lexer}.
///
/// Position-dependent actions need the input to be at a particular index when
/// they run: indexed custom actions are run at {@code startIndex + offset},
/// other position-dependent actions at the index {@code input} had on entry.
/// Before returning (normally or by exception) the input is put back at the
/// entry index if one of those seeks left it somewhere else.
///
/// @param lexer      The lexer the actions operate on.
/// @param input      The character stream the current token came from.
/// @param startIndex Index of the first character of the current token.
void LexerActionExecutor::execute(Lexer *lexer, CharStream *input, size_t startIndex) const {
  bool requiresSeek = false;
  const size_t stopIndex = input->index();

  // requiresSeek is captured BY REFERENCE on purpose: it is updated inside the
  // loop below and the guard must see its final value. Capturing it by value
  // would freeze it at `false`, so the input position would never be restored.
  auto onExit = finally([&requiresSeek, input, stopIndex]() {
    if (requiresSeek) {
      input->seek(stopIndex);
    }
  });

  for (const auto &lexerAction : _lexerActions) {
    if (LexerIndexedCustomAction::is(*lexerAction)) {
      const int offset = downCast<const LexerIndexedCustomAction&>(*lexerAction).getOffset();
      input->seek(startIndex + offset);
      requiresSeek = (startIndex + offset) != stopIndex;
    } else if (lexerAction->isPositionDependent()) {
      input->seek(stopIndex);
      requiresSeek = false;
    }
    lexerAction->execute(lexer);
  }
}
/// Returns the hash code of this executor, derived from its actions and
/// their count. The value is computed on first use and cached; 0 is reserved
/// for "not computed yet", so a computed 0 is replaced by the largest size_t.
size_t LexerActionExecutor::hashCode() const {
  const size_t cached = _hashCode.load(std::memory_order_relaxed);
  if (cached != 0) {
    return cached;
  }

  size_t computed = MurmurHash::initialize();
  for (const auto &lexerAction : _lexerActions) {
    computed = MurmurHash::update(computed, lexerAction);
  }
  computed = MurmurHash::finish(computed, _lexerActions.size());
  if (computed == 0) {
    computed = std::numeric_limits<size_t>::max();
  }
  _hashCode.store(computed, std::memory_order_relaxed);
  return computed;
}
/// Returns whether this executor holds the same sequence of actions as
/// {@code other}. The cached hash codes are compared first as a cheap
/// filter, then the sizes, then the actions element by element.
bool LexerActionExecutor::equals(const LexerActionExecutor &other) const {
  if (this == std::addressof(other)) {
    return true;
  }
  if (!cachedHashCodeEqual(_hashCode.load(std::memory_order_relaxed), other._hashCode.load(std::memory_order_relaxed))) {
    return false;
  }
  if (_lexerActions.size() != other._lexerActions.size()) {
    return false;
  }
  return std::equal(_lexerActions.begin(), _lexerActions.end(), other._lexerActions.begin(), lexerActionEqual);
}

View File

@@ -0,0 +1,128 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "CharStream.h"
#include "atn/LexerAction.h"
namespace antlr4 {
namespace atn {
/// Represents an executor for a sequence of lexer actions which traversed during
/// the matching operation of a lexer rule (token).
///
/// <para>The executor tracks position information for position-dependent lexer actions
/// efficiently, ensuring that actions appearing only at the end of the rule do
/// not cause bloating of the <seealso cref="DFA"/> created for the lexer.</para>
class ANTLR4CPP_PUBLIC LexerActionExecutor final : public std::enable_shared_from_this<LexerActionExecutor> {
public:
/// <summary>
/// Constructs an executor for a sequence of <seealso cref="LexerAction"/> actions. </summary>
/// <param name="lexerActions"> The lexer actions to execute. </param>
explicit LexerActionExecutor(std::vector<Ref<const LexerAction>> lexerActions);
/// <summary>
/// Creates a <seealso cref="LexerActionExecutor"/> which executes the actions for
/// the input {@code lexerActionExecutor} followed by a specified
/// {@code lexerAction}.
/// </summary>
/// <param name="lexerActionExecutor"> The executor for actions already traversed by
/// the lexer while matching a token within a particular
/// <seealso cref="LexerATNConfig"/>. If this is {@code null}, the method behaves as
/// though it were an empty executor. </param>
/// <param name="lexerAction"> The lexer action to execute after the actions
/// specified in {@code lexerActionExecutor}.
/// </param>
/// <returns> A <seealso cref="LexerActionExecutor"/> for executing the combined actions
/// of {@code lexerActionExecutor} and {@code lexerAction}. </returns>
static Ref<const LexerActionExecutor> append(const Ref<const LexerActionExecutor> &lexerActionExecutor,
Ref<const LexerAction> lexerAction);
/// <summary>
/// Creates a <seealso cref="LexerActionExecutor"/> which encodes the current offset
/// for position-dependent lexer actions.
///
/// <para>Normally, when the executor encounters lexer actions where
/// <seealso cref="LexerAction#isPositionDependent"/> returns {@code true}, it calls
/// <seealso cref="IntStream#seek"/> on the input <seealso cref="CharStream"/> to set the input
/// position to the <em>end</em> of the current token. This behavior provides
/// for efficient DFA representation of lexer actions which appear at the end
/// of a lexer rule, even when the lexer rule matches a variable number of
/// characters.</para>
///
/// <para>Prior to traversing a match transition in the ATN, the current offset
/// from the token start index is assigned to all position-dependent lexer
/// actions which have not already been assigned a fixed offset. By storing
/// the offsets relative to the token start index, the DFA representation of
/// lexer actions which appear in the middle of tokens remains efficient due
/// to sharing among tokens of the same length, regardless of their absolute
/// position in the input stream.</para>
///
/// <para>If the current executor already has offsets assigned to all
/// position-dependent lexer actions, the method returns {@code this}.</para>
/// </summary>
/// <param name="offset"> The current offset to assign to all position-dependent
/// lexer actions which do not already have offsets assigned.
/// </param>
/// <returns> A <seealso cref="LexerActionExecutor"/> which stores input stream offsets
/// for all position-dependent lexer actions. </returns>
Ref<const LexerActionExecutor> fixOffsetBeforeMatch(int offset) const;
/// <summary>
/// Gets the lexer actions to be executed by this executor. </summary>
/// <returns> The lexer actions to be executed by this executor. </returns>
const std::vector<Ref<const LexerAction>>& getLexerActions() const;
/// <summary>
/// Execute the actions encapsulated by this executor within the context of a
/// particular <seealso cref="Lexer"/>.
///
/// <para>This method calls <seealso cref="IntStream#seek"/> to set the position of the
/// {@code input} <seealso cref="CharStream"/> prior to calling
/// <seealso cref="LexerAction#execute"/> on a position-dependent action. Before the
/// method returns, the input position will be restored to the same position
/// it was in when the method was invoked.</para>
/// </summary>
/// <param name="lexer"> The lexer instance. </param>
/// <param name="input"> The input stream which is the source for the current token.
/// When this method is called, the current <seealso cref="IntStream#index"/> for
/// {@code input} should be the start of the following token, i.e. 1
/// character past the end of the current token. </param>
/// <param name="startIndex"> The token start index. This value may be passed to
/// <seealso cref="IntStream#seek"/> to set the {@code input} position to the beginning
/// of the token. </param>
void execute(Lexer *lexer, CharStream *input, size_t startIndex) const;
/// Returns the hash code of this executor; computed on first use and cached.
size_t hashCode() const;
/// Returns whether {@code other} holds an equal sequence of lexer actions.
bool equals(const LexerActionExecutor &other) const;
private:
// The actions to run, in execution order.
const std::vector<Ref<const LexerAction>> _lexerActions;
// Lazily computed hash; 0 means "not computed yet".
mutable std::atomic<size_t> _hashCode;
};
/// Equality of executors, delegating to LexerActionExecutor::equals.
inline bool operator==(const LexerActionExecutor &lhs, const LexerActionExecutor &rhs) {
return lhs.equals(rhs);
}
/// Inequality of executors; the negation of operator==.
inline bool operator!=(const LexerActionExecutor &lhs, const LexerActionExecutor &rhs) {
return !operator==(lhs, rhs);
}
} // namespace atn
} // namespace antlr4
namespace std {
/// std::hash specialization for LexerActionExecutor that forwards to
/// LexerActionExecutor::hashCode().
template <>
struct hash<::antlr4::atn::LexerActionExecutor> {
size_t operator()(const ::antlr4::atn::LexerActionExecutor &lexerActionExecutor) const {
return lexerActionExecutor.hashCode();
}
};
} // namespace std

View File

@@ -0,0 +1,57 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "antlr4-common.h"
namespace antlr4 {
namespace atn {
/// <summary>
/// Represents the serialization type of a <seealso cref="LexerAction"/>.
///
/// @author Sam Harwell
/// @since 4.2
/// </summary>
enum class LexerActionType : size_t {
/// <summary>
/// The type of a <seealso cref="LexerChannelAction"/> action.
/// </summary>
CHANNEL = 0,
/// <summary>
/// The type of a <seealso cref="LexerCustomAction"/> action.
/// </summary>
CUSTOM,
/// <summary>
/// The type of a <seealso cref="LexerModeAction"/> action.
/// </summary>
MODE,
/// <summary>
/// The type of a <seealso cref="LexerMoreAction"/> action.
/// </summary>
MORE,
/// <summary>
/// The type of a <seealso cref="LexerPopModeAction"/> action.
/// </summary>
POP_MODE,
/// <summary>
/// The type of a <seealso cref="LexerPushModeAction"/> action.
/// </summary>
PUSH_MODE,
/// <summary>
/// The type of a <seealso cref="LexerSkipAction"/> action.
/// </summary>
SKIP,
/// <summary>
/// The type of a <seealso cref="LexerTypeAction"/> action.
/// </summary>
TYPE,
/// <summary>
/// The type of a <seealso cref="LexerIndexedCustomAction"/> action.
/// </summary>
INDEXED_CUSTOM,
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,43 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "misc/MurmurHash.h"
#include "Lexer.h"
#include "support/Casts.h"
#include "atn/LexerChannelAction.h"
using namespace antlr4::atn;
using namespace antlr4::misc;
using namespace antlrcpp;
/// Creates a {@code channel} action for the given channel. The action is
/// registered as position-independent.
LexerChannelAction::LexerChannelAction(int channel)
: LexerAction(LexerActionType::CHANNEL, false), _channel(channel) {}
/// Applies the action by passing the stored channel to Lexer::setChannel.
void LexerChannelAction::execute(Lexer *lexer) const {
lexer->setChannel(getChannel());
}
size_t LexerChannelAction::hashCodeImpl() const {
size_t hash = MurmurHash::initialize();
hash = MurmurHash::update(hash, static_cast<size_t>(getActionType()));
hash = MurmurHash::update(hash, getChannel());
return MurmurHash::finish(hash, 2);
}
bool LexerChannelAction::equals(const LexerAction &other) const {
if (this == std::addressof(other)) {
return true;
}
if (getActionType() != other.getActionType()) {
return false;
}
const auto &lexerAction = downCast<const LexerChannelAction&>(other);
return getChannel() == lexerAction.getChannel();
}
/// Renders the action as {@code channel(N)}, where N is the channel number.
std::string LexerChannelAction::toString() const {
  std::string text = "channel(";
  text += std::to_string(getChannel());
  text += ")";
  return text;
}

View File

@@ -0,0 +1,59 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "atn/LexerAction.h"
#include "atn/LexerActionType.h"
namespace antlr4 {
namespace atn {
using antlr4::Lexer;
/// <summary>
/// Implements the {@code channel} lexer action by calling
/// <seealso cref="Lexer#setChannel"/> with the assigned channel.
///
/// @author Sam Harwell
/// @since 4.2
/// </summary>
class ANTLR4CPP_PUBLIC LexerChannelAction final : public LexerAction {
public:
/// Returns whether {@code lexerAction} reports the CHANNEL action type.
static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::CHANNEL; }
/// Pointer overload of is(); a null pointer yields {@code false}.
static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); }
/// <summary>
/// Constructs a new {@code channel} action with the specified channel value. </summary>
/// <param name="channel"> The channel value to pass to <seealso cref="Lexer#setChannel"/>. </param>
explicit LexerChannelAction(int channel);
/// <summary>
/// Gets the channel to use for the <seealso cref="Token"/> created by the lexer.
/// </summary>
/// <returns> The channel to use for the <seealso cref="Token"/> created by the lexer. </returns>
int getChannel() const { return _channel; }
/// <summary>
/// {@inheritDoc}
///
/// <para>This action is implemented by calling <seealso cref="Lexer#setChannel"/> with the
/// value provided by <seealso cref="#getChannel"/>.</para>
/// </summary>
void execute(Lexer *lexer) const override;
/// Returns whether {@code other} is a channel action with the same channel.
bool equals(const LexerAction &other) const override;
/// Returns the action formatted as {@code channel(N)}.
std::string toString() const override;
protected:
/// Hashes the action type and the channel.
size_t hashCodeImpl() const override;
private:
// The channel handed to Lexer::setChannel when the action executes.
const int _channel;
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,45 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "misc/MurmurHash.h"
#include "Lexer.h"
#include "support/Casts.h"
#include "atn/LexerCustomAction.h"
using namespace antlr4;
using namespace antlr4::atn;
using namespace antlr4::misc;
using namespace antlrcpp;
/// Creates a custom action bound to the given rule and action indexes.
/// NOTE(review): the `true` passed to the LexerAction base is presumably the
/// position-dependent flag -- confirm against LexerAction's constructor.
LexerCustomAction::LexerCustomAction(size_t ruleIndex, size_t actionIndex)
    : LexerAction(LexerActionType::CUSTOM, true),
      _ruleIndex(ruleIndex),
      _actionIndex(actionIndex) {
}
/// Forwards to Lexer::action() with a null context pointer and the stored
/// rule/action indexes. The lexer pointer is dereferenced without a null check.
void LexerCustomAction::execute(Lexer *lexer) const {
  const size_t rule = getRuleIndex();
  const size_t action = getActionIndex();
  lexer->action(nullptr, rule, action);
}
/// Mixes the action type, rule index and action index into a MurmurHash value.
/// The count handed to finish() (3) matches the number of update() calls.
size_t LexerCustomAction::hashCodeImpl() const {
  size_t digest = MurmurHash::initialize();

  digest = MurmurHash::update(digest, static_cast<size_t>(getActionType()));
  digest = MurmurHash::update(digest, getRuleIndex());
  digest = MurmurHash::update(digest, getActionIndex());

  return MurmurHash::finish(digest, 3);
}
/// Two custom actions are equal when they are the same object, or when they
/// share the action type, the rule index and the action index.
bool LexerCustomAction::equals(const LexerAction &other) const {
  if (this != std::addressof(other)) {
    if (getActionType() != other.getActionType()) {
      return false;
    }
    // The type tags matched, so `other` is treated as a LexerCustomAction.
    const auto &rhs = downCast<const LexerCustomAction&>(other);
    if (getRuleIndex() != rhs.getRuleIndex()) {
      return false;
    }
    return getActionIndex() == rhs.getActionIndex();
  }
  return true;
}
/// Renders this action as "custom(<ruleIndex>, <actionIndex>)".
std::string LexerCustomAction::toString() const {
  std::string text = "custom(";
  text += std::to_string(getRuleIndex());
  text += ", ";
  text += std::to_string(getActionIndex());
  text += ")";
  return text;
}

View File

@@ -0,0 +1,75 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "atn/LexerAction.h"
#include "atn/LexerActionType.h"
namespace antlr4 {
namespace atn {
/// <summary>
/// Executes a custom lexer action by calling <seealso cref="Recognizer#action"/> with the
/// rule and action indexes assigned to the custom action. The implementation of
/// a custom action is added to the generated code for the lexer in an override
/// of <seealso cref="Recognizer#action"/> when the grammar is compiled.
///
/// <para>This class may represent embedded actions created with the <code>{...}</code>
/// syntax in ANTLR 4, as well as actions created for lexer commands where the
/// command argument could not be evaluated when the grammar was compiled.</para>
///
/// @author Sam Harwell
/// @since 4.2
/// </summary>
class ANTLR4CPP_PUBLIC LexerCustomAction final : public LexerAction {
public:
/// Returns {@code true} when the given action reports LexerActionType::CUSTOM as its type.
static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::CUSTOM; }
/// Pointer overload of is(): a null pointer yields {@code false}.
static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); }
/// <summary>
/// Constructs a custom lexer action with the specified rule and action
/// indexes.
/// </summary>
/// <param name="ruleIndex"> The rule index to use for calls to
/// <seealso cref="Recognizer#action"/>. </param>
/// <param name="actionIndex"> The action index to use for calls to
/// <seealso cref="Recognizer#action"/>. </param>
LexerCustomAction(size_t ruleIndex, size_t actionIndex);
/// <summary>
/// Gets the rule index to use for calls to <seealso cref="Recognizer#action"/>.
/// </summary>
/// <returns> The rule index for the custom action. </returns>
size_t getRuleIndex() const { return _ruleIndex; }
/// <summary>
/// Gets the action index to use for calls to <seealso cref="Recognizer#action"/>.
/// </summary>
/// <returns> The action index for the custom action. </returns>
size_t getActionIndex() const { return _actionIndex; }
/// <summary>
/// {@inheritDoc}
///
/// <para>Custom actions are implemented by calling <seealso cref="Lexer#action"/> with the
/// appropriate rule and action indexes.</para>
/// </summary>
void execute(Lexer *lexer) const override;
/// Compares this action with another lexer action; equal actions share the
/// action type, the rule index and the action index.
bool equals(const LexerAction &other) const override;
/// Returns the textual form "custom(<ruleIndex>, <actionIndex>)".
std::string toString() const override;
protected:
/// Hashes the action type, rule index and action index.
size_t hashCodeImpl() const override;
private:
/// Rule index given to the constructor.
const size_t _ruleIndex;
/// Action index given to the constructor.
const size_t _actionIndex;
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,50 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "atn/HashUtils.h"
#include "misc/MurmurHash.h"
#include "Lexer.h"
#include "support/CPPUtils.h"
#include "support/Casts.h"
#include "atn/LexerIndexedCustomAction.h"
using namespace antlr4;
using namespace antlr4::atn;
using namespace antlr4::misc;
using namespace antlrcpp;
/// Wraps `action` together with the input offset at which it should run.
/// The shared reference is moved into the member rather than copied.
LexerIndexedCustomAction::LexerIndexedCustomAction(int offset, Ref<const LexerAction> action)
    : LexerAction(LexerActionType::INDEXED_CUSTOM, true),
      _action(std::move(action)),
      _offset(offset) {
}
/// Runs the wrapped action on the given lexer. The offset is not applied here:
/// the caller is expected to have positioned the input stream already.
void LexerIndexedCustomAction::execute(Lexer *lexer) const {
  const auto &wrapped = getAction();
  wrapped->execute(lexer);
}
/// Mixes the action type, the offset and the wrapped action into a MurmurHash
/// value. The count handed to finish() (3) matches the number of update() calls.
size_t LexerIndexedCustomAction::hashCodeImpl() const {
  size_t digest = MurmurHash::initialize();

  digest = MurmurHash::update(digest, static_cast<size_t>(getActionType()));
  digest = MurmurHash::update(digest, getOffset());
  digest = MurmurHash::update(digest, getAction());

  return MurmurHash::finish(digest, 3);
}
/// Two indexed custom actions are equal when they are the same object, or when
/// they share the action type, the offset, compatible cached hash codes and an
/// equal wrapped action. The checks run in that order and stop at the first
/// mismatch.
bool LexerIndexedCustomAction::equals(const LexerAction &other) const {
  const bool sameObject = (this == std::addressof(other));
  if (sameObject) {
    return true;
  }
  if (getActionType() != other.getActionType()) {
    return false;
  }
  // The type tags matched, so `other` is treated as a LexerIndexedCustomAction.
  const auto &rhs = downCast<const LexerIndexedCustomAction&>(other);
  if (getOffset() != rhs.getOffset()) {
    return false;
  }
  // Compare cached hashes before the deeper comparison of the wrapped actions.
  if (!cachedHashCodeEqual(cachedHashCode(), rhs.cachedHashCode())) {
    return false;
  }
  return *getAction() == *rhs.getAction();
}
/// Renders this action as "indexedCustom(<offset>, <wrapped action text>)".
std::string LexerIndexedCustomAction::toString() const {
  std::string text = "indexedCustom(";
  text += std::to_string(getOffset());
  text += ", ";
  text += getAction()->toString();
  text += ")";
  return text;
}

View File

@@ -0,0 +1,76 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "RuleContext.h"
#include "atn/LexerAction.h"
namespace antlr4 {
namespace atn {
/// <summary>
/// This implementation of <seealso cref="LexerAction"/> is used for tracking input offsets
/// for position-dependent actions within a <seealso cref="LexerActionExecutor"/>.
///
/// <para>This action is not serialized as part of the ATN, and is only required for
/// position-dependent lexer actions which appear at a location other than the
/// end of a rule. For more information about DFA optimizations employed for
/// lexer actions, see <seealso cref="LexerActionExecutor#append"/> and
/// <seealso cref="LexerActionExecutor#fixOffsetBeforeMatch"/>.</para>
///
/// @author Sam Harwell
/// @since 4.2
/// </summary>
class ANTLR4CPP_PUBLIC LexerIndexedCustomAction final : public LexerAction {
public:
/// Returns {@code true} when the given action reports LexerActionType::INDEXED_CUSTOM as its type.
static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::INDEXED_CUSTOM; }
/// Pointer overload of is(): a null pointer yields {@code false}.
static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); }
/// <summary>
/// Constructs a new indexed custom action by associating a character offset
/// with a <seealso cref="LexerAction"/>.
///
/// <para>Note: This class is only required for lexer actions for which
/// <seealso cref="LexerAction#isPositionDependent"/> returns {@code true}.</para>
/// </summary>
/// <param name="offset"> The offset into the input <seealso cref="CharStream"/>, relative to
/// the token start index, at which the specified lexer action should be
/// executed. </param>
/// <param name="action"> The lexer action to execute at a particular offset in the
/// input <seealso cref="CharStream"/>. </param>
LexerIndexedCustomAction(int offset, Ref<const LexerAction> action);
/// <summary>
/// Gets the location in the input <seealso cref="CharStream"/> at which the lexer
/// action should be executed. The value is interpreted as an offset relative
/// to the token start index.
/// </summary>
/// <returns> The location in the input <seealso cref="CharStream"/> at which the lexer
/// action should be executed. </returns>
int getOffset() const { return _offset; }
/// <summary>
/// Gets the lexer action to execute.
/// </summary>
/// <returns> A <seealso cref="LexerAction"/> object which executes the lexer action. </returns>
const Ref<const LexerAction>& getAction() const { return _action; }
/// Executes the wrapped action on the given lexer; the implementation assumes
/// the caller has already positioned the input stream.
void execute(Lexer *lexer) const override;
/// Compares this action with another lexer action; equal actions share the
/// action type, the offset and an equal wrapped action.
bool equals(const LexerAction &other) const override;
/// Returns the textual form "indexedCustom(<offset>, <wrapped action text>)".
std::string toString() const override;
protected:
/// Hashes the action type, the offset and the wrapped action.
size_t hashCodeImpl() const override;
private:
/// Wrapped action given to the constructor.
const Ref<const LexerAction> _action;
/// Offset given to the constructor.
const int _offset;
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,43 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "misc/MurmurHash.h"
#include "Lexer.h"
#include "support/Casts.h"
#include "atn/LexerModeAction.h"
using namespace antlr4;
using namespace antlr4::atn;
using namespace antlr4::misc;
using namespace antlrcpp;
/// Creates a {@code mode} action that carries the given lexer mode.
LexerModeAction::LexerModeAction(int mode)
    : LexerAction(LexerActionType::MODE, false),
      _mode(mode) {
}
/// Switches the given lexer to the stored mode via Lexer::setMode().
/// The lexer pointer is dereferenced without a null check.
void LexerModeAction::execute(Lexer *lexer) const {
  const int targetMode = getMode();
  lexer->setMode(targetMode);
}
/// Mixes the action type and the mode into a MurmurHash value.
/// The count handed to finish() (2) matches the number of update() calls.
size_t LexerModeAction::hashCodeImpl() const {
  size_t digest = MurmurHash::initialize();

  digest = MurmurHash::update(digest, static_cast<size_t>(getActionType()));
  digest = MurmurHash::update(digest, getMode());

  return MurmurHash::finish(digest, 2);
}
/// Two mode actions are equal when they are the same object, or when they
/// share the action type and the mode value.
bool LexerModeAction::equals(const LexerAction &other) const {
  if (this != std::addressof(other)) {
    if (getActionType() != other.getActionType()) {
      return false;
    }
    // The type tags matched, so `other` is treated as a LexerModeAction.
    const auto &rhs = downCast<const LexerModeAction&>(other);
    return getMode() == rhs.getMode();
  }
  return true;
}
/// Renders this action as "mode(<n>)", where <n> is the value of getMode().
std::string LexerModeAction::toString() const {
  std::string text = "mode(";
  text += std::to_string(getMode());
  text += ")";
  return text;
}

View File

@@ -0,0 +1,57 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "atn/LexerAction.h"
#include "atn/LexerActionType.h"
namespace antlr4 {
namespace atn {
/// <summary>
/// Implements the {@code mode} lexer action by calling <seealso cref="Lexer#setMode"/> with
/// the assigned mode.
///
/// @author Sam Harwell
/// @since 4.2
/// </summary>
class ANTLR4CPP_PUBLIC LexerModeAction final : public LexerAction {
public:
/// Returns {@code true} when the given action reports LexerActionType::MODE as its type.
static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::MODE; }
/// Pointer overload of is(): a null pointer yields {@code false}.
static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); }
/// <summary>
/// Constructs a new {@code mode} action with the specified mode value. </summary>
/// <param name="mode"> The mode value to pass to <seealso cref="Lexer#setMode"/>. </param>
explicit LexerModeAction(int mode);
/// <summary>
/// Get the lexer mode this action should transition the lexer to.
/// </summary>
/// <returns> The lexer mode for this {@code mode} command. </returns>
int getMode() const { return _mode; }
/// <summary>
/// {@inheritDoc}
///
/// <para>This action is implemented by calling <seealso cref="Lexer#setMode"/> with the
/// value provided by <seealso cref="#getMode"/>.</para>
/// </summary>
void execute(Lexer *lexer) const override;
/// Compares this action with another lexer action; equal actions share the
/// action type and the mode value.
bool equals(const LexerAction &obj) const override;
/// Returns the textual form "mode(<n>)", where <n> is the mode value.
std::string toString() const override;
protected:
/// Hashes the action type and the mode value.
size_t hashCodeImpl() const override;
private:
/// Mode value given to the constructor.
const int _mode;
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,36 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "misc/MurmurHash.h"
#include "Lexer.h"
#include "atn/LexerMoreAction.h"
using namespace antlr4;
using namespace antlr4::atn;
using namespace antlr4::misc;
/// Returns the shared {@code more} action. The object is created on the first
/// call and the same reference is handed out on every later call.
const Ref<const LexerMoreAction>& LexerMoreAction::getInstance() {
  // Plain `new` is used here because the constructor is private.
  static const Ref<const LexerMoreAction> singleton(new LexerMoreAction);
  return singleton;
}
/// Runs the {@code more} command by invoking Lexer::more() on the given lexer.
/// The lexer pointer is dereferenced without a null check.
void LexerMoreAction::execute(Lexer *lexer) const {
lexer->more();
}
/// Hashes the action type only; this action carries no other state.
size_t LexerMoreAction::hashCodeImpl() const {
  size_t digest = MurmurHash::initialize();

  digest = MurmurHash::update(digest, static_cast<size_t>(getActionType()));

  return MurmurHash::finish(digest, 1);
}
bool LexerMoreAction::equals(const LexerAction &other) const {
return this == std::addressof(other);
}
/// Returns the fixed text "more".
std::string LexerMoreAction::toString() const {
  return std::string("more");
}

View File

@@ -0,0 +1,53 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "atn/LexerAction.h"
#include "atn/LexerActionType.h"
namespace antlr4 {
namespace atn {
/// <summary>
/// Implements the {@code more} lexer action by calling <seealso cref="Lexer#more"/>.
///
/// <para>The {@code more} command does not have any parameters, so this action is
/// implemented as a singleton instance exposed by <seealso cref="#getInstance"/>.</para>
///
/// @author Sam Harwell
/// @since 4.2
/// </summary>
class ANTLR4CPP_PUBLIC LexerMoreAction final : public LexerAction {
public:
/// Returns {@code true} when the given action reports LexerActionType::MORE as its type.
static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::MORE; }
/// Pointer overload of is(): a null pointer yields {@code false}.
static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); }
/// <summary>
/// Provides a singleton instance of this parameterless lexer action.
/// </summary>
static const Ref<const LexerMoreAction>& getInstance();
/// <summary>
/// {@inheritDoc}
///
/// <para>This action is implemented by calling <seealso cref="Lexer#more"/>.</para>
/// </summary>
void execute(Lexer *lexer) const override;
/// Identity comparison: returns {@code true} only when {@code obj} is this object.
bool equals(const LexerAction &obj) const override;
/// Returns the fixed text "more".
std::string toString() const override;
protected:
/// Hashes the action type, the only state this action has.
size_t hashCodeImpl() const override;
private:
/// Constructs the singleton instance of the lexer {@code more} command.
LexerMoreAction() : LexerAction(LexerActionType::MORE, false) {}
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,36 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "misc/MurmurHash.h"
#include "Lexer.h"
#include "atn/LexerPopModeAction.h"
using namespace antlr4;
using namespace antlr4::atn;
using namespace antlr4::misc;
/// Returns the shared {@code popMode} action. The object is created on the
/// first call and the same reference is handed out on every later call.
const Ref<const LexerPopModeAction>& LexerPopModeAction::getInstance() {
  // Plain `new` is used here because the constructor is private.
  static const Ref<const LexerPopModeAction> singleton(new LexerPopModeAction);
  return singleton;
}
/// Runs the {@code popMode} command by invoking Lexer::popMode() on the given
/// lexer. The lexer pointer is dereferenced without a null check.
void LexerPopModeAction::execute(Lexer *lexer) const {
lexer->popMode();
}
/// Hashes the action type only; this action carries no other state.
size_t LexerPopModeAction::hashCodeImpl() const {
  size_t digest = MurmurHash::initialize();

  digest = MurmurHash::update(digest, static_cast<size_t>(getActionType()));

  return MurmurHash::finish(digest, 1);
}
bool LexerPopModeAction::equals(const LexerAction &other) const {
return this == std::addressof(other);
}
/// Returns the fixed text "popMode".
std::string LexerPopModeAction::toString() const {
  return std::string("popMode");
}

View File

@@ -0,0 +1,53 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "atn/LexerAction.h"
#include "atn/LexerActionType.h"
namespace antlr4 {
namespace atn {
/// <summary>
/// Implements the {@code popMode} lexer action by calling <seealso cref="Lexer#popMode"/>.
///
/// <para>The {@code popMode} command does not have any parameters, so this action is
/// implemented as a singleton instance exposed by <seealso cref="#getInstance"/>.</para>
///
/// @author Sam Harwell
/// @since 4.2
/// </summary>
class ANTLR4CPP_PUBLIC LexerPopModeAction final : public LexerAction {
public:
/// Returns {@code true} when the given action reports LexerActionType::POP_MODE as its type.
static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::POP_MODE; }
/// Pointer overload of is(): a null pointer yields {@code false}.
static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); }
/// <summary>
/// Provides a singleton instance of this parameterless lexer action.
/// </summary>
static const Ref<const LexerPopModeAction>& getInstance();
/// <summary>
/// {@inheritDoc}
///
/// <para>This action is implemented by calling <seealso cref="Lexer#popMode"/>.</para>
/// </summary>
void execute(Lexer *lexer) const override;
/// Identity comparison: returns {@code true} only when {@code other} is this object.
bool equals(const LexerAction &other) const override;
/// Returns the fixed text "popMode".
std::string toString() const override;
protected:
/// Hashes the action type, the only state this action has.
size_t hashCodeImpl() const override;
private:
/// Constructs the singleton instance of the lexer {@code popMode} command.
LexerPopModeAction() : LexerAction(LexerActionType::POP_MODE, false) {}
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,43 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "misc/MurmurHash.h"
#include "Lexer.h"
#include "support/Casts.h"
#include "atn/LexerPushModeAction.h"
using namespace antlr4;
using namespace antlr4::atn;
using namespace antlr4::misc;
using namespace antlrcpp;
/// Creates a {@code pushMode} action that carries the given lexer mode.
LexerPushModeAction::LexerPushModeAction(int mode)
    : LexerAction(LexerActionType::PUSH_MODE, false),
      _mode(mode) {
}
/// Pushes the stored mode onto the given lexer via Lexer::pushMode().
/// The lexer pointer is dereferenced without a null check.
void LexerPushModeAction::execute(Lexer *lexer) const {
  const int targetMode = getMode();
  lexer->pushMode(targetMode);
}
/// Mixes the action type and the mode into a MurmurHash value.
/// The count handed to finish() (2) matches the number of update() calls.
size_t LexerPushModeAction::hashCodeImpl() const {
  size_t digest = MurmurHash::initialize();

  digest = MurmurHash::update(digest, static_cast<size_t>(getActionType()));
  digest = MurmurHash::update(digest, getMode());

  return MurmurHash::finish(digest, 2);
}
/// Two pushMode actions are equal when they are the same object, or when they
/// share the action type and the mode value.
bool LexerPushModeAction::equals(const LexerAction &other) const {
  if (this != std::addressof(other)) {
    if (getActionType() != other.getActionType()) {
      return false;
    }
    // The type tags matched, so `other` is treated as a LexerPushModeAction.
    const auto &rhs = downCast<const LexerPushModeAction&>(other);
    return getMode() == rhs.getMode();
  }
  return true;
}
/// Renders this action as "pushMode(<n>)", where <n> is the value of getMode().
std::string LexerPushModeAction::toString() const {
  std::string text = "pushMode(";
  text += std::to_string(getMode());
  text += ")";
  return text;
}

View File

@@ -0,0 +1,57 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "atn/LexerAction.h"
#include "atn/LexerActionType.h"
namespace antlr4 {
namespace atn {
/// <summary>
/// Implements the {@code pushMode} lexer action by calling
/// <seealso cref="Lexer#pushMode"/> with the assigned mode.
///
/// @author Sam Harwell
/// @since 4.2
/// </summary>
class ANTLR4CPP_PUBLIC LexerPushModeAction final : public LexerAction {
public:
/// Returns {@code true} when the given action reports LexerActionType::PUSH_MODE as its type.
static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::PUSH_MODE; }
/// Pointer overload of is(): a null pointer yields {@code false}.
static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); }
/// <summary>
/// Constructs a new {@code pushMode} action with the specified mode value. </summary>
/// <param name="mode"> The mode value to pass to <seealso cref="Lexer#pushMode"/>. </param>
explicit LexerPushModeAction(int mode);
/// <summary>
/// Get the lexer mode this action should transition the lexer to.
/// </summary>
/// <returns> The lexer mode for this {@code pushMode} command. </returns>
int getMode() const { return _mode; }
/// <summary>
/// {@inheritDoc}
///
/// <para>This action is implemented by calling <seealso cref="Lexer#pushMode"/> with the
/// value provided by <seealso cref="#getMode"/>.</para>
/// </summary>
void execute(Lexer *lexer) const override;
/// Compares this action with another lexer action; equal actions share the
/// action type and the mode value.
bool equals(const LexerAction &obj) const override;
/// Returns the textual form "pushMode(<n>)", where <n> is the mode value.
std::string toString() const override;
protected:
/// Hashes the action type and the mode value.
size_t hashCodeImpl() const override;
private:
/// Mode value given to the constructor.
const int _mode;
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,36 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "misc/MurmurHash.h"
#include "Lexer.h"
#include "atn/LexerSkipAction.h"
using namespace antlr4;
using namespace antlr4::atn;
using namespace antlr4::misc;
/// Returns the shared {@code skip} action. The object is created on the first
/// call and the same reference is handed out on every later call.
const Ref<const LexerSkipAction>& LexerSkipAction::getInstance() {
  // Plain `new` is used here because the constructor is private.
  static const Ref<const LexerSkipAction> singleton(new LexerSkipAction);
  return singleton;
}
/// Runs the {@code skip} command by invoking Lexer::skip() on the given lexer.
/// The lexer pointer is dereferenced without a null check.
void LexerSkipAction::execute(Lexer *lexer) const {
lexer->skip();
}
/// Hashes the action type only; this action carries no other state.
size_t LexerSkipAction::hashCodeImpl() const {
  size_t digest = MurmurHash::initialize();

  digest = MurmurHash::update(digest, static_cast<size_t>(getActionType()));

  return MurmurHash::finish(digest, 1);
}
bool LexerSkipAction::equals(const LexerAction &other) const {
return this == std::addressof(other);
}
/// Returns the fixed text "skip".
std::string LexerSkipAction::toString() const {
  return std::string("skip");
}

View File

@@ -0,0 +1,51 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "atn/LexerAction.h"
#include "atn/LexerActionType.h"
namespace antlr4 {
namespace atn {
/// <summary>
/// Implements the {@code skip} lexer action by calling <seealso cref="Lexer#skip"/>.
///
/// <para>The {@code skip} command does not have any parameters, so this action is
/// implemented as a singleton instance exposed by <seealso cref="#getInstance"/>.</para>
///
/// @author Sam Harwell
/// @since 4.2
/// </summary>
class ANTLR4CPP_PUBLIC LexerSkipAction final : public LexerAction {
public:
/// Returns {@code true} when the given action reports LexerActionType::SKIP as its type.
static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::SKIP; }
/// Pointer overload of is(): a null pointer yields {@code false}.
static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); }
/// Provides a singleton instance of this parameterless lexer action.
static const Ref<const LexerSkipAction>& getInstance();
/// <summary>
/// {@inheritDoc}
///
/// <para>This action is implemented by calling <seealso cref="Lexer#skip"/>.</para>
/// </summary>
void execute(Lexer *lexer) const override;
/// Identity comparison: returns {@code true} only when {@code obj} is this object.
bool equals(const LexerAction &obj) const override;
/// Returns the fixed text "skip".
std::string toString() const override;
protected:
/// Hashes the action type, the only state this action has.
size_t hashCodeImpl() const override;
private:
/// Constructs the singleton instance of the lexer {@code skip} command.
LexerSkipAction() : LexerAction(LexerActionType::SKIP, false) {}
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,43 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "misc/MurmurHash.h"
#include "Lexer.h"
#include "support/Casts.h"
#include "atn/LexerTypeAction.h"
using namespace antlr4;
using namespace antlr4::atn;
using namespace antlr4::misc;
using namespace antlrcpp;
/// Creates a {@code type} action that carries the given token type.
LexerTypeAction::LexerTypeAction(int type)
    : LexerAction(LexerActionType::TYPE, false),
      _type(type) {
}
/// Assigns the stored token type to the given lexer via Lexer::setType().
/// The lexer pointer is dereferenced without a null check.
void LexerTypeAction::execute(Lexer *lexer) const {
  const int tokenType = getType();
  lexer->setType(tokenType);
}
/// Mixes the action type and the token type into a MurmurHash value.
/// The count handed to finish() (2) matches the number of update() calls.
size_t LexerTypeAction::hashCodeImpl() const {
  size_t digest = MurmurHash::initialize();

  digest = MurmurHash::update(digest, static_cast<size_t>(getActionType()));
  digest = MurmurHash::update(digest, getType());

  return MurmurHash::finish(digest, 2);
}
/// Two type actions are equal when they are the same object, or when they
/// share the action type and the token type value.
bool LexerTypeAction::equals(const LexerAction &other) const {
  if (this != std::addressof(other)) {
    if (getActionType() != other.getActionType()) {
      return false;
    }
    // The type tags matched, so `other` is treated as a LexerTypeAction.
    const auto &rhs = downCast<const LexerTypeAction&>(other);
    return getType() == rhs.getType();
  }
  return true;
}
/// Renders this action as "type(<n>)", where <n> is the value of getType().
std::string LexerTypeAction::toString() const {
  std::string text = "type(";
  text += std::to_string(getType());
  text += ")";
  return text;
}

View File

@@ -0,0 +1,51 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "atn/LexerActionType.h"
#include "atn/LexerAction.h"
namespace antlr4 {
namespace atn {
/// Implements the {@code type} lexer action by calling <seealso cref="Lexer#setType"/>
/// with the assigned type.
class ANTLR4CPP_PUBLIC LexerTypeAction final : public LexerAction {
public:
/// Returns {@code true} when the given action reports LexerActionType::TYPE as its type.
static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::TYPE; }
/// Pointer overload of is(): a null pointer yields {@code false}.
static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); }
/// <summary>
/// Constructs a new {@code type} action with the specified token type value. </summary>
/// <param name="type"> The type to assign to the token using <seealso cref="Lexer#setType"/>. </param>
explicit LexerTypeAction(int type);
/// <summary>
/// Gets the type to assign to a token created by the lexer. </summary>
/// <returns> The type to assign to a token created by the lexer. </returns>
int getType() const { return _type; }
/// <summary>
/// {@inheritDoc}
///
/// <para>This action is implemented by calling <seealso cref="Lexer#setType"/> with the
/// value provided by <seealso cref="#getType"/>.</para>
/// </summary>
void execute(Lexer *lexer) const override;
/// Compares this action with another lexer action; equal actions share the
/// action type and the token type value.
bool equals(const LexerAction &obj) const override;
/// Returns the textual form "type(<n>)", where <n> is the token type value.
std::string toString() const override;
protected:
/// Hashes the action type and the token type value.
size_t hashCodeImpl() const override;
private:
/// Token type value given to the constructor.
const int _type;
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,16 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "atn/LookaheadEventInfo.h"
using namespace antlr4;
using namespace antlr4::atn;
/// Forwards the common event data to DecisionEventInfo and records the
/// predicted alternative in this object's own `predictedAlt` member.
LookaheadEventInfo::LookaheadEventInfo(size_t decision, ATNConfigSet *configs, size_t predictedAlt,
                                       TokenStream *input, size_t startIndex, size_t stopIndex, bool fullCtx)
    : DecisionEventInfo(decision, configs, input, startIndex, stopIndex, fullCtx),
      predictedAlt(predictedAlt) {  // member initialized from the same-named parameter
}

View File

@@ -0,0 +1,42 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "atn/DecisionEventInfo.h"
namespace antlr4 {
namespace atn {
/// This class represents profiling event information for tracking the lookahead
/// depth required in order to make a prediction.
class ANTLR4CPP_PUBLIC LookaheadEventInfo : public DecisionEventInfo {
public:
/// The alternative chosen by adaptivePredict(), not necessarily
/// the outermost alt shown for a rule; left-recursive rules have
/// user-level alts that differ from the rewritten rule with a (...) block
/// and a (..)* loop. Defaults to 0 and is set by the constructor.
size_t predictedAlt = 0;
/// <summary>
/// Constructs a new instance of the <seealso cref="LookaheadEventInfo"/> class with
/// the specified detailed lookahead information.
/// </summary>
/// <param name="decision"> The decision number </param>
/// <param name="configs"> The final configuration set containing the necessary
/// information to determine the result of a prediction, or {@code null} if
/// the final configuration set is not available </param>
/// <param name="predictedAlt"> The predicted alternative; stored in
/// <seealso cref="#predictedAlt"/> </param>
/// <param name="input"> The input token stream </param>
/// <param name="startIndex"> The start index for the current prediction </param>
/// <param name="stopIndex"> The index at which the prediction was finally made </param>
/// <param name="fullCtx"> {@code true} if the current lookahead is part of an LL
/// prediction; otherwise, {@code false} if the current lookahead is part of
/// an SLL prediction </param>
LookaheadEventInfo(size_t decision, ATNConfigSet *configs, size_t predictedAlt, TokenStream *input, size_t startIndex,
size_t stopIndex, bool fullCtx);
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,26 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "atn/ATNState.h"
namespace antlr4 {
namespace atn {
/// Mark the end of a * or + loop.
class ANTLR4CPP_PUBLIC LoopEndState final : public ATNState {
public:
/// Returns true when the given state reports ATNStateType::LOOP_END as its type.
static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::LOOP_END; }
/// Pointer overload of is(): a null pointer yields false.
static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); }
/// Null by default; not assigned anywhere in this class.
/// NOTE(review): presumably points at the state that loops back to the start
/// of the loop -- confirm where it is set.
ATNState *loopBackState = nullptr;
/// Creates a state tagged with ATNStateType::LOOP_END.
LoopEndState() : ATNState(ATNStateType::LOOP_END) {}
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,22 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "atn/NotSetTransition.h"
#include "atn/ATNState.h"
#include "misc/IntervalSet.h"
using namespace antlr4;
using namespace antlr4::atn;
/// Creates a NOT_SET transition to `target`; the interval set is moved into
/// the SetTransition base.
NotSetTransition::NotSetTransition(ATNState *target, misc::IntervalSet set)
    : SetTransition(TransitionType::NOT_SET, target, std::move(set)) {
}
/// Matches a symbol that lies inside [minVocabSymbol, maxVocabSymbol] and is
/// NOT matched by the underlying SetTransition.
bool NotSetTransition::matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const {
  // Symbols outside the vocabulary range never match; the set is not consulted.
  if (symbol < minVocabSymbol || symbol > maxVocabSymbol) {
    return false;
  }
  const bool inSet = SetTransition::matches(symbol, minVocabSymbol, maxVocabSymbol);
  return !inSet;
}
/// Renders this transition as
/// "NOT_SET <Transition text> { <SetTransition text> }".
std::string NotSetTransition::toString() const {
  std::string text = "NOT_SET ";
  text += Transition::toString();
  text += " { ";
  text += SetTransition::toString();
  text += " }";
  return text;
}

View File

@@ -0,0 +1,27 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "atn/SetTransition.h"
namespace antlr4 {
namespace atn {
/// A set transition with inverted matching: it accepts a symbol that lies
/// within the vocabulary range and is not matched by the underlying
/// SetTransition.
class ANTLR4CPP_PUBLIC NotSetTransition final : public SetTransition {
public:
  /// Returns true when the given transition reports TransitionType::NOT_SET as its type.
  static bool is(const Transition &transition) { return transition.getTransitionType() == TransitionType::NOT_SET; }

  /// Pointer overload of is(): a null pointer yields false.
  static bool is(const Transition *transition) { return transition != nullptr && is(*transition); }

  /// Creates a NOT_SET transition to `target` over the given interval set.
  NotSetTransition(ATNState *target, misc::IntervalSet set);

  /// True when `symbol` is within [minVocabSymbol, maxVocabSymbol] and the
  /// base SetTransition does not match it.
  bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override;

  /// Returns "NOT_SET <Transition text> { <SetTransition text> }".
  std::string toString() const override;
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,16 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "atn/OrderedATNConfigSet.h"
using namespace antlr4::atn;
/// Hashes a configuration with the configuration's own hashCode().
size_t OrderedATNConfigSet::hashCode(const ATNConfig &atnConfig) const {
  const size_t code = atnConfig.hashCode();
  return code;
}
/// Compares two configurations with ATNConfig's operator==.
bool OrderedATNConfigSet::equals(const ATNConfig &lhs, const ATNConfig &rhs) const {
  const bool same = (lhs == rhs);
  return same;
}

View File

@@ -0,0 +1,25 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "atn/ATNConfigSet.h"
#include "atn/ATNConfig.h"
namespace antlr4 {
namespace atn {
/// An ATNConfigSet whose hashing and equality hooks use the configuration's
/// own hashCode() and operator==.
class ANTLR4CPP_PUBLIC OrderedATNConfigSet final : public ATNConfigSet {
public:
/// Creates an empty set using the base class defaults.
OrderedATNConfigSet() = default;
private:
/// Returns atnConfig.hashCode().
size_t hashCode(const ATNConfig &atnConfig) const override;
/// Returns lhs == rhs.
bool equals(const ATNConfig &lhs, const ATNConfig &rhs) const override;
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,102 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "atn/ProfilingATNSimulator.h"
#include "dfa/DFA.h"
#include "atn/ParseInfo.h"
using namespace antlr4::atn;
// Remembers the simulator whose statistics this object exposes; the pointer is
// only stored here, never dereferenced or freed.
ParseInfo::ParseInfo(ProfilingATNSimulator *atnSimulator)
  : _atnSimulator(atnSimulator) {}
// Nothing to release: the simulator pointer is not owned by this object.
ParseInfo::~ParseInfo() = default;
std::vector<DecisionInfo> ParseInfo::getDecisionInfo() {
return _atnSimulator->getDecisionInfo();
}
std::vector<size_t> ParseInfo::getLLDecisions() {
std::vector<DecisionInfo> decisions = _atnSimulator->getDecisionInfo();
std::vector<size_t> LL;
for (size_t i = 0; i < decisions.size(); ++i) {
long long fallBack = decisions[i].LL_Fallback;
if (fallBack > 0) {
LL.push_back(i);
}
}
return LL;
}
long long ParseInfo::getTotalTimeInPrediction() {
std::vector<DecisionInfo> decisions = _atnSimulator->getDecisionInfo();
long long t = 0;
for (size_t i = 0; i < decisions.size(); ++i) {
t += decisions[i].timeInPrediction;
}
return t;
}
long long ParseInfo::getTotalSLLLookaheadOps() {
std::vector<DecisionInfo> decisions = _atnSimulator->getDecisionInfo();
long long k = 0;
for (size_t i = 0; i < decisions.size(); ++i) {
k += decisions[i].SLL_TotalLook;
}
return k;
}
long long ParseInfo::getTotalLLLookaheadOps() {
std::vector<DecisionInfo> decisions = _atnSimulator->getDecisionInfo();
long long k = 0;
for (size_t i = 0; i < decisions.size(); i++) {
k += decisions[i].LL_TotalLook;
}
return k;
}
long long ParseInfo::getTotalSLLATNLookaheadOps() {
std::vector<DecisionInfo> decisions = _atnSimulator->getDecisionInfo();
long long k = 0;
for (size_t i = 0; i < decisions.size(); ++i) {
k += decisions[i].SLL_ATNTransitions;
}
return k;
}
long long ParseInfo::getTotalLLATNLookaheadOps() {
std::vector<DecisionInfo> decisions = _atnSimulator->getDecisionInfo();
long long k = 0;
for (size_t i = 0; i < decisions.size(); ++i) {
k += decisions[i].LL_ATNTransitions;
}
return k;
}
long long ParseInfo::getTotalATNLookaheadOps() {
std::vector<DecisionInfo> decisions = _atnSimulator->getDecisionInfo();
long long k = 0;
for (size_t i = 0; i < decisions.size(); ++i) {
k += decisions[i].SLL_ATNTransitions;
k += decisions[i].LL_ATNTransitions;
}
return k;
}
size_t ParseInfo::getDFASize() {
size_t n = 0;
std::vector<dfa::DFA> &decisionToDFA = _atnSimulator->decisionToDFA;
for (size_t i = 0; i < decisionToDFA.size(); ++i) {
n += getDFASize(i);
}
return n;
}
// Number of entries in the `states` container of the DFA stored at index
// `decision`. The index is used as-is (operator[], no range check).
size_t ParseInfo::getDFASize(size_t decision) {
  return _atnSimulator->decisionToDFA[decision].states.size();
}

View File

@@ -0,0 +1,102 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "atn/DecisionInfo.h"
namespace antlr4 {
namespace atn {
class ProfilingATNSimulator;
/// This class provides access to specific and aggregate statistics gathered
/// during profiling of a parser.
///
/// Every query reads from the ProfilingATNSimulator passed to the constructor;
/// the simulator is held by raw pointer and is not owned by this object.
class ANTLR4CPP_PUBLIC ParseInfo {
public:
/// <summary>
/// Creates a view over the statistics collected by {@code atnSimulator}.
/// The pointer is stored as-is and dereferenced, without a null check, by
/// every query method, so it must be non-null and outlive this object.
/// </summary>
ParseInfo(ProfilingATNSimulator *atnSimulator);
/// Copies refer to the same simulator as the object they were copied from.
ParseInfo(ParseInfo const&) = default;
virtual ~ParseInfo();
/// Copy assignment; afterwards both objects refer to the same simulator.
ParseInfo& operator=(ParseInfo const&) = default;
/// <summary>
/// Gets an array of <seealso cref="DecisionInfo"/> instances containing the profiling
/// information gathered for each decision in the ATN.
/// </summary>
/// <returns> An array of <seealso cref="DecisionInfo"/> instances, indexed by decision
/// number. </returns>
virtual std::vector<DecisionInfo> getDecisionInfo();
/// <summary>
/// Gets the decision numbers for decisions that required one or more
/// full-context predictions during parsing. These are decisions for which
/// <seealso cref="DecisionInfo#LL_Fallback"/> is greater than zero.
/// </summary>
/// <returns> A list of decision numbers, in ascending order, which required
/// one or more full-context predictions during parsing. </returns>
virtual std::vector<size_t> getLLDecisions();
/// <summary>
/// Gets the total time spent during prediction across all decisions made
/// during parsing. This value is the sum of
/// <seealso cref="DecisionInfo#timeInPrediction"/> for all decisions.
/// </summary>
virtual long long getTotalTimeInPrediction();
/// <summary>
/// Gets the total number of SLL lookahead operations across all decisions
/// made during parsing. This value is the sum of
/// <seealso cref="DecisionInfo#SLL_TotalLook"/> for all decisions.
/// </summary>
virtual long long getTotalSLLLookaheadOps();
/// <summary>
/// Gets the total number of LL lookahead operations across all decisions
/// made during parsing. This value is the sum of
/// <seealso cref="DecisionInfo#LL_TotalLook"/> for all decisions.
/// </summary>
virtual long long getTotalLLLookaheadOps();
/// <summary>
/// Gets the total number of ATN lookahead operations for SLL prediction
/// across all decisions made during parsing. This value is the sum of
/// <seealso cref="DecisionInfo#SLL_ATNTransitions"/> for all decisions.
/// </summary>
virtual long long getTotalSLLATNLookaheadOps();
/// <summary>
/// Gets the total number of ATN lookahead operations for LL prediction
/// across all decisions made during parsing. This value is the sum of
/// <seealso cref="DecisionInfo#LL_ATNTransitions"/> for all decisions.
/// </summary>
virtual long long getTotalLLATNLookaheadOps();
/// <summary>
/// Gets the total number of ATN lookahead operations for SLL and LL
/// prediction across all decisions made during parsing.
///
/// <para>
/// This value is the sum of <seealso cref="#getTotalSLLATNLookaheadOps"/> and
/// <seealso cref="#getTotalLLATNLookaheadOps"/>.</para>
/// </summary>
virtual long long getTotalATNLookaheadOps();
/// <summary>
/// Gets the total number of DFA states stored in the DFA cache for all
/// decisions in the ATN. Computed by calling
/// <seealso cref="#getDFASize(size_t)"/> once per decision.
/// </summary>
virtual size_t getDFASize();
/// <summary>
/// Gets the total number of DFA states stored in the DFA cache for a
/// particular decision.
/// </summary>
/// <param name="decision"> Index into the simulator's decisionToDFA vector;
/// it is not range-checked, so it must be a valid index. </param>
virtual size_t getDFASize(size_t decision);
protected:
const ProfilingATNSimulator *_atnSimulator; // non-owning, we are created by this simulator.
};
} // namespace atn
} // namespace antlr4

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,911 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "PredictionMode.h"
#include "dfa/DFAState.h"
#include "atn/ATNSimulator.h"
#include "atn/PredictionContext.h"
#include "atn/PredictionContextMergeCache.h"
#include "atn/ParserATNSimulatorOptions.h"
#include "SemanticContext.h"
#include "atn/ATNConfig.h"
namespace antlr4 {
namespace atn {
/**
* The embodiment of the adaptive LL(*), ALL(*), parsing strategy.
*
* <p>
* The basic complexity of the adaptive strategy makes it harder to understand.
* We begin with ATN simulation to build paths in a DFA. Subsequent prediction
* requests go through the DFA first. If they reach a state without an edge for
* the current symbol, the algorithm fails over to the ATN simulation to
* complete the DFA path for the current input (until it finds a conflict state
* or uniquely predicting state).</p>
*
* <p>
* All of that is done without using the outer context because we want to create
* a DFA that is not dependent upon the rule invocation stack when we do a
* prediction. One DFA works in all contexts. We avoid using context not
* necessarily because it's slower, although it can be, but because of the DFA
* caching problem. The closure routine only considers the rule invocation stack
* created during prediction beginning in the decision rule. For example, if
* prediction occurs without invoking another rule's ATN, there are no context
* stacks in the configurations. When lack of context leads to a conflict, we
* don't know if it's an ambiguity or a weakness in the strong LL(*) parsing
* strategy (versus full LL(*)).</p>
*
* <p>
* When SLL yields a configuration set with conflict, we rewind the input and
* retry the ATN simulation, this time using full outer context without adding
* to the DFA. Configuration context stacks will be the full invocation stacks
* from the start rule. If we get a conflict using full context, then we can
* definitively say we have a true ambiguity for that input sequence. If we
* don't get a conflict, it implies that the decision is sensitive to the outer
* context. (It is not context-sensitive in the sense of context-sensitive
* grammars.)</p>
*
* <p>
* The next time we reach this DFA state with an SLL conflict, through DFA
* simulation, we will again retry the ATN simulation using full context mode.
* This is slow because we can't save the results and have to "interpret" the
* ATN each time we get that input.</p>
*
* <p>
* <strong>CACHING FULL CONTEXT PREDICTIONS</strong></p>
*
* <p>
* We could cache results from full context to predicted alternative easily and
* that saves a lot of time but doesn't work in presence of predicates. The set
* of visible predicates from the ATN start state changes depending on the
* context, because closure can fall off the end of a rule. I tried to cache
* tuples (stack context, semantic context, predicted alt) but it was slower
* than interpreting and much more complicated. Also required a huge amount of
* memory. The goal is not to create the world's fastest parser anyway. I'd like
* to keep this algorithm simple. By launching multiple threads, we can improve
* the speed of parsing across a large number of files.</p>
*
* <p>
* There is no strict ordering between the amount of input used by SLL vs LL,
* which makes it really hard to build a cache for full context. Let's say that
* we have input A B C that leads to an SLL conflict with full context X. That
* implies that using X we might only use A B but we could also use A B C D to
* resolve conflict. Input A B C D could predict alternative 1 in one position
* in the input and A B C E could predict alternative 2 in another position in
* input. The conflicting SLL configurations could still be non-unique in the
* full context prediction, which would lead us to requiring more input than the
* original A B C. To make a prediction cache work, we have to track the exact
* input used during the previous prediction. That amounts to a cache that maps
* X to a specific DFA for that context.</p>
*
* <p>
* Something should be done for left-recursive expression predictions. They are
* likely LL(1) + pred eval. Easier to do the whole SLL unless error and retry
* with full LL thing Sam does.</p>
*
* <p>
* <strong>AVOIDING FULL CONTEXT PREDICTION</strong></p>
*
* <p>
* We avoid doing full context retry when the outer context is empty, we did not
* dip into the outer context by falling off the end of the decision state rule,
* or when we force SLL mode.</p>
*
* <p>
* As an example of the not-dip-into-outer-context case, consider super
* constructor calls versus function calls. One grammar might look like
* this:</p>
*
* <pre>
* ctorBody
* : '{' superCall? stat* '}'
* ;
* </pre>
*
* <p>
* Or, you might see something like</p>
*
* <pre>
* stat
* : superCall ';'
* | expression ';'
* | ...
* ;
* </pre>
*
* <p>
* In both cases I believe that no closure operations will dip into the outer
* context. In the first case ctorBody in the worst case will stop at the '}'.
* In the 2nd case it should stop at the ';'. Both cases should stay within the
* entry rule and not dip into the outer context.</p>
*
* <p>
* <strong>PREDICATES</strong></p>
*
* <p>
* Predicates are always evaluated if present, in both SLL and LL. SLL and
* LL simulation deals with predicates differently. SLL collects predicates as
* it performs closure operations like ANTLR v3 did. It delays predicate
* evaluation until it reaches an accept state. This allows us to cache the SLL
* ATN simulation whereas, if we had evaluated predicates on-the-fly during
* closure, the DFA state configuration sets would be different and we couldn't
* build up a suitable DFA.</p>
*
* <p>
* When building a DFA accept state during ATN simulation, we evaluate any
* predicates and return the sole semantically valid alternative. If there is
* more than 1 alternative, we report an ambiguity. If there are 0 alternatives,
* we throw an exception. Alternatives without predicates act like they have
* true predicates. The simple way to think about it is to strip away all
* alternatives with false predicates and choose the minimum alternative that
* remains.</p>
*
* <p>
* When we start in the DFA and reach an accept state that's predicated, we test
* those and return the minimum semantically viable alternative. If no
* alternatives are viable, we throw an exception.</p>
*
* <p>
* During full LL ATN simulation, closure always evaluates predicates
* on-the-fly. This is crucial to reducing the configuration set size during
* closure. It hits a landmine when parsing with the Java grammar, for example,
* without this on-the-fly evaluation.</p>
*
* <p>
* <strong>SHARING DFA</strong></p>
*
* <p>
* All instances of the same parser share the same decision DFAs through a
* static field. Each instance gets its own ATN simulator but they share the
* same {@link #decisionToDFA} field. They also share a
* {@link PredictionContextCache} object that makes sure that all
* {@link PredictionContext} objects are shared among the DFA states. This makes
* a big size difference.</p>
*
* <p>
* <strong>THREAD SAFETY</strong></p>
*
* <p>
* The {@link ParserATNSimulator} locks on the {@link #decisionToDFA} field when
* it adds a new DFA object to that array. {@link #addDFAEdge}
* locks on the DFA for the current decision when setting the
* {@link DFAState#edges} field. {@link #addDFAState} locks on
* the DFA for the current decision when looking up a DFA state to see if it
* already exists. We must make sure that all requests to add DFA states that
* are equivalent result in the same shared DFA object. This is because lots of
* threads will be trying to update the DFA at once. The
* {@link #addDFAState} method also locks inside the DFA lock
* but this time on the shared context cache when it rebuilds the
* configurations' {@link PredictionContext} objects using cached
* subgraphs/nodes. No other locking occurs, even during DFA simulation. This is
* safe as long as we can guarantee that all threads referencing
* {@code s.edge[t]} get the same physical target {@link DFAState}, or
* {@code null}. Once into the DFA, the DFA simulation does not reference the
* {@link DFA#states} map. It follows the {@link DFAState#edges} field to new
* targets. The DFA simulator will either find {@link DFAState#edges} to be
* {@code null}, to be non-{@code null} and {@code dfa.edges[t]} null, or
* {@code dfa.edges[t]} to be non-null. The
* {@link #addDFAEdge} method could be racing to set the field
* but in either case the DFA simulator works; if {@code null}, and requests ATN
* simulation. It could also race trying to get {@code dfa.edges[t]}, but either
* way it will work because it's not doing a test and set operation.</p>
*
* <p>
* <strong>Starting with SLL then failing to combined SLL/LL (Two-Stage
* Parsing)</strong></p>
*
* <p>
* Sam pointed out that if SLL does not give a syntax error, then there is no
* point in doing full LL, which is slower. We only have to try LL if we get a
* syntax error. For maximum speed, Sam starts the parser set to pure SLL
* mode with the {@link BailErrorStrategy}:</p>
*
* <pre>
* parser.{@link Parser#getInterpreter() getInterpreter()}.{@link #setPredictionMode setPredictionMode}{@code (}{@link PredictionMode#SLL}{@code )};
* parser.{@link Parser#setErrorHandler setErrorHandler}(new {@link BailErrorStrategy}());
* </pre>
*
* <p>
* If it does not get a syntax error, then we're done. If it does get a syntax
* error, we need to retry with the combined SLL/LL strategy.</p>
*
* <p>
* The reason this works is as follows. If there are no SLL conflicts, then the
* grammar is SLL (at least for that input set). If there is an SLL conflict,
* the full LL analysis must yield a set of viable alternatives which is a
* subset of the alternatives reported by SLL. If the LL set is a singleton,
* then the grammar is LL but not SLL. If the LL set is the same size as the SLL
* set, the decision is SLL. If the LL set has size &gt; 1, then that decision
* is truly ambiguous on the current input. If the LL set is smaller, then the
* SLL conflict resolution might choose an alternative that the full LL would
* rule out as a possibility based upon better context information. If that's
* the case, then the SLL parse will definitely get an error because the full LL
* analysis says it's not viable. If SLL conflict resolution chooses an
* alternative within the LL set, then both SLL and LL would choose the same
* alternative because they both choose the minimum of multiple conflicting
* alternatives.</p>
*
* <p>
* Let's say we have a set of SLL conflicting alternatives {@code {1, 2, 3}} and
* a smaller LL set called <em>s</em>. If <em>s</em> is {@code {2, 3}}, then SLL
* parsing will get an error because SLL will pursue alternative 1. If
* <em>s</em> is {@code {1, 2}} or {@code {1, 3}} then both SLL and LL will
* choose the same alternative because alternative one is the minimum of either
* set. If <em>s</em> is {@code {2}} or {@code {3}} then SLL will get a syntax
* error. If <em>s</em> is {@code {1}} then SLL will succeed.</p>
*
* <p>
* Of course, if the input is invalid, then we will get an error for sure in
* both SLL and LL parsing. Erroneous input will therefore require 2 passes over
* the input.</p>
*/
class ANTLR4CPP_PUBLIC ParserATNSimulator : public ATNSimulator {
public:
/// Testing only!
ParserATNSimulator(const ATN &atn, std::vector<dfa::DFA> &decisionToDFA,
PredictionContextCache &sharedContextCache);
ParserATNSimulator(Parser *parser, const ATN &atn, std::vector<dfa::DFA> &decisionToDFA,
PredictionContextCache &sharedContextCache);
ParserATNSimulator(Parser *parser, const ATN &atn, std::vector<dfa::DFA> &decisionToDFA,
PredictionContextCache &sharedContextCache,
const ParserATNSimulatorOptions &options);
virtual void reset() override;
virtual void clearDFA() override;
virtual size_t adaptivePredict(TokenStream *input, size_t decision, ParserRuleContext *outerContext);
static const bool TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT;
std::vector<dfa::DFA> &decisionToDFA;
/** Implements first-edge (loop entry) elimination as an optimization
* during closure operations. See antlr/antlr4#1398.
*
* The optimization is to avoid adding the loop entry config when
* the exit path can only lead back to the same
* StarLoopEntryState after popping context at the rule end state
* (traversing only epsilon edges, so we're still in closure, in
* this same rule).
*
* We need to detect any state that can reach loop entry on
* epsilon w/o exiting rule. We don't have to look at FOLLOW
* links, just ensure that all stack tops for config refer to key
* states in LR rule.
*
* To verify we are in the right situation we must first check
* closure is at a StarLoopEntryState generated during LR removal.
* Then we check that each stack top of context is a return state
* from one of these cases:
*
* 1. 'not' expr, '(' type ')' expr. The return state points at loop entry state
* 2. expr op expr. The return state is the block end of internal block of (...)*
* 3. 'between' expr 'and' expr. The return state of 2nd expr reference.
* That state points at block end of internal block of (...)*.
* 4. expr '?' expr ':' expr. The return state points at block end,
* which points at loop entry state.
*
* If any is true for each stack top, then closure does not add a
* config to the current config set for edge[0], the loop entry branch.
*
* Conditions fail if any context for the current config is:
*
* a. empty (we'd fall out of expr to do a global FOLLOW which could
* even be to some weird spot in expr) or,
* b. lies outside of expr or,
* c. lies within expr but at a state not the BlockEndState
* generated during LR removal
*
* Do we need to evaluate predicates ever in closure for this case?
*
* No. Predicates, including precedence predicates, are only
* evaluated when computing a DFA start state. I.e., only before
* the lookahead (but not parser) consumes a token.
*
* There are no epsilon edges allowed in LR rule alt blocks or in
* the "primary" part (ID here). If closure is in
* StarLoopEntryState any lookahead operation will have consumed a
* token as there are no epsilon-paths that lead to
* StarLoopEntryState. We do not have to evaluate predicates
* therefore if we are in the generated StarLoopEntryState of a LR
* rule. Note that when making a prediction starting at that
* decision point, decision d=2, compute-start-state performs
* closure starting at edges[0], edges[1] emanating from
* StarLoopEntryState. That means it is not performing closure on
* StarLoopEntryState during compute-start-state.
*
* How do we know this always gives same prediction answer?
*
* Without predicates, loop entry and exit paths are ambiguous
* upon remaining input +b (in, say, a+b). Either paths lead to
* valid parses. Closure can lead to consuming + immediately or by
* falling out of this call to expr back into expr and loop back
* again to StarLoopEntryState to match +b. In this special case,
* we choose the more efficient path, which is to take the bypass
* path.
*
* The lookahead language has not changed because closure chooses
* one path over the other. Both paths lead to consuming the same
* remaining input during a lookahead operation. If the next token
* is an operator, lookahead will enter the choice block with
* operators. If it is not, lookahead will exit expr. Same as if
* closure had chosen to enter the choice block immediately.
*
* Closure is examining one config (some loopentrystate, some alt,
* context) which means it is considering exactly one alt. Closure
* always copies the same alt to any derived configs.
*
* How do we know this optimization doesn't mess up precedence in
* our parse trees?
*
* Looking through expr from left edge of stat only has to confirm
* that an input, say, a+b+c; begins with any valid interpretation
* of an expression. The precedence actually doesn't matter when
* making a decision in stat seeing through expr. It is only when
* parsing rule expr that we must use the precedence to get the
* right interpretation and, hence, parse tree.
*/
bool canDropLoopEntryEdgeInLeftRecursiveRule(ATNConfig *config) const;
virtual std::string getRuleName(size_t index);
virtual Ref<ATNConfig> precedenceTransition(Ref<ATNConfig> const& config, const PrecedencePredicateTransition *pt,
bool collectPredicates, bool inContext, bool fullCtx);
void setPredictionMode(PredictionMode newMode);
PredictionMode getPredictionMode();
Parser* getParser();
virtual std::string getTokenName(size_t t);
virtual std::string getLookaheadName(TokenStream *input);
/// <summary>
/// Used for debugging in adaptivePredict around execATN but I cut
/// it out for clarity now that alg. works well. We can leave this
/// "dead" code for a bit.
/// </summary>
virtual void dumpDeadEndConfigs(NoViableAltException &nvae);
protected:
Parser *const parser;
/// <summary>
/// Each prediction operation uses a cache for merge of prediction contexts.
/// Don't keep around as it wastes huge amounts of memory. The merge cache
/// isn't synchronized but we're ok since two threads shouldn't reuse same
/// parser/atnsim object because it can only handle one input at a time.
/// This maps graphs a and b to merged result c. (a,b)->c. We can avoid
/// the merge if we ever see a and b again. Note that (b,a)->c should
/// also be examined during cache lookup.
/// </summary>
PredictionContextMergeCache mergeCache;
size_t _mergeCacheCounter = 0;
// LAME globals to avoid parameters!!!!! I need these down deep in predTransition
TokenStream *_input;
size_t _startIndex;
ParserRuleContext *_outerContext;
dfa::DFA *_dfa; // Reference into the decisionToDFA vector.
/// <summary>
/// Performs ATN simulation to compute a predicted alternative based
/// upon the remaining input, but also updates the DFA cache to avoid
/// having to traverse the ATN again for the same input sequence.
///
/// There are some key conditions we're looking for after computing a new
/// set of ATN configs (proposed DFA state):
/// if the set is empty, there is no viable alternative for current symbol
/// does the state uniquely predict an alternative?
/// does the state have a conflict that would prevent us from
/// putting it on the work list?
///
/// We also have some key operations to do:
/// add an edge from previous DFA state to potentially new DFA state, D,
/// upon current symbol but only if adding to work list, which means in all
/// cases except no viable alternative (and possibly non-greedy decisions?)
/// collecting predicates and adding semantic context to DFA accept states
/// adding rule context to context-sensitive DFA accept states
/// consuming an input symbol
/// reporting a conflict
/// reporting an ambiguity
/// reporting a context sensitivity
/// reporting insufficient predicates
///
/// cover these cases:
/// dead end
/// single alt
/// single alt + preds
/// conflict
/// conflict + preds
/// </summary>
virtual size_t execATN(dfa::DFA &dfa, dfa::DFAState *s0, TokenStream *input, size_t startIndex,
ParserRuleContext *outerContext);
/// <summary>
/// Get an existing target state for an edge in the DFA. If the target state
/// for the edge has not yet been computed or is otherwise not available,
/// this method returns {@code null}.
/// </summary>
/// <param name="previousD"> The current DFA state </param>
/// <param name="t"> The next input symbol </param>
/// <returns> The existing target DFA state for the given input symbol
/// {@code t}, or {@code null} if the target state for this edge is not
/// already cached </returns>
virtual dfa::DFAState* getExistingTargetState(dfa::DFAState *previousD, size_t t);
/// <summary>
/// Compute a target state for an edge in the DFA, and attempt to add the
/// computed state and corresponding edge to the DFA.
/// </summary>
/// <param name="dfa"> The DFA </param>
/// <param name="previousD"> The current DFA state </param>
/// <param name="t"> The next input symbol
/// </param>
/// <returns> The computed target DFA state for the given input symbol
/// {@code t}. If {@code t} does not lead to a valid DFA state, this method
/// returns <seealso cref="#ERROR"/>. </returns>
virtual dfa::DFAState *computeTargetState(dfa::DFA &dfa, dfa::DFAState *previousD, size_t t);
virtual void predicateDFAState(dfa::DFAState *dfaState, DecisionState *decisionState);
// comes back with reach.uniqueAlt set to a valid alt
virtual size_t execATNWithFullContext(dfa::DFA &dfa, dfa::DFAState *D, ATNConfigSet *s0,
TokenStream *input, size_t startIndex, ParserRuleContext *outerContext); // how far we got before failing over
virtual std::unique_ptr<ATNConfigSet> computeReachSet(ATNConfigSet *closure, size_t t, bool fullCtx);
/// <summary>
/// Return a configuration set containing only the configurations from
/// {@code configs} which are in a <seealso cref="RuleStopState"/>. If all
/// configurations in {@code configs} are already in a rule stop state, this
/// method simply returns {@code configs}.
/// <p/>
/// When {@code lookToEndOfRule} is true, this method uses
/// <seealso cref="ATN#nextTokens"/> for each configuration in {@code configs} which is
/// not already in a rule stop state to see if a rule stop state is reachable
/// from the configuration via epsilon-only transitions.
/// </summary>
/// <param name="configs"> the configuration set to update </param>
/// <param name="lookToEndOfRule"> when true, this method checks for rule stop states
/// reachable by epsilon-only transitions from each configuration in
/// {@code configs}.
/// </param>
/// <returns> {@code configs} if all configurations in {@code configs} are in a
/// rule stop state, otherwise return a new configuration set containing only
/// the configurations from {@code configs} which are in a rule stop state </returns>
virtual ATNConfigSet* removeAllConfigsNotInRuleStopState(ATNConfigSet *configs, bool lookToEndOfRule);
virtual std::unique_ptr<ATNConfigSet> computeStartState(ATNState *p, RuleContext *ctx, bool fullCtx);
/* parrt internal source braindump that doesn't mess up
* external API spec.
applyPrecedenceFilter is an optimization to avoid highly
nonlinear prediction of expressions and other left recursive
rules. The precedence predicates such as {3>=prec}? Are highly
context-sensitive in that they can only be properly evaluated
in the context of the proper prec argument. Without pruning,
these predicates are normal predicates evaluated when we reach
conflict state (or unique prediction). As we cannot evaluate
these predicates out of context, the resulting conflict leads
to full LL evaluation and nonlinear prediction which shows up
very clearly with fairly large expressions.
Example grammar:
e : e '*' e
| e '+' e
| INT
;
We convert that to the following:
e[int prec]
: INT
( {3>=prec}? '*' e[4]
| {2>=prec}? '+' e[3]
)*
;
The (..)* loop has a decision for the inner block as well as
an enter or exit decision, which is what concerns us here. At
the 1st + of input 1+2+3, the loop entry sees both predicates
and the loop exit also sees both predicates by falling off the
edge of e. This is because we have no stack information with
SLL and find the follow of e, which will hit the return states
inside the loop after e[4] and e[3], which brings it back to
the enter or exit decision. In this case, we know that we
cannot evaluate those predicates because we have fallen off
the edge of the stack and will in general not know which prec
parameter is the right one to use in the predicate.
Because we have special information, that these are precedence
predicates, we can resolve them without failing over to full
LL despite their context sensitive nature. We make an
assumption that prec[-1] <= prec[0], meaning that the current
precedence level is greater than or equal to the precedence
level of recursive invocations above us in the stack. For
example, if predicate {3>=prec}? is true of the current prec,
then one option is to enter the loop to match it now. The
other option is to exit the loop and the left recursive rule
to match the current operator in rule invocation further up
the stack. But, we know that all of those prec are lower or
the same value and so we can decide to enter the loop instead
of matching it later. That means we can strip out the other
configuration for the exit branch.
So imagine we have (14,1,$,{2>=prec}?) and then
(14,2,$-dipsIntoOuterContext,{2>=prec}?). The optimization
allows us to collapse these two configurations. We know that
if {2>=prec}? is true for the current prec parameter, it will
also be true for any prec from an invoking e call, indicated
by dipsIntoOuterContext. As the predicates are both true, we
have the option to evaluate them early in the decision start
state. We do this by stripping both predicates and choosing to
enter the loop as it is consistent with the notion of operator
precedence. It's also how the full LL conflict resolution
would work.
The solution requires a different DFA start state for each
precedence level.
The basic filter mechanism is to remove configurations of the
form (p, 2, pi) if (p, 1, pi) exists for the same p and pi. In
other words, for the same ATN state and predicate context,
remove any configuration associated with an exit branch if
there is a configuration associated with the enter branch.
It's also the case that the filter evaluates precedence
predicates and resolves conflicts according to precedence
levels. For example, for input 1+2+3 at the first +, we see
prediction filtering
[(11,1,[$],{3>=prec}?), (14,1,[$],{2>=prec}?), (5,2,[$],up=1),
(11,2,[$],up=1), (14,2,[$],up=1)],hasSemanticContext=true,dipsIntoOuterContext
to
[(11,1,[$]), (14,1,[$]), (5,2,[$],up=1)],dipsIntoOuterContext
This filters because {3>=prec}? evals to true and collapses
(11,1,[$],{3>=prec}?) and (11,2,[$],up=1) since early conflict
resolution based upon rules of operator precedence fits with
our usual match first alt upon conflict.
We noticed a problem where a recursive call resets precedence
to 0. Sam's fix: each config has flag indicating if it has
returned from an expr[0] call. then just don't filter any
config with that flag set. flag is carried along in
closure(). so to avoid adding field, set bit just under sign
bit of dipsIntoOuterContext (SUPPRESS_PRECEDENCE_FILTER).
With the change you filter "unless (p, 2, pi) was reached
after leaving the rule stop state of the LR rule containing
state p, corresponding to a rule invocation with precedence
level 0"
*/
/**
* This method transforms the start state computed by
* {@link #computeStartState} to the special start state used by a
* precedence DFA for a particular precedence value. The transformation
* process applies the following changes to the start state's configuration
* set.
*
* <ol>
* <li>Evaluate the precedence predicates for each configuration using
* {@link SemanticContext#evalPrecedence}.</li>
* <li>When {@link ATNConfig#isPrecedenceFilterSuppressed} is {@code false},
* remove all configurations which predict an alternative greater than 1,
* for which another configuration that predicts alternative 1 is in the
* same ATN state with the same prediction context. This transformation is
* valid for the following reasons:
* <ul>
* <li>The closure block cannot contain any epsilon transitions which bypass
* the body of the closure, so all states reachable via alternative 1 are
* part of the precedence alternatives of the transformed left-recursive
* rule.</li>
* <li>The "primary" portion of a left recursive rule cannot contain an
* epsilon transition, so the only way an alternative other than 1 can exist
* in a state that is also reachable via alternative 1 is by nesting calls
* to the left-recursive rule, with the outer calls not being at the
* preferred precedence level. The
* {@link ATNConfig#isPrecedenceFilterSuppressed} property marks ATN
* configurations which do not meet this condition, and therefore are not
* eligible for elimination during the filtering process.</li>
* </ul>
* </li>
* </ol>
*
* <p>
* The prediction context must be considered by this filter to address
* situations like the following.
* </p>
* <code>
* <pre>
* grammar TA;
* prog: statement* EOF;
* statement: letterA | statement letterA 'b' ;
* letterA: 'a';
* </pre>
* </code>
* <p>
* In the above grammar, the ATN state immediately before the token
* reference {@code 'a'} in {@code letterA} is reachable from the left edge
* of both the primary and closure blocks of the left-recursive rule
* {@code statement}. The prediction context associated with each of these
* configurations distinguishes between them, and prevents the alternative
* which stepped out to {@code prog} (and then back in to {@code statement})
* from being eliminated by the filter.
* </p>
*
* @param configs The configuration set computed by
* {@link #computeStartState} as the start state for the DFA.
* @return The transformed configuration set representing the start state
* for a precedence DFA at a particular precedence level (determined by
* calling {@link Parser#getPrecedence}).
*/
std::unique_ptr<ATNConfigSet> applyPrecedenceFilter(ATNConfigSet *configs);
virtual ATNState *getReachableTarget(const Transition *trans, size_t ttype);
virtual std::vector<Ref<const SemanticContext>> getPredsForAmbigAlts(const antlrcpp::BitSet &ambigAlts,
ATNConfigSet *configs, size_t nalts);
std::vector<dfa::DFAState::PredPrediction> getPredicatePredictions(const antlrcpp::BitSet &ambigAlts,
const std::vector<Ref<const SemanticContext>> &altToPred);
/**
* This method is used to improve the localization of error messages by
* choosing an alternative rather than throwing a
* {@link NoViableAltException} in particular prediction scenarios where the
* {@link #ERROR} state was reached during ATN simulation.
*
* <p>
* The default implementation of this method uses the following
* algorithm to identify an ATN configuration which successfully parsed the
* decision entry rule. Choosing such an alternative ensures that the
* {@link ParserRuleContext} returned by the calling rule will be complete
* and valid, and the syntax error will be reported later at a more
* localized location.</p>
*
* <ul>
* <li>If a syntactically valid path or paths reach the end of the decision rule and
* they are semantically valid if predicated, return the min associated alt.</li>
* <li>Else, if a semantically invalid but syntactically valid path or paths
* exist, return the minimum associated alt.
* </li>
* <li>Otherwise, return {@link ATN#INVALID_ALT_NUMBER}.</li>
* </ul>
*
* <p>
* In some scenarios, the algorithm described above could predict an
* alternative which will result in a {@link FailedPredicateException} in
* the parser. Specifically, this could occur if the <em>only</em> configuration
* capable of successfully parsing to the end of the decision rule is
* blocked by a semantic predicate. By choosing this alternative within
* {@link #adaptivePredict} instead of throwing a
* {@link NoViableAltException}, the resulting
* {@link FailedPredicateException} in the parser will identify the specific
* predicate which is preventing the parser from successfully parsing the
* decision rule, which helps developers identify and correct logic errors
* in semantic predicates.
* </p>
*
* @param configs The ATN configurations which were valid immediately before
* the {@link #ERROR} state was reached
* @param outerContext This is the \gamma_0 initial parser context from the paper
* or the parser stack at the instant before prediction commences.
*
* @return The value to return from {@link #adaptivePredict}, or
* {@link ATN#INVALID_ALT_NUMBER} if a suitable alternative was not
* identified and {@link #adaptivePredict} should report an error instead.
*/
size_t getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(ATNConfigSet *configs,
ParserRuleContext *outerContext);
virtual size_t getAltThatFinishedDecisionEntryRule(ATNConfigSet *configs);
/** Walk the list of configurations and split them according to
* those that have preds evaluating to true/false. If no pred, assume
* true pred and include in succeeded set. Returns Pair of sets.
*
* Create a new set so as not to alter the incoming parameter.
*
* Assumption: the input stream has been restored to the starting point of
* prediction, which is where predicates need to evaluate.
*/
std::pair<ATNConfigSet *, ATNConfigSet *> splitAccordingToSemanticValidity(ATNConfigSet *configs,
ParserRuleContext *outerContext);
/// <summary>
/// Look through a list of predicate/alt pairs, returning alts for the
/// pairs that win. A {@code NONE} predicate indicates an alt containing an
/// unpredicated config which behaves as "always true." If !complete
/// then we stop at the first predicate that evaluates to true. This
/// includes pairs with null predicates.
/// </summary>
antlrcpp::BitSet evalSemanticContext(const std::vector<dfa::DFAState::PredPrediction> &predPredictions,
ParserRuleContext *outerContext, bool complete);
/**
* Evaluate a semantic context within a specific parser context.
*
* <p>
* This method might not be called for every semantic context evaluated
* during the prediction process. In particular, we currently do not
* evaluate the following but it may change in the future:</p>
*
* <ul>
* <li>Precedence predicates (represented by
* {@link SemanticContext.PrecedencePredicate}) are not currently evaluated
* through this method.</li>
* <li>Operator predicates (represented by {@link SemanticContext.AND} and
* {@link SemanticContext.OR}) are evaluated as a single semantic
* context, rather than evaluating the operands individually.
* Implementations which require evaluation results from individual
* predicates should override this method to explicitly handle evaluation of
* the operands within operator predicates.</li>
* </ul>
*
* @param pred The semantic context to evaluate
* @param parserCallStack The parser context in which to evaluate the
* semantic context
* @param alt The alternative which is guarded by {@code pred}
* @param fullCtx {@code true} if the evaluation is occurring during LL
* prediction; otherwise, {@code false} if the evaluation is occurring
* during SLL prediction
*
* @since 4.3
*/
virtual bool evalSemanticContext(Ref<const SemanticContext> const& pred, ParserRuleContext *parserCallStack,
size_t alt, bool fullCtx);
/* TODO: If we are doing predicates, there is no point in pursuing
closure operations if we reach a DFA state that uniquely predicts
an alternative. We will not be caching that DFA state and it is a
waste to pursue the closure. Might have to advance when we do
ambig detection though :(
*/
virtual void closure(Ref<ATNConfig> const& config, ATNConfigSet *configs, ATNConfig::Set &closureBusy,
bool collectPredicates, bool fullCtx, bool treatEofAsEpsilon);
virtual void closureCheckingStopState(Ref<ATNConfig> const& config, ATNConfigSet *configs, ATNConfig::Set &closureBusy,
bool collectPredicates, bool fullCtx, int depth, bool treatEofAsEpsilon);
/// Do the actual work of walking epsilon edges.
virtual void closure_(Ref<ATNConfig> const& config, ATNConfigSet *configs, ATNConfig::Set &closureBusy,
bool collectPredicates, bool fullCtx, int depth, bool treatEofAsEpsilon);
virtual Ref<ATNConfig> getEpsilonTarget(Ref<ATNConfig> const& config, const Transition *t, bool collectPredicates,
bool inContext, bool fullCtx, bool treatEofAsEpsilon);
virtual Ref<ATNConfig> actionTransition(Ref<ATNConfig> const& config, const ActionTransition *t);
virtual Ref<ATNConfig> predTransition(Ref<ATNConfig> const& config, const PredicateTransition *pt, bool collectPredicates,
bool inContext, bool fullCtx);
virtual Ref<ATNConfig> ruleTransition(Ref<ATNConfig> const& config, const RuleTransition *t);
/**
* Gets a {@link BitSet} containing the alternatives in {@code configs}
* which are part of one or more conflicting alternative subsets.
*
* @param configs The {@link ATNConfigSet} to analyze.
* @return The alternatives in {@code configs} which are part of one or more
* conflicting alternative subsets. If {@code configs} does not contain any
* conflicting subsets, this method returns an empty {@link BitSet}.
*/
virtual antlrcpp::BitSet getConflictingAlts(ATNConfigSet *configs);
/// <summary>
/// Sam pointed out a problem with the previous definition, v3, of
/// ambiguous states. If we have another state associated with conflicting
/// alternatives, we should keep going. For example, the following grammar
///
/// s : (ID | ID ID?) ';' ;
///
/// When the ATN simulation reaches the state before ';', it has a DFA
/// state that looks like: [12|1|[], 6|2|[], 12|2|[]]. Naturally
/// 12|1|[] and 12|2|[] conflict, but we cannot stop processing this node
/// because alternative two has another way to continue, via [6|2|[]].
/// The key is that we have a single state that has config's only associated
/// with a single alternative, 2, and crucially the state transitions
/// among the configurations are all non-epsilon transitions. That means
/// we don't consider any conflicts that include alternative 2. So, we
/// ignore the conflict between alts 1 and 2. We ignore a set of
/// conflicting alts when there is an intersection with an alternative
/// associated with a single alt state in the state->config-list map.
///
/// It's also the case that we might have two conflicting configurations but
/// also a 3rd nonconflicting configuration for a different alternative:
/// [1|1|[], 1|2|[], 8|3|[]]. This can come about from grammar:
///
/// a : A | A | A B ;
///
/// After matching input A, we reach the stop state for rule A, state 1.
/// State 8 is the state right before B. Clearly alternatives 1 and 2
/// conflict and no amount of further lookahead will separate the two.
/// However, alternative 3 will be able to continue and so we do not
/// stop working on this state. In the previous example, we're concerned
/// with states associated with the conflicting alternatives. Here alt
/// 3 is not associated with the conflicting configs, but since we can continue
/// looking for input reasonably, I don't declare the state done. We
/// ignore a set of conflicting alts when we have an alternative
/// that we still need to pursue.
/// </summary>
virtual antlrcpp::BitSet getConflictingAltsOrUniqueAlt(ATNConfigSet *configs);
virtual NoViableAltException noViableAlt(TokenStream *input, ParserRuleContext *outerContext,
ATNConfigSet *configs, size_t startIndex, bool deleteConfigs);
static size_t getUniqueAlt(ATNConfigSet *configs);
/// <summary>
/// Add an edge to the DFA, if possible. This method calls
/// <seealso cref="#addDFAState"/> to ensure the {@code to} state is present in the
/// DFA. If {@code from} is {@code null}, or if {@code t} is outside the
/// range of edges that can be represented in the DFA tables, this method
/// returns without adding the edge to the DFA.
/// <p/>
/// If {@code to} is {@code null}, this method returns {@code null}.
/// Otherwise, this method returns the <seealso cref="DFAState"/> returned by calling
/// <seealso cref="#addDFAState"/> for the {@code to} state.
/// </summary>
/// <param name="dfa"> The DFA </param>
/// <param name="from"> The source state for the edge </param>
/// <param name="t"> The input symbol </param>
/// <param name="to"> The target state for the edge
/// </param>
/// <returns> If {@code to} is {@code null}, this method returns {@code null};
/// otherwise this method returns the result of calling <seealso cref="#addDFAState"/>
/// on {@code to} </returns>
virtual dfa::DFAState *addDFAEdge(dfa::DFA &dfa, dfa::DFAState *from, ssize_t t, dfa::DFAState *to);
/// <summary>
/// Add state {@code D} to the DFA if it is not already present, and return
/// the actual instance stored in the DFA. If a state equivalent to {@code D}
/// is already in the DFA, the existing state is returned. Otherwise this
/// method returns {@code D} after adding it to the DFA.
/// <p/>
/// If {@code D} is <seealso cref="#ERROR"/>, this method returns <seealso cref="#ERROR"/> and
/// does not change the DFA.
/// </summary>
/// <param name="dfa"> The dfa </param>
/// <param name="D"> The DFA state to add </param>
/// <returns> The state stored in the DFA. This will be either the existing
/// state if {@code D} is already in the DFA, or {@code D} itself if the
/// state was not already present. </returns>
virtual dfa::DFAState *addDFAState(dfa::DFA &dfa, dfa::DFAState *D);
virtual void reportAttemptingFullContext(dfa::DFA &dfa, const antlrcpp::BitSet &conflictingAlts,
ATNConfigSet *configs, size_t startIndex, size_t stopIndex);
virtual void reportContextSensitivity(dfa::DFA &dfa, size_t prediction, ATNConfigSet *configs,
size_t startIndex, size_t stopIndex);
/// If context sensitive parsing, we know it's ambiguity not conflict.
virtual void reportAmbiguity(dfa::DFA &dfa,
dfa::DFAState *D, // the DFA state from execATN() that had SLL conflicts
size_t startIndex, size_t stopIndex,
bool exact,
const antlrcpp::BitSet &ambigAlts,
ATNConfigSet *configs); // configs that LL not SLL considered conflicting
private:
// SLL, LL, or LL + exact ambig detection?
PredictionMode _mode;
static bool getLrLoopSetting();
void InitializeInstanceFields();
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,50 @@
// Copyright 2012-2022 The ANTLR Project
//
// Redistribution and use in source and binary forms, with or without modification, are permitted
// provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this list of conditions
// and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice, this list of
// conditions and the following disclaimer in the documentation and/or other materials provided
// with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its contributors may be used to
// endorse or promote products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "atn/PredictionContextMergeCacheOptions.h"
namespace antlr4 {
namespace atn {
/// Value class that holds a PredictionContextMergeCacheOptions instance behind
/// a setter/getter pair.
class ANTLR4CPP_PUBLIC ParserATNSimulatorOptions final {
public:
/// Stores {@code predictionContextMergeCacheOptions}. The argument is taken
/// by value and moved into the member.
///
/// @return {@code *this}, so that calls can be chained.
ParserATNSimulatorOptions& setPredictionContextMergeCacheOptions(
PredictionContextMergeCacheOptions predictionContextMergeCacheOptions) {
_predictionContextMergeCacheOptions = std::move(predictionContextMergeCacheOptions);
return *this;
}
/// @return A reference to the stored options. It refers to a member of this
/// object, so it is only usable while this object is alive.
const PredictionContextMergeCacheOptions& getPredictionContextMergeCacheOptions() const {
return _predictionContextMergeCacheOptions;
}
private:
// Default-constructed until the setter replaces it.
PredictionContextMergeCacheOptions _predictionContextMergeCacheOptions;
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,29 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "atn/BlockStartState.h"
namespace antlr4 {
namespace atn {
/// Start of {@code (A|B|...)+} loop. Technically a decision state, but
/// we don't use it for code generation; somebody might need it, so I'm defining
/// it for completeness. In reality, the <seealso cref="PlusLoopbackState"/> node is the
/// real decision-making node for {@code A+}.
class ANTLR4CPP_PUBLIC PlusBlockStartState final : public BlockStartState {
public:
/// @return {@code true} when {@code atnState} reports the PLUS_BLOCK_START state type.
static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::PLUS_BLOCK_START; }
/// Pointer overload; a null pointer is never a match.
static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); }
/// The associated loop-back state; null until something assigns it.
PlusLoopbackState *loopBackState = nullptr;
/// Tags the state as PLUS_BLOCK_START through the base-class constructor.
PlusBlockStartState() : BlockStartState(ATNStateType::PLUS_BLOCK_START) {}
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,25 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "atn/DecisionState.h"
namespace antlr4 {
namespace atn {
/// Decision state for {@code A+} and {@code (A|B)+}. It has two transitions:
/// one to the loop back to start of the block and one to exit.
class ANTLR4CPP_PUBLIC PlusLoopbackState final : public DecisionState {
public:
/// @return {@code true} when {@code atnState} reports the PLUS_LOOP_BACK state type.
static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::PLUS_LOOP_BACK; }
/// Pointer overload; a null pointer is never a match.
static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); }
/// Tags the state as PLUS_LOOP_BACK through the base-class constructor.
PlusLoopbackState() : DecisionState(ATNStateType::PLUS_LOOP_BACK) {}
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,23 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "atn/PrecedencePredicateTransition.h"
using namespace antlr4::atn;
// Builds a PRECEDENCE transition to `target` and allocates the shared
// SemanticContext::PrecedencePredicate that carries `precedence`.
PrecedencePredicateTransition::PrecedencePredicateTransition(ATNState *target, int precedence)
: Transition(TransitionType::PRECEDENCE, target), _predicate(std::make_shared<SemanticContext::PrecedencePredicate>(precedence)) {}
// Always reports this transition as an epsilon transition.
bool PrecedencePredicateTransition::isEpsilon() const {
return true;
}
// Never matches: the result is false whatever the arguments are, which is why
// the parameter names are commented out.
bool PrecedencePredicateTransition::matches(size_t /*symbol*/, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const {
return false;
}
// Renders the transition as
// "PRECEDENCE <base description> { precedence: <n> }".
std::string PrecedencePredicateTransition::toString() const {
  std::string description = "PRECEDENCE ";
  description += Transition::toString();
  description += " { precedence: ";
  description += std::to_string(getPrecedence());
  description += " }";
  return description;
}

View File

@@ -0,0 +1,35 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "atn/Transition.h"
#include "atn/SemanticContext.h"
namespace antlr4 {
namespace atn {
/// Transition of type PRECEDENCE that owns a shared, immutable
/// SemanticContext::PrecedencePredicate.
class ANTLR4CPP_PUBLIC PrecedencePredicateTransition final : public Transition {
public:
/// @return {@code true} when {@code transition} reports the PRECEDENCE transition type.
static bool is(const Transition &transition) { return transition.getTransitionType() == TransitionType::PRECEDENCE; }
/// Pointer overload; a null pointer is never a match.
static bool is(const Transition *transition) { return transition != nullptr && is(*transition); }
/// @param target The state this transition leads to.
/// @param precedence The precedence value stored in the predicate.
PrecedencePredicateTransition(ATNState *target, int precedence);
/// @return The precedence value read from the owned predicate.
int getPrecedence() const { return _predicate->precedence; }
bool isEpsilon() const override;
bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override;
std::string toString() const override;
/// @return The owned predicate; the reference refers to a member of this object.
const Ref<const SemanticContext::PrecedencePredicate>& getPredicate() const { return _predicate; }
private:
// Created once in the constructor and never reseated (the pointer itself is const).
const std::shared_ptr<const SemanticContext::PrecedencePredicate> _predicate;
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,17 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "SemanticContext.h"
#include "atn/PredicateEvalInfo.h"
using namespace antlr4;
using namespace antlr4::atn;
// Forwards the decision/input/index/fullCtx data to DecisionEventInfo and
// records the predicate-specific fields.
// NOTE(review): the second base-class argument is passed as nullptr; it is
// presumably the configuration set, which this event does not carry. Confirm
// against DecisionEventInfo.
PredicateEvalInfo::PredicateEvalInfo(size_t decision, TokenStream *input, size_t startIndex, size_t stopIndex,
Ref<const SemanticContext> semctx, bool evalResult, size_t predictedAlt, bool fullCtx)
: DecisionEventInfo(decision, nullptr, input, startIndex, stopIndex, fullCtx),
// Inside each initializer the parameter shadows the member of the same name,
// so semctx(std::move(semctx)) moves the by-value parameter into the member.
semctx(std::move(semctx)), predictedAlt(predictedAlt), evalResult(evalResult) {
}

View File

@@ -0,0 +1,62 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "atn/DecisionEventInfo.h"
namespace antlr4 {
namespace atn {
/// <summary>
/// This class represents profiling event information for semantic predicate
/// evaluations which occur during prediction.
/// </summary>
/// <seealso cref="ParserATNSimulator#evalSemanticContext"/>
///
/// @since 4.3
class ANTLR4CPP_PUBLIC PredicateEvalInfo : public DecisionEventInfo {
public:
/// The semantic context which was evaluated.
const Ref<const SemanticContext> semctx;
/// <summary>
/// The alternative number for the decision which is guarded by the semantic
/// context <seealso cref="#semctx"/>. Note that other ATN
/// configurations may predict the same alternative which are guarded by
/// other semantic contexts and/or <seealso cref="SemanticContext#NONE"/>.
/// </summary>
const size_t predictedAlt;
/// The result of evaluating the semantic context <seealso cref="#semctx"/>.
const bool evalResult;
/// <summary>
/// Constructs a new instance of the <seealso cref="PredicateEvalInfo"/> class with the
/// specified detailed predicate evaluation information.
/// </summary>
/// <param name="decision"> The decision number </param>
/// <param name="input"> The input token stream </param>
/// <param name="startIndex"> The start index for the current prediction </param>
/// <param name="stopIndex"> The index at which the predicate evaluation was
/// triggered. Note that the input stream may be reset to other positions for
/// the actual evaluation of individual predicates. </param>
/// <param name="semctx"> The semantic context which was evaluated </param>
/// <param name="evalResult"> The results of evaluating the semantic context </param>
/// <param name="predictedAlt"> The alternative number for the decision which is
/// guarded by the semantic context {@code semctx}. See <seealso cref="#predictedAlt"/>
/// for more information. </param>
/// <param name="fullCtx"> {@code true} if the semantic context was
/// evaluated during LL prediction; otherwise, {@code false} if the semantic
/// context was evaluated during SLL prediction
/// </param>
/// <seealso cref="ParserATNSimulator#evalSemanticContext(SemanticContext, ParserRuleContext, int, boolean)"/>
/// <seealso cref="SemanticContext#eval(Recognizer, RuleContext)"/>
PredicateEvalInfo(size_t decision, TokenStream *input, size_t startIndex, size_t stopIndex,
Ref<const SemanticContext> semctx, bool evalResult, size_t predictedAlt, bool fullCtx);
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,24 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "atn/PredicateTransition.h"
using namespace antlr4::atn;
// Builds a PREDICATE transition to `target` and allocates the shared
// SemanticContext::Predicate that carries ruleIndex, predIndex and
// isCtxDependent.
PredicateTransition::PredicateTransition(ATNState *target, size_t ruleIndex, size_t predIndex, bool isCtxDependent)
: Transition(TransitionType::PREDICATE, target), _predicate(std::make_shared<SemanticContext::Predicate>(ruleIndex, predIndex, isCtxDependent)) {}
// Always reports this transition as an epsilon transition.
bool PredicateTransition::isEpsilon() const {
return true;
}
// Never matches: the result is false whatever the arguments are, which is why
// the parameter names are commented out.
bool PredicateTransition::matches(size_t /*symbol*/, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const {
return false;
}
// Renders the transition as
// "PREDICATE <base description> { ruleIndex: <r>, predIndex: <p>, isCtxDependent: <0|1> }".
std::string PredicateTransition::toString() const {
  std::string description = "PREDICATE ";
  description += Transition::toString();
  description += " { ruleIndex: ";
  description += std::to_string(getRuleIndex());
  description += ", predIndex: ";
  description += std::to_string(getPredIndex());
  description += ", isCtxDependent: ";
  // isCtxDependent() is a bool, so std::to_string prints it as 0 or 1.
  description += std::to_string(isCtxDependent());
  description += " }";
  return description;
}

View File

@@ -0,0 +1,50 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "atn/Transition.h"
#include "atn/SemanticContext.h"
namespace antlr4 {
namespace atn {
/// Transition of type PREDICATE that owns a shared, immutable
/// SemanticContext::Predicate.
///
/// TODO: the description below was inherited from an older version of this
/// class and may no longer be accurate:
/// A tree of semantic predicates from the grammar AST if label==SEMPRED.
/// In the ATN, labels will always be exactly one predicate, but the DFA
/// may have to combine a bunch of them as it collects predicates from
/// multiple ATN configurations into a single DFA state.
class ANTLR4CPP_PUBLIC PredicateTransition final : public Transition {
public:
  /// @return {@code true} when {@code transition} reports the PREDICATE transition type.
  static bool is(const Transition &transition) {
    return transition.getTransitionType() == TransitionType::PREDICATE;
  }

  /// Pointer overload; a null pointer is never a match.
  static bool is(const Transition *transition) {
    if (transition == nullptr) {
      return false;
    }
    return is(*transition);
  }

  /// @param target The state this transition leads to.
  /// @param ruleIndex Stored in the predicate as its rule index.
  /// @param predIndex Stored in the predicate as its predicate index.
  /// @param isCtxDependent Stored in the predicate as its context-dependence flag.
  PredicateTransition(ATNState *target, size_t ruleIndex, size_t predIndex, bool isCtxDependent);

  /// @return The rule index read from the owned predicate.
  size_t getRuleIndex() const { return _predicate->ruleIndex; }

  /// @return The predicate index read from the owned predicate.
  size_t getPredIndex() const { return _predicate->predIndex; }

  /// @return The context-dependence flag read from the owned predicate.
  bool isCtxDependent() const { return _predicate->isCtxDependent; }

  bool isEpsilon() const override;
  bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override;
  std::string toString() const override;

  /// @return The owned predicate; the reference refers to a member of this object.
  const Ref<const SemanticContext::Predicate>& getPredicate() const { return _predicate; }

private:
  // Created once in the constructor and never reseated (the pointer itself is const).
  const std::shared_ptr<const SemanticContext::Predicate> _predicate;
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,601 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "atn/SingletonPredictionContext.h"
#include "misc/MurmurHash.h"
#include "atn/ArrayPredictionContext.h"
#include "atn/PredictionContextCache.h"
#include "atn/PredictionContextMergeCache.h"
#include "RuleContext.h"
#include "ParserRuleContext.h"
#include "atn/RuleTransition.h"
#include "support/Arrays.h"
#include "support/CPPUtils.h"
#include "support/Casts.h"
#include "atn/PredictionContext.h"
using namespace antlr4;
using namespace antlr4::misc;
using namespace antlr4::atn;
using namespace antlrcpp;
namespace {
// Canonicalizes `parents` in place: entries that compare equal by value are
// all replaced with the first such instance, so that equal parent graphs are
// shared instead of duplicated.
//
// The set must hash and compare the pointed-to contexts. With the default
// std::hash / operator== for std::shared_ptr only the pointer is inspected,
// which makes every entry map to itself and turns this function into a no-op.
// Null entries are tolerated and are only equal to other null entries.
void combineCommonParents(std::vector<Ref<const PredictionContext>> &parents) {
  struct ValueHash final {
    size_t operator()(const Ref<const PredictionContext> &context) const {
      return context ? context->hashCode() : 0;
    }
  };
  struct ValueEqual final {
    bool operator()(const Ref<const PredictionContext> &lhs, const Ref<const PredictionContext> &rhs) const {
      if (lhs == rhs) {
        return true; // Same object, or both null.
      }
      return lhs && rhs && *lhs == *rhs;
    }
  };

  std::unordered_set<Ref<const PredictionContext>, ValueHash, ValueEqual> uniqueParents;
  uniqueParents.reserve(parents.size());
  for (auto &parent : parents) {
    // insert() hands back the element already stored for an equal key, or the
    // newly inserted one; either way that is the canonical instance.
    parent = *uniqueParents.insert(parent).first;
  }
}
// Returns the instance that `contextCache` holds for `context`, rebuilding
// `context` from cached parents when any parent had to be replaced.
//
// `visited` memoizes results for this traversal. It is keyed on the Ref with
// the default hasher, i.e. on the pointer, so it records object identity.
//
// NOTE(review): the recursive call dereferences context->getParent(i) right
// away (context->isEmpty()) without a null check; confirm that callers never
// pass graphs containing null parents.
Ref<const PredictionContext> getCachedContextImpl(const Ref<const PredictionContext> &context,
PredictionContextCache &contextCache,
std::unordered_map<Ref<const PredictionContext>,
Ref<const PredictionContext>> &visited) {
// Empty contexts are returned as they are and never enter the cache.
if (context->isEmpty()) {
return context;
}
{
auto iterator = visited.find(context);
if (iterator != visited.end()) {
return iterator->second; // Not necessarily the same as context.
}
}
auto cached = contextCache.get(context);
if (cached) {
visited[context] = cached;
return cached;
}
// `parents` starts as context->size() null slots and is only filled with real
// data once the first replaced parent is seen; until then it is a placeholder.
bool changed = false;
std::vector<Ref<const PredictionContext>> parents(context->size());
for (size_t i = 0; i < parents.size(); i++) {
auto parent = getCachedContextImpl(context->getParent(i), contextCache, visited);
if (changed || parent != context->getParent(i)) {
if (!changed) {
// First difference: copy all original parents so that the slots before i
// (which were unchanged) hold the right values.
parents.clear();
for (size_t j = 0; j < context->size(); j++) {
parents.push_back(context->getParent(j));
}
changed = true;
}
parents[i] = std::move(parent);
}
}
// No parent was replaced: `context` itself becomes the cached instance.
if (!changed) {
visited[context] = context;
contextCache.put(context);
return context;
}
// Rebuild the node around the replaced parents. Note that EMPTY is not put
// into the cache, unlike the singleton and array results.
Ref<const PredictionContext> updated;
if (parents.empty()) {
updated = PredictionContext::EMPTY;
} else if (parents.size() == 1) {
updated = SingletonPredictionContext::create(std::move(parents[0]), context->getReturnState(0));
contextCache.put(updated);
} else {
// More than one parent: `context` is treated as an array context and its
// return states are reused unchanged.
updated = std::make_shared<ArrayPredictionContext>(std::move(parents), downCast<const ArrayPredictionContext*>(context.get())->returnStates);
contextCache.put(updated);
}
// Memoize both the new node and the original so later lookups of either
// resolve to `updated`.
visited[updated] = updated;
visited[context] = updated;
return updated;
}
// Appends `context` and every node reachable through its parents to `nodes`,
// in depth-first pre-order, visiting each distinct node once. `visited`
// tracks the raw node addresses that have already been appended.
void getAllContextNodesImpl(const Ref<const PredictionContext> &context,
                            std::vector<Ref<const PredictionContext>> &nodes,
                            std::unordered_set<const PredictionContext*> &visited) {
  // Contexts are constructed with null parents in this file (EMPTY, and the
  // array built for "$ + x"), so a parent slot handed back by getParent() may
  // be null. There is nothing to collect for it and it must not be dereferenced.
  if (!context) {
    return;
  }
  // insert() reports through .second whether the node was new.
  if (!visited.insert(context.get()).second) {
    return; // Already done.
  }
  nodes.push_back(context);
  for (size_t i = 0; i < context->size(); i++) {
    getAllContextNodesImpl(context->getParent(i), nodes, visited);
  }
}
// Returns the id already recorded for `node`. If there is none, records the
// current value of `nodeId` for it, advances the counter and returns the
// recorded id.
size_t insertOrAssignNodeId(std::unordered_map<const PredictionContext*, size_t> &nodeIds, size_t &nodeId, const PredictionContext *node) {
  const auto inserted = nodeIds.insert({node, nodeId});
  if (inserted.second) {
    ++nodeId; // The counter only moves when a fresh id was handed out.
  }
  return inserted.first->second;
}
}
// The shared empty context: a singleton constructed with a null parent and
// EMPTY_RETURN_STATE. Code in this file recognizes it by pointer comparison.
const Ref<const PredictionContext> PredictionContext::EMPTY = std::make_shared<SingletonPredictionContext>(nullptr, PredictionContext::EMPTY_RETURN_STATE);
//----------------- PredictionContext ----------------------------------------------------------------------------------
// Records the concrete context type. The cached hash starts at 0, the value
// hashCode() interprets as "not computed yet".
PredictionContext::PredictionContext(PredictionContextType contextType) : _contextType(contextType), _hashCode(0) {}
// Move constructor: copies the context type and takes over the source's cached
// hash, resetting the source's cache to 0 in the same atomic exchange.
PredictionContext::PredictionContext(PredictionContext&& other) : _contextType(other._contextType), _hashCode(other._hashCode.exchange(0, std::memory_order_relaxed)) {}
// Converts the rule-invocation chain that ends at `outerContext` into the
// corresponding chain of prediction contexts.
Ref<const PredictionContext> PredictionContext::fromRuleContext(const ATN &atn, RuleContext *outerContext) {
  // A missing context, a context without a parent (the start rule: nobody
  // called us) and the shared empty parser context all map to EMPTY.
  const bool isOutermost = outerContext == nullptr
                           || outerContext->parent == nullptr
                           || outerContext == &ParserRuleContext::EMPTY;
  if (isOutermost) {
    return PredictionContext::EMPTY;
  }

  // Convert the parent chain first. A parent that is not a RuleContext is
  // handled as if there were none.
  RuleContext *enclosing = nullptr;
  if (RuleContext::is(outerContext->parent)) {
    enclosing = downCast<RuleContext*>(outerContext->parent);
  }
  auto parentContext = PredictionContext::fromRuleContext(atn, enclosing);

  // The return state is the follow state of the first transition leaving the
  // invoking state, which is taken to be a rule transition.
  const auto *invoker = atn.states[outerContext->invokingState];
  const auto *ruleCall = downCast<const RuleTransition*>(invoker->transitions[0].get());
  return SingletonPredictionContext::create(std::move(parentContext), ruleCall->followState->stateNumber);
}
// Reports whether this context includes the empty return state. Only the last
// slot is examined, because EMPTY_RETURN_STATE can only appear in the last
// position.
bool PredictionContext::hasEmptyPath() const {
  const auto lastIndex = size() - 1;
  return getReturnState(lastIndex) == EMPTY_RETURN_STATE;
}
// Returns the hash of this context, computing and caching it on first use.
// A cached value of 0 means "not computed yet", so a computed hash of 0 is
// stored and returned as the maximum size_t value instead.
size_t PredictionContext::hashCode() const {
  auto hash = cachedHashCode();
  if (hash != 0) {
    return hash;
  }

  hash = hashCodeImpl();
  if (hash == 0) {
    hash = std::numeric_limits<size_t>::max();
  }
  _hashCode.store(hash, std::memory_order_relaxed);
  return hash;
}
// Merges two prediction contexts. Singleton pairs go to mergeSingletons();
// every other combination is normalized to two arrays and goes to
// mergeArrays(). Both arguments must be non-null.
Ref<const PredictionContext> PredictionContext::merge(Ref<const PredictionContext> a, Ref<const PredictionContext> b,
                                                      bool rootIsWildcard, PredictionContextMergeCache *mergeCache) {
  assert(a && b);

  // Identical or equal graphs are shared rather than merged.
  if (a == b || *a == *b) {
    return a;
  }

  const bool aIsSingleton = a->getContextType() == PredictionContextType::SINGLETON;
  const bool bIsSingleton = b->getContextType() == PredictionContextType::SINGLETON;
  if (aIsSingleton && bIsSingleton) {
    return mergeSingletons(std::static_pointer_cast<const SingletonPredictionContext>(std::move(a)),
                           std::static_pointer_cast<const SingletonPredictionContext>(std::move(b)),
                           rootIsWildcard, mergeCache);
  }

  // From here on at least one side is an array.
  // With a wildcard root, $ on either side is returned as the * wildcard.
  if (rootIsWildcard && a == PredictionContext::EMPTY) {
    return a;
  }
  if (rootIsWildcard && b == PredictionContext::EMPTY) {
    return b;
  }

  // Normalize both sides to arrays: a singleton is wrapped in a new array
  // context, an array is reused (its Ref is moved out of the argument).
  const auto asArray = [](Ref<const PredictionContext> &context, bool isSingleton) -> Ref<const ArrayPredictionContext> {
    if (isSingleton) {
      return std::make_shared<ArrayPredictionContext>(downCast<const SingletonPredictionContext&>(*context));
    }
    return std::static_pointer_cast<const ArrayPredictionContext>(std::move(context));
  };
  auto left = asArray(a, aIsSingleton);
  auto right = asArray(b, bIsSingleton);
  return mergeArrays(std::move(left), std::move(right), rootIsWildcard, mergeCache);
}
// Merges two singleton contexts. The merge cache, when supplied, is consulted
// in both argument orders first, and every newly built result is stored under
// (a, b). Results that are simply `a` or `b` are returned without being stored.
Ref<const PredictionContext> PredictionContext::mergeSingletons(Ref<const SingletonPredictionContext> a, Ref<const SingletonPredictionContext> b,
                                                                bool rootIsWildcard, PredictionContextMergeCache *mergeCache) {
  if (mergeCache != nullptr) {
    if (auto hit = mergeCache->get(a, b)) {
      return hit;
    }
    if (auto hit = mergeCache->get(b, a)) {
      return hit;
    }
  }

  // Stores `merged` under (a, b) when a cache is present and yields whatever
  // the cache returns; without a cache the value is passed straight through.
  const auto remember = [&](Ref<const PredictionContext> merged) -> Ref<const PredictionContext> {
    if (mergeCache != nullptr) {
      return mergeCache->put(a, b, std::move(merged));
    }
    return merged;
  };

  if (auto rootMerge = mergeRoot(a, b, rootIsWildcard)) {
    return remember(std::move(rootMerge));
  }

  const auto &parentA = a->parent;
  const auto &parentB = b->parent;

  if (a->returnState == b->returnState) { // a == b
    auto parent = merge(parentA, parentB, rootIsWildcard, mergeCache);
    // If the merged parent is one of the existing parents, the matching
    // singleton already describes the result.
    if (parent == parentA) { // ax + bx = ax, if a=b
      return a;
    }
    if (parent == parentB) { // ax + bx = bx, if a=b
      return b;
    }
    // else: ax + ay = a'[x,y]
    // The parents merged into a new joined parent, so create a new singleton
    // a' with the same return state pointing at it.
    return remember(SingletonPredictionContext::create(std::move(parent), a->returnState));
  }

  // a != b: payloads differ. The result is a two-element array sorted by
  // payload. When both parents are equal (and usable), both slots share
  // parentA: ax + bx = [a,b]x. Otherwise each slot keeps its own parent:
  // ax + by = [ax,by].
  const bool aComesFirst = !(a->returnState > b->returnState);
  const auto &low = aComesFirst ? a : b;
  const auto &high = aComesFirst ? b : a;
  std::vector<size_t> payloads = { low->returnState, high->returnState };

  const bool sameParent = a == b || (*parentA == *parentB);
  std::vector<Ref<const PredictionContext>> parents;
  if (sameParent && parentA) {
    parents = { parentA, parentA };
  } else {
    parents = { low->parent, high->parent };
  }

  auto joined = std::make_shared<ArrayPredictionContext>(std::move(parents), std::move(payloads));
  return remember(std::move(joined));
}
/// Handle the merges in which at least one operand is EMPTY.
///
/// Returns nullptr when neither operand is EMPTY, which tells the caller to
/// continue with the general merge.
///
/// With rootIsWildcard set, EMPTY absorbs the other operand and EMPTY is
/// returned. Otherwise EMPTY + EMPTY is EMPTY, and EMPTY + x becomes a
/// two-element array holding x's (returnState, parent) followed by
/// (EMPTY_RETURN_STATE, null parent).
Ref<const PredictionContext> PredictionContext::mergeRoot(Ref<const SingletonPredictionContext> a, Ref<const SingletonPredictionContext> b,
                                                          bool rootIsWildcard) {
  const bool leftIsEmpty = a == EMPTY;
  const bool rightIsEmpty = b == EMPTY;

  if (!leftIsEmpty && !rightIsEmpty) {
    return nullptr;  // nothing special to do here
  }
  if (rootIsWildcard) {
    return EMPTY;  // * + b = *  and  a + * = *
  }
  if (leftIsEmpty && rightIsEmpty) {
    return EMPTY;  // $ + $ = $
  }

  // Exactly one side is $: pair the other side's payload with the $ marker.
  // The non-empty payload is stored first, the $ marker second.
  const auto &nonEmpty = leftIsEmpty ? b : a;
  std::vector<size_t> payloads = { nonEmpty->returnState, EMPTY_RETURN_STATE };
  std::vector<Ref<const PredictionContext>> parents = { nonEmpty->parent, nullptr };
  return std::make_shared<ArrayPredictionContext>(std::move(parents), std::move(payloads));
}
// Merge two array contexts into one context.
//
// The merge cache (if any) is consulted first under both key orders. Otherwise
// the two (returnState, parent) lists are walked in lock step like the merge
// phase of a merge sort, which relies on each input's returnStates already
// being in ascending order. Entries with equal return states are combined into
// one entry whose parent is either the shared parent or the recursive merge of
// both parents.
//
// Result selection:
//   - exactly one merged entry            -> a new singleton context
//   - merged array equals *a (or *b)      -> a (or b) itself is returned
//   - otherwise                           -> a new ArrayPredictionContext
// Every result is stored in the merge cache under (a, b) when a cache is given.
Ref<const PredictionContext> PredictionContext::mergeArrays(Ref<const ArrayPredictionContext> a, Ref<const ArrayPredictionContext> b,
bool rootIsWildcard, PredictionContextMergeCache *mergeCache) {
if (mergeCache) {
auto existing = mergeCache->get(a, b);
if (existing) {
#if TRACE_ATN_SIM == 1
std::cout << "mergeArrays a=" << a->toString() << ",b=" << b->toString() << " -> previous" << std::endl;
#endif
return existing;
}
// The merge is also looked up with the keys swapped.
existing = mergeCache->get(b, a);
if (existing) {
#if TRACE_ATN_SIM == 1
std::cout << "mergeArrays a=" << a->toString() << ",b=" << b->toString() << " -> previous" << std::endl;
#endif
return existing;
}
}
// merge sorted payloads a + b => M
size_t i = 0; // walks a
size_t j = 0; // walks b
size_t k = 0; // walks target M array
// Both targets are sized for the worst case (no shared payloads) and trimmed below.
std::vector<size_t> mergedReturnStates(a->returnStates.size() + b->returnStates.size());
std::vector<Ref<const PredictionContext>> mergedParents(a->returnStates.size() + b->returnStates.size());
// walk and merge to yield mergedParents, mergedReturnStates
while (i < a->returnStates.size() && j < b->returnStates.size()) {
const auto& parentA = a->parents[i];
const auto& parentB = b->parents[j];
if (a->returnStates[i] == b->returnStates[j]) {
// same payload (stack tops are equal), must yield merged singleton
size_t payload = a->returnStates[i];
// $+$ = $
bool both$ = payload == EMPTY_RETURN_STATE && !parentA && !parentB;
bool ax_ax = (parentA && parentB) && *parentA == *parentB; // ax+ax -> ax
if (both$ || ax_ax) {
mergedParents[k] = parentA; // choose left
mergedReturnStates[k] = payload;
} else { // ax+ay -> a'[x,y]
mergedParents[k] = merge(parentA, parentB, rootIsWildcard, mergeCache);
mergedReturnStates[k] = payload;
}
i++; // hop over left one as usual
j++; // but also skip one in right side since we merge
} else if (a->returnStates[i] < b->returnStates[j]) { // copy a[i] to M
mergedParents[k] = parentA;
mergedReturnStates[k] = a->returnStates[i];
i++;
} else { // b > a, copy b[j] to M
mergedParents[k] = parentB;
mergedReturnStates[k] = b->returnStates[j];
j++;
}
k++;
}
// copy over any payloads remaining in either array
// (at most one of the two inputs still has entries left at this point)
if (i < a->returnStates.size()) {
for (auto p = i; p < a->returnStates.size(); p++) {
mergedParents[k] = a->parents[p];
mergedReturnStates[k] = a->returnStates[p];
k++;
}
} else {
for (auto p = j; p < b->returnStates.size(); p++) {
mergedParents[k] = b->parents[p];
mergedReturnStates[k] = b->returnStates[p];
k++;
}
}
// trim merged if we combined a few that had same stack tops
if (k < mergedParents.size()) { // write index < last position; trim
if (k == 1) { // for just one merged element, return singleton top
auto c = SingletonPredictionContext::create(std::move(mergedParents[0]), mergedReturnStates[0]);
if (mergeCache) {
return mergeCache->put(a, b, std::move(c));
}
return c;
}
mergedParents.resize(k);
mergedReturnStates.resize(k);
}
// Built on the stack first so it can be compared with a and b before any
// shared allocation is made.
ArrayPredictionContext m(std::move(mergedParents), std::move(mergedReturnStates));
// if we created same array as a or b, return that instead
// TODO: track whether this is possible above during merge sort for speed
if (m == *a) {
if (mergeCache) {
#if TRACE_ATN_SIM == 1
std::cout << "mergeArrays a=" << a->toString() << ",b=" << b->toString() << " -> a" << std::endl;
#endif
return mergeCache->put(a, b, a);
}
#if TRACE_ATN_SIM == 1
std::cout << "mergeArrays a=" << a->toString() << ",b=" << b->toString() << " -> a" << std::endl;
#endif
return a;
}
if (m == *b) {
if (mergeCache) {
#if TRACE_ATN_SIM == 1
std::cout << "mergeArrays a=" << a->toString() << ",b=" << b->toString() << " -> b" << std::endl;
#endif
return mergeCache->put(a, b, b);
}
#if TRACE_ATN_SIM == 1
std::cout << "mergeArrays a=" << a->toString() << ",b=" << b->toString() << " -> b" << std::endl;
#endif
return b;
}
// NOTE(review): combineCommonParents is defined outside this view; presumably
// it makes equal parents share one instance - confirm against its definition.
combineCommonParents(m.parents);
auto c = std::make_shared<ArrayPredictionContext>(std::move(m));
#if TRACE_ATN_SIM == 1
std::cout << "mergeArrays a=" << a->toString() << ",b=" << b->toString() << " -> " << c->toString() << std::endl;
#endif
if (mergeCache) {
return mergeCache->put(a, b, std::move(c));
}
return c;
}
std::string PredictionContext::toDOTString(const Ref<const PredictionContext> &context) {
if (context == nullptr) {
return "";
}
std::stringstream ss;
ss << "digraph G {\n" << "rankdir=LR;\n";
std::vector<Ref<const PredictionContext>> nodes = getAllContextNodes(context);
std::unordered_map<const PredictionContext*, size_t> nodeIds;
size_t nodeId = 0;
for (const auto &current : nodes) {
if (current->getContextType() == PredictionContextType::SINGLETON) {
std::string s = std::to_string(insertOrAssignNodeId(nodeIds, nodeId, current.get()));
ss << " s" << s;
std::string returnState = std::to_string(current->getReturnState(0));
if (current == PredictionContext::EMPTY) {
returnState = "$";
}
ss << " [label=\"" << returnState << "\"];\n";
continue;
}
Ref<const ArrayPredictionContext> arr = std::static_pointer_cast<const ArrayPredictionContext>(current);
ss << " s" << insertOrAssignNodeId(nodeIds, nodeId, arr.get()) << " [shape=box, label=\"" << "[";
bool first = true;
for (auto inv : arr->returnStates) {
if (!first) {
ss << ", ";
}
if (inv == EMPTY_RETURN_STATE) {
ss << "$";
} else {
ss << inv;
}
first = false;
}
ss << "]";
ss << "\"];\n";
}
for (const auto &current : nodes) {
if (current == EMPTY) {
continue;
}
for (size_t i = 0; i < current->size(); i++) {
if (!current->getParent(i)) {
continue;
}
ss << " s" << insertOrAssignNodeId(nodeIds, nodeId, current.get()) << "->" << "s" << insertOrAssignNodeId(nodeIds, nodeId, current->getParent(i).get());
if (current->size() > 1) {
ss << " [label=\"parent[" << i << "]\"];\n";
} else {
ss << ";\n";
}
}
}
ss << "}\n";
return ss.str();
}
// The "visited" map is just a temporary structure to control the retrieval process (which is recursive).
Ref<const PredictionContext> PredictionContext::getCachedContext(const Ref<const PredictionContext> &context,
                                                                 PredictionContextCache &contextCache) {
  // Scratch map handed to the recursive implementation; it lives only for the
  // duration of this call.
  std::unordered_map<Ref<const PredictionContext>, Ref<const PredictionContext>> alreadySeen;
  return getCachedContextImpl(context, contextCache, alreadySeen);
}
/// Collect the context nodes gathered by getAllContextNodesImpl() starting
/// from `context`. The `seen` set is a scratch structure passed to that helper.
std::vector<Ref<const PredictionContext>> PredictionContext::getAllContextNodes(const Ref<const PredictionContext> &context) {
  std::vector<Ref<const PredictionContext>> collected;
  std::unordered_set<const PredictionContext*> seen;
  getAllContextNodesImpl(context, collected, seen);
  return collected;
}
/// Convenience overload: same as the three-argument toStrings() with EMPTY
/// used as the stop context.
std::vector<std::string> PredictionContext::toStrings(Recognizer *recognizer, int currentState) const {
  const Ref<const PredictionContext> &stopAt = EMPTY;
  return toStrings(recognizer, stopAt, currentState);
}
/// Produce one bracketed string per path from this context towards the root.
///
/// Each iteration of the outer loop handles one value of `perm`, whose bits
/// encode which parent index to follow at every node on the path: every node
/// consumes `bits` bits of `perm`, starting at `offset`. A `perm` that selects
/// an index beyond a node's size is skipped. Enumeration ends after the first
/// path on which every selected index was the last one available.
///
/// @param recognizer   if non-null, each step prints the rule name of the
///                     ATN state being left; if null, numeric return states
///                     are printed instead
/// @param stop         walking stops when this context is reached
/// @param currentState number of the ATN state the walk starts from
/// @return one "[...]" string per enumerated path
std::vector<std::string> PredictionContext::toStrings(Recognizer *recognizer, const Ref<const PredictionContext> &stop, int currentState) const {
  std::vector<std::string> result;

  for (size_t perm = 0; ; perm++) {
    size_t offset = 0;
    bool last = true;
    const PredictionContext *p = this;
    size_t stateNumber = static_cast<size_t>(currentState);
    std::stringstream ss;
    ss << "[";
    bool outerContinue = false;
    while (!p->isEmpty() && p != stop.get()) {
      size_t index = 0;
      if (p->size() > 0) {
        // Smallest bit width (at least 1) that can represent every parent index.
        size_t bits = 1;
        while ((1ULL << bits) < p->size()) {
          bits++;
        }
        // Shift a size_t, not an int: the former `(1 << bits)` was evaluated
        // in int and overflowed once bits reached the width of int.
        const size_t mask = (size_t{1} << bits) - 1;
        index = (perm >> offset) & mask;
        last &= index >= p->size() - 1;
        if (index >= p->size()) {
          // This perm does not describe a valid path; try the next one.
          outerContinue = true;
          break;
        }
        offset += bits;
      }

      if (recognizer != nullptr) {
        if (ss.tellp() > 1) {
          // first char is '[', if more than that this isn't the first rule
          ss << ' ';
        }

        const ATN &atn = recognizer->getATN();
        ATNState *s = atn.states[stateNumber];
        std::string ruleName = recognizer->getRuleNames()[s->ruleIndex];
        ss << ruleName;
      } else if (p->getReturnState(index) != EMPTY_RETURN_STATE) {
        if (!p->isEmpty()) {
          if (ss.tellp() > 1) {
            // first char is '[', if more than that this isn't the first rule
            ss << ' ';
          }

          ss << p->getReturnState(index);
        }
      }
      // Step to the selected parent; its return state is the next ATN state.
      stateNumber = p->getReturnState(index);
      p = p->getParent(index).get();
    }

    if (outerContinue)
      continue;

    ss << "]";
    result.push_back(ss.str());

    if (last) {
      break;
    }
  }

  return result;
}

View File

@@ -0,0 +1,225 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include <atomic>
#include "Recognizer.h"
#include "atn/ATN.h"
#include "atn/ATNState.h"
#include "atn/PredictionContextType.h"
namespace antlr4 {
class RuleContext;
namespace atn {
class ATN;
class ArrayPredictionContext;
class SingletonPredictionContext;
class PredictionContextCache;
class PredictionContextMergeCache;
/// Abstract base class of the prediction-context graph nodes. A node exposes
/// `size()` (returnState, parent) pairs through getReturnState()/getParent().
/// Instances are neither copyable nor assignable; the static merge functions
/// operate on shared (Ref) handles to const nodes.
class ANTLR4CPP_PUBLIC PredictionContext {
public:
/// Represents $ in local context prediction, which means wildcard.
/// *+x = *.
static const Ref<const PredictionContext> EMPTY;
/// Represents $ in an array in full context mode, when $
/// doesn't mean wildcard: $ + x = [$,x]. Here,
/// $ = EMPTY_RETURN_STATE.
// ml: originally Integer.MAX_VALUE, which would be -1 for us, but this is already used in places where
// -1 is converted to unsigned, so we use a different value here. Any value does the job provided it doesn't
// conflict with real return states.
static constexpr size_t EMPTY_RETURN_STATE = std::numeric_limits<size_t>::max() - 9;
// dispatch
/// Merge two contexts of any kind; the arguments are taken by value.
static Ref<const PredictionContext> merge(Ref<const PredictionContext> a,
Ref<const PredictionContext> b,
bool rootIsWildcard,
PredictionContextMergeCache *mergeCache);
/// <summary>
/// Merge two <seealso cref="SingletonPredictionContext"/> instances.
///
/// <p/>
///
/// Stack tops equal, parents merge is same; return left graph.<br/>
/// <embed src="images/SingletonMerge_SameRootSamePar.svg" type="image/svg+xml"/>
///
/// <p/>
///
/// Same stack top, parents differ; merge parents giving array node, then
/// remainders of those graphs. A new root node is created to point to the
/// merged parents.<br/>
/// <embed src="images/SingletonMerge_SameRootDiffPar.svg" type="image/svg+xml"/>
///
/// <p/>
///
/// Different stack tops pointing to same parent. Make array node for the
/// root where both element in the root point to the same (original)
/// parent.<br/>
/// <embed src="images/SingletonMerge_DiffRootSamePar.svg" type="image/svg+xml"/>
///
/// <p/>
///
/// Different stack tops pointing to different parents. Make array node for
/// the root where each element points to the corresponding original
/// parent.<br/>
/// <embed src="images/SingletonMerge_DiffRootDiffPar.svg" type="image/svg+xml"/>
/// </summary>
/// <param name="a"> the first <seealso cref="SingletonPredictionContext"/> </param>
/// <param name="b"> the second <seealso cref="SingletonPredictionContext"/> </param>
/// <param name="rootIsWildcard"> {@code true} if this is a local-context merge,
/// otherwise false to indicate a full-context merge </param>
/// <param name="mergeCache"> optional cache of earlier merge results; may be null </param>
static Ref<const PredictionContext> mergeSingletons(Ref<const SingletonPredictionContext> a,
Ref<const SingletonPredictionContext> b,
bool rootIsWildcard,
PredictionContextMergeCache *mergeCache);
/**
* Handle case where at least one of {@code a} or {@code b} is
* {@link #EMPTY}. In the following diagrams, the symbol {@code $} is used
* to represent {@link #EMPTY}.
*
* <h2>Local-Context Merges</h2>
*
* <p>These local-context merge operations are used when {@code rootIsWildcard}
* is true.</p>
*
* <p>{@link #EMPTY} is superset of any graph; return {@link #EMPTY}.<br>
* <embed src="images/LocalMerge_EmptyRoot.svg" type="image/svg+xml"/></p>
*
* <p>{@link #EMPTY} and anything is {@code #EMPTY}, so merged parent is
* {@code #EMPTY}; return left graph.<br>
* <embed src="images/LocalMerge_EmptyParent.svg" type="image/svg+xml"/></p>
*
* <p>Special case of last merge if local context.<br>
* <embed src="images/LocalMerge_DiffRoots.svg" type="image/svg+xml"/></p>
*
* <h2>Full-Context Merges</h2>
*
* <p>These full-context merge operations are used when {@code rootIsWildcard}
* is false.</p>
*
* <p><embed src="images/FullMerge_EmptyRoots.svg" type="image/svg+xml"/></p>
*
* <p>Must keep all contexts; {@link #EMPTY} in array is a special value (and
* null parent).<br>
* <embed src="images/FullMerge_EmptyRoot.svg" type="image/svg+xml"/></p>
*
* <p><embed src="images/FullMerge_SameRoot.svg" type="image/svg+xml"/></p>
*
* @param a the first {@link SingletonPredictionContext}
* @param b the second {@link SingletonPredictionContext}
* @param rootIsWildcard {@code true} if this is a local-context merge,
* otherwise false to indicate a full-context merge
*/
static Ref<const PredictionContext> mergeRoot(Ref<const SingletonPredictionContext> a,
Ref<const SingletonPredictionContext> b,
bool rootIsWildcard);
/**
* Merge two {@link ArrayPredictionContext} instances.
*
* <p>Different tops, different parents.<br>
* <embed src="images/ArrayMerge_DiffTopDiffPar.svg" type="image/svg+xml"/></p>
*
* <p>Shared top, same parents.<br>
* <embed src="images/ArrayMerge_ShareTopSamePar.svg" type="image/svg+xml"/></p>
*
* <p>Shared top, different parents.<br>
* <embed src="images/ArrayMerge_ShareTopDiffPar.svg" type="image/svg+xml"/></p>
*
* <p>Shared top, all shared parents.<br>
* <embed src="images/ArrayMerge_ShareTopSharePar.svg" type="image/svg+xml"/></p>
*
* <p>Equal tops, merge parents and reduce top to
* {@link SingletonPredictionContext}.<br>
* <embed src="images/ArrayMerge_EqualTop.svg" type="image/svg+xml"/></p>
*/
static Ref<const PredictionContext> mergeArrays(Ref<const ArrayPredictionContext> a,
Ref<const ArrayPredictionContext> b,
bool rootIsWildcard,
PredictionContextMergeCache *mergeCache);
/// Render the graph reachable from `context` in Graphviz DOT syntax.
static std::string toDOTString(const Ref<const PredictionContext> &context);
/// Look `context` up in, or add it to, `contextCache`.
/// NOTE(review): exact canonicalisation rules live in the implementation, not shown in this header.
static Ref<const PredictionContext> getCachedContext(const Ref<const PredictionContext> &context,
PredictionContextCache &contextCache);
/// Collect the context nodes reachable from `context`.
static std::vector<Ref<const PredictionContext>> getAllContextNodes(const Ref<const PredictionContext> &context);
/// Convert a RuleContext tree to a PredictionContext graph.
/// Return EMPTY if outerContext is empty.
static Ref<const PredictionContext> fromRuleContext(const ATN &atn, RuleContext *outerContext);
// Copying and assignment are disabled; only derived classes may move-construct.
PredictionContext(const PredictionContext&) = delete;
virtual ~PredictionContext() = default;
PredictionContext& operator=(const PredictionContext&) = delete;
PredictionContext& operator=(PredictionContext&&) = delete;
/// The concrete kind of this node, fixed at construction.
PredictionContextType getContextType() const { return _contextType; }
/// Number of (returnState, parent) pairs held by this node.
virtual size_t size() const = 0;
/// Parent stored at position `index`; the returned Ref may be null.
virtual const Ref<const PredictionContext>& getParent(size_t index) const = 0;
/// Return state stored at position `index`.
virtual size_t getReturnState(size_t index) const = 0;
/// This means only the EMPTY (wildcard? not sure) context is in set.
virtual bool isEmpty() const = 0;
// NOTE(review): implementation not visible in this header; presumably reports
// whether one of the return states is EMPTY_RETURN_STATE - confirm.
bool hasEmptyPath() const;
// NOTE(review): implementation not visible in this header; presumably backed
// by hashCodeImpl() and the _hashCode member below - confirm.
size_t hashCode() const;
/// Structural equality; this is what operator== on PredictionContext calls.
virtual bool equals(const PredictionContext &other) const = 0;
virtual std::string toString() const = 0;
/// Same as the three-argument overload below, without an explicit stop context.
std::vector<std::string> toStrings(Recognizer *recognizer, int currentState) const;
/// One string per path from this node towards the root, stopping at `stop`.
std::vector<std::string> toStrings(Recognizer *recognizer,
const Ref<const PredictionContext> &stop,
int currentState) const;
protected:
explicit PredictionContext(PredictionContextType contextType);
PredictionContext(PredictionContext&& other);
/// Computes the hash value of the concrete node.
virtual size_t hashCodeImpl() const = 0;
/// Reads the stored hash value with a relaxed atomic load.
size_t cachedHashCode() const { return _hashCode.load(std::memory_order_relaxed); }
private:
const PredictionContextType _contextType;
// mutable + atomic: may be written from const member functions.
mutable std::atomic<size_t> _hashCode;
};
/// Equality of two prediction contexts, delegated to the virtual equals().
inline bool operator==(const PredictionContext &lhs, const PredictionContext &rhs) {
  const bool same = lhs.equals(rhs);
  return same;
}
/// Inequality of two prediction contexts: the negation of operator==.
inline bool operator!=(const PredictionContext &lhs, const PredictionContext &rhs) {
  return !(lhs == rhs);
}
} // namespace atn
} // namespace antlr4
namespace std {

  /// std::hash support for PredictionContext: forwards to hashCode().
  template <>
  struct hash<::antlr4::atn::PredictionContext> {
    size_t operator()(const ::antlr4::atn::PredictionContext &context) const {
      const size_t code = context.hashCode();
      return code;
    }
  };

} // namespace std

View File

@@ -0,0 +1,56 @@
// Copyright 2012-2022 The ANTLR Project
//
// Redistribution and use in source and binary forms, with or without modification, are permitted
// provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this list of conditions
// and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice, this list of
// conditions and the following disclaimer in the documentation and/or other materials provided
// with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its contributors may be used to
// endorse or promote products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "atn/PredictionContextCache.h"
using namespace antlr4::atn;
// Adds `value` to the cache set. The context must be non-null (asserted).
void PredictionContextCache::put(const Ref<const PredictionContext> &value) {
  assert(value != nullptr);
  _data.insert(value);
}
// Returns the cached context that compares equal to `value`, or null when the
// cache holds no such context. `value` must be non-null (asserted).
Ref<const PredictionContext> PredictionContextCache::get(
    const Ref<const PredictionContext> &value) const {
  assert(value != nullptr);
  const auto match = _data.find(value);
  if (match != _data.end()) {
    return *match;
  }
  return nullptr;
}
// Hashes the pointed-to context rather than the pointer, by calling hashCode().
size_t PredictionContextCache::PredictionContextHasher::operator()(
    const Ref<const PredictionContext> &predictionContext) const {
  const PredictionContext &target = *predictionContext;
  return target.hashCode();
}
// Compares the pointed-to contexts by value rather than by pointer identity.
bool PredictionContextCache::PredictionContextComparer::operator()(
    const Ref<const PredictionContext> &lhs,
    const Ref<const PredictionContext> &rhs) const {
  const PredictionContext &left = *lhs;
  const PredictionContext &right = *rhs;
  return left == right;
}

View File

@@ -0,0 +1,63 @@
// Copyright 2012-2022 The ANTLR Project
//
// Redistribution and use in source and binary forms, with or without modification, are permitted
// provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this list of conditions
// and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice, this list of
// conditions and the following disclaimer in the documentation and/or other materials provided
// with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its contributors may be used to
// endorse or promote products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "atn/PredictionContext.h"
#include "FlatHashSet.h"
namespace antlr4 {
namespace atn {
// A set of prediction contexts keyed by the contexts' value: the hasher and
// comparer declared below take Ref handles and are used as the set's hash and
// equality functors. The cache can be neither copied nor moved.
class ANTLR4CPP_PUBLIC PredictionContextCache final {
public:
PredictionContextCache() = default;
PredictionContextCache(const PredictionContextCache&) = delete;
PredictionContextCache(PredictionContextCache&&) = delete;
PredictionContextCache& operator=(const PredictionContextCache&) = delete;
PredictionContextCache& operator=(PredictionContextCache&&) = delete;
// Adds `value` to the cache.
void put(const Ref<const PredictionContext> &value);
// Looks up a stored context matching `value`.
Ref<const PredictionContext> get(const Ref<const PredictionContext> &value) const;
private:
// Hash functor for the set.
struct ANTLR4CPP_PUBLIC PredictionContextHasher final {
size_t operator()(const Ref<const PredictionContext> &predictionContext) const;
};
// Equality functor for the set.
struct ANTLR4CPP_PUBLIC PredictionContextComparer final {
bool operator()(const Ref<const PredictionContext> &lhs,
const Ref<const PredictionContext> &rhs) const;
};
// The stored contexts.
FlatHashSet<Ref<const PredictionContext>,
PredictionContextHasher, PredictionContextComparer> _data;
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,167 @@
// Copyright 2012-2022 The ANTLR Project
//
// Redistribution and use in source and binary forms, with or without modification, are permitted
// provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this list of conditions
// and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice, this list of
// conditions and the following disclaimer in the documentation and/or other materials provided
// with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its contributors may be used to
// endorse or promote products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "atn/PredictionContextMergeCache.h"
#include "misc/MurmurHash.h"
using namespace antlr4::atn;
using namespace antlr4::misc;
// Creates an empty merge cache that keeps its own copy of `options`.
PredictionContextMergeCache::PredictionContextMergeCache(
    const PredictionContextMergeCacheOptions &options)
    : _options(options) {
}
// Stores `value` as the merge result for (key1, key2) and returns the value
// now held by the cache for that key pair.
//
// With a maximum size of 0 nothing is stored and `value` is returned as is.
// A new entry is placed at the front of the usage list; an existing entry has
// its value replaced if it differs and is moved to the front. Afterwards the
// cache is compacted, never evicting the entry just touched.
Ref<const PredictionContext> PredictionContextMergeCache::put(
    const Ref<const PredictionContext> &key1,
    const Ref<const PredictionContext> &key2,
    Ref<const PredictionContext> value) {
  assert(key1);
  assert(key2);

  if (getOptions().getMaxSize() == 0) {
    // Cache is effectively disabled.
    return value;
  }

  auto [slot, isNew] = _entries.try_emplace(std::make_pair(key1.get(), key2.get()));

  if (!isNew) {
    Entry *const known = slot->second.get();
    if (known->value != value) {
      known->value = std::move(value);
    }
    moveToFront(known);
    compact(known);
    return known->value;
  }

  // The map slot exists but is still empty; if allocating the entry fails,
  // take the slot out again so no null entry stays behind.
  try {
    slot->second = std::make_unique<Entry>();
  } catch (...) {
    _entries.erase(slot);
    throw;
  }

  Entry *const fresh = slot->second.get();
  // The entry keeps owning references to both keys; the map key itself only
  // holds raw pointers.
  fresh->key = std::make_pair(key1, key2);
  fresh->value = std::move(value);
  pushToFront(fresh);
  compact(fresh);
  return fresh->value;
}
// Returns the cached merge result for (key1, key2), or null if there is none
// or the cache is disabled (maximum size 0). A hit moves the entry to the
// front of the usage list.
Ref<const PredictionContext> PredictionContextMergeCache::get(
    const Ref<const PredictionContext> &key1,
    const Ref<const PredictionContext> &key2) const {
  assert(key1);
  assert(key2);

  if (getOptions().getMaxSize() == 0) {
    // Cache is effectively disabled.
    return nullptr;
  }

  const auto found = _entries.find(std::make_pair(key1.get(), key2.get()));
  if (found != _entries.end()) {
    Entry *const hit = found->second.get();
    moveToFront(hit);
    return hit->value;
  }
  return nullptr;
}
// Drops every entry and resets the usage list and the entry count.
void PredictionContextMergeCache::clear() {
  // Swapping with a temporary hands the old container to the temporary,
  // which is destroyed at the end of this statement.
  Container().swap(_entries);
  _tail = nullptr;
  _head = nullptr;
  _size = 0;
}
// Makes `entry`, which must already be linked into the usage list, the new
// head of that list. Does nothing if it already is the head.
void PredictionContextMergeCache::moveToFront(Entry *entry) const {
  Entry *const before = entry->prev;
  if (before == nullptr) {
    assert(entry == _head);
    return;
  }

  // Unlink from the current position.
  Entry *const after = entry->next;
  before->next = after;
  if (after == nullptr) {
    assert(entry == _tail);
    _tail = before;
  } else {
    after->prev = before;
  }

  // Relink in front of the current head.
  entry->prev = nullptr;
  entry->next = _head;
  _head->prev = entry;
  _head = entry;
  assert(entry->prev == nullptr);
}
// Links a new, not yet listed `entry` in as head of the usage list and
// increments the entry count.
void PredictionContextMergeCache::pushToFront(Entry *entry) {
  ++_size;
  entry->prev = nullptr;
  entry->next = _head;
  if (_head == nullptr) {
    // First entry: it is both head and tail.
    assert(entry->next == nullptr);
    _tail = entry;
  } else {
    _head->prev = entry;
  }
  _head = entry;
  assert(entry->prev == nullptr);
}
// Unlinks `entry` from the usage list, decrements the entry count and erases
// the entry's key from the map.
void PredictionContextMergeCache::remove(Entry *entry) {
  Entry *const before = entry->prev;
  Entry *const after = entry->next;

  if (before == nullptr) {
    assert(entry == _head);
    _head = after;
  } else {
    before->next = after;
  }

  if (after == nullptr) {
    assert(entry == _tail);
    _tail = before;
  } else {
    after->prev = before;
  }

  --_size;
  // The lookup key is built from the entry's own key references before the
  // map erases the slot.
  _entries.erase(std::make_pair(entry->key.first.get(), entry->key.second.get()));
}
void PredictionContextMergeCache::compact(const Entry *preserve) {
Entry *entry = _tail;
while (entry != nullptr && _size > getOptions().getMaxSize()) {
Entry *next = entry->prev;
if (entry != preserve) {
remove(entry);
}
entry = next;
}
}
// Hashes a key pair by feeding the hash codes of both pointed-to contexts,
// first then second, through MurmurHash.
size_t PredictionContextMergeCache::PredictionContextHasher::operator()(
    const PredictionContextPair &value) const {
  const size_t firstCode = value.first->hashCode();
  const size_t secondCode = value.second->hashCode();

  size_t running = MurmurHash::initialize();
  running = MurmurHash::update(running, firstCode);
  running = MurmurHash::update(running, secondCode);
  return MurmurHash::finish(running, 2);
}
// Two key pairs are equal when their first contexts are equal and their
// second contexts are equal; the contexts are compared by value.
bool PredictionContextMergeCache::PredictionContextComparer::operator()(
    const PredictionContextPair &lhs, const PredictionContextPair &rhs) const {
  if (!(*lhs.first == *rhs.first)) {
    return false;
  }
  return *lhs.second == *rhs.second;
}

View File

@@ -0,0 +1,101 @@
// Copyright 2012-2022 The ANTLR Project
//
// Redistribution and use in source and binary forms, with or without modification, are permitted
// provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this list of conditions
// and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice, this list of
// conditions and the following disclaimer in the documentation and/or other materials provided
// with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its contributors may be used to
// endorse or promote products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <utility>
#include "atn/PredictionContext.h"
#include "atn/PredictionContextMergeCacheOptions.h"
#include "FlatHashMap.h"
namespace antlr4 {
namespace atn {
// Cache of merge results keyed by an ordered pair of prediction contexts.
// Entries are additionally chained through prev/next pointers with _head and
// _tail marking the ends of that chain. The cache can be neither copied nor
// moved.
class ANTLR4CPP_PUBLIC PredictionContextMergeCache final {
public:
// Default construction uses default-constructed options.
PredictionContextMergeCache()
: PredictionContextMergeCache(PredictionContextMergeCacheOptions()) {}
explicit PredictionContextMergeCache(const PredictionContextMergeCacheOptions &options);
PredictionContextMergeCache(const PredictionContextMergeCache&) = delete;
PredictionContextMergeCache(PredictionContextMergeCache&&) = delete;
PredictionContextMergeCache& operator=(const PredictionContextMergeCache&) = delete;
PredictionContextMergeCache& operator=(PredictionContextMergeCache&&) = delete;
// Stores `value` for the ordered key pair (key1, key2).
Ref<const PredictionContext> put(const Ref<const PredictionContext> &key1,
const Ref<const PredictionContext> &key2,
Ref<const PredictionContext> value);
// Looks up the value stored for the ordered key pair (key1, key2).
Ref<const PredictionContext> get(const Ref<const PredictionContext> &key1,
const Ref<const PredictionContext> &key2) const;
// The options this cache was constructed with.
const PredictionContextMergeCacheOptions& getOptions() const { return _options; }
void clear();
private:
// Map key: raw, non-owning pointers to the two contexts.
using PredictionContextPair = std::pair<const PredictionContext*, const PredictionContext*>;
// Hash functor for the map key.
struct ANTLR4CPP_PUBLIC PredictionContextHasher final {
size_t operator()(const PredictionContextPair &value) const;
};
// Equality functor for the map key.
struct ANTLR4CPP_PUBLIC PredictionContextComparer final {
bool operator()(const PredictionContextPair &lhs, const PredictionContextPair &rhs) const;
};
// One cached merge: owning references to both keys, the merged value, and
// the links to the neighbouring entries.
struct ANTLR4CPP_PUBLIC Entry final {
std::pair<Ref<const PredictionContext>, Ref<const PredictionContext>> key;
Ref<const PredictionContext> value;
Entry *prev = nullptr;
Entry *next = nullptr;
};
// const because the const get() calls it; it updates the mutable _head/_tail.
void moveToFront(Entry *entry) const;
void pushToFront(Entry *entry);
void remove(Entry *entry);
void compact(const Entry *preserve);
using Container = FlatHashMap<PredictionContextPair, std::unique_ptr<Entry>,
PredictionContextHasher, PredictionContextComparer>;
const PredictionContextMergeCacheOptions _options;
// Owns the entries.
Container _entries;
// Ends of the entry chain; mutable so that const member functions can relink it.
mutable Entry *_head = nullptr;
mutable Entry *_tail = nullptr;
// Number of entries currently linked into the chain.
size_t _size = 0;
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,71 @@
// Copyright 2012-2022 The ANTLR Project
//
// Redistribution and use in source and binary forms, with or without modification, are permitted
// provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this list of conditions
// and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice, this list of
// conditions and the following disclaimer in the documentation and/or other materials provided
// with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its contributors may be used to
// endorse or promote products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <cstddef>
#include <cstdint>
#include <limits>
#include "antlr4-common.h"
namespace antlr4 {
namespace atn {
// Option bundle consumed by PredictionContextMergeCache. It carries two
// settings: a maximum size and a "clear every N" interval. Every setter returns
// *this so that calls can be chained.
//
// NOTE(review): the unit of the maximum size (presumably cache entries) and what
// is counted by the clear interval are decided by the cache implementation, not
// by this class - confirm there before relying on them.
class ANTLR4CPP_PUBLIC PredictionContextMergeCacheOptions final {
public:
  PredictionContextMergeCacheOptions() = default;

  // Returns the configured maximum size. The default is the largest size_t value.
  size_t getMaxSize() const { return _maxSize; }

  // True when the maximum size differs from the default (largest size_t value).
  bool hasMaxSize() const { return getMaxSize() != std::numeric_limits<size_t>::max(); }

  // Stores a new maximum size and returns *this for chaining.
  PredictionContextMergeCacheOptions& setMaxSize(size_t maxSize) {
    _maxSize = maxSize;
    return *this;
  }

  // Returns the clear interval as a size_t. The interval is stored as a 64-bit
  // value, so where size_t is narrower than 64 bits the result is truncated; the
  // cast makes that narrowing explicit rather than implicit.
  size_t getClearEveryN() const {
    return static_cast<size_t>(_clearEveryN);
  }

  // True when the stored clear interval is non-zero. The 64-bit field is tested
  // directly: going through getClearEveryN() would narrow it first, and with a
  // 32-bit size_t any non-zero multiple of 2^32 would be misreported as zero.
  bool hasClearEveryN() const { return _clearEveryN != 0; }

  // Stores a new clear interval and returns *this for chaining.
  PredictionContextMergeCacheOptions& setClearEveryN(uint64_t clearEveryN) {
    _clearEveryN = clearEveryN;
    return *this;
  }

  // Shorthand for setClearEveryN(0), the value hasClearEveryN() reports as unset.
  PredictionContextMergeCacheOptions& neverClear() {
    return setClearEveryN(0);
  }

private:
  size_t _maxSize = std::numeric_limits<size_t>::max();
  uint64_t _clearEveryN = 1;
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,21 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include <cstddef>
#include "antlr4-common.h"
namespace antlr4 {
namespace atn {
// Kinds of prediction context, as named by the enumerators. The numeric values
// are assigned explicitly (starting at 1) and the underlying type is size_t.
enum class PredictionContextType : std::size_t {
  SINGLETON = 1,
  ARRAY = 2
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,202 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "atn/RuleStopState.h"
#include "atn/ATNConfigSet.h"
#include "atn/ATNConfig.h"
#include "misc/MurmurHash.h"
#include "SemanticContext.h"
#include "PredictionMode.h"
using namespace antlr4;
using namespace antlr4::atn;
using namespace antlrcpp;
struct AltAndContextConfigHasher
{
/**
* The hash code is only a function of the {@link ATNState#stateNumber}
* and {@link ATNConfig#context}.
*/
size_t operator () (ATNConfig *o) const {
size_t hashCode = misc::MurmurHash::initialize(7);
hashCode = misc::MurmurHash::update(hashCode, o->state->stateNumber);
hashCode = misc::MurmurHash::update(hashCode, o->context);
return misc::MurmurHash::finish(hashCode, 2);
}
};
/**
 * Equality functor over ATNConfig pointers. Two configurations compare equal
 * when they are the same object, or when their state numbers match and their
 * contexts compare equal by value.
 */
struct AltAndContextConfigComparer {
  bool operator()(ATNConfig *a, ATNConfig *b) const {
    // The pointer identity test comes first, so identical pointers never reach
    // the member-wise comparison.
    return a == b
        || (a->state->stateNumber == b->state->stateNumber && *a->context == *b->context);
  }
};
/**
 * Reports whether prediction over `configs` should terminate.
 *
 * Returns true immediately when every configuration is in a rule stop state.
 * Otherwise returns true when some conflicting-alt subset holds more than one
 * alt and no state is associated with exactly one alt.
 *
 * @param mode    prediction mode in effect
 * @param configs configuration set to inspect
 */
bool PredictionModeClass::hasSLLConflictTerminatingPrediction(PredictionMode mode, ATNConfigSet *configs) {
  /* Configs in rule stop states indicate reaching the end of the decision
   * rule (local context) or end of start rule (full context). When all of
   * them are in that position nothing can match further input, so prediction
   * terminates.
   */
  if (allConfigsInRuleStopStates(configs)) {
    return true;
  }

  // Semantic predicates are only stripped outside pure SLL mode, and only when
  // the set actually carries a semantic context. Combining configs from
  // different semantic contexts costs time and full LL is often needed anyway.
  const bool stripPredicates = mode != PredictionMode::SLL && configs->hasSemanticContext;
  if (!stripPredicates) {
    std::vector<antlrcpp::BitSet> subsets = getConflictingAltSubsets(configs);
    return hasConflictingAltSet(subsets) && !hasStateAssociatedWithOneAlt(configs);
  }

  // Duplicate every config with the empty semantic context, then run the same
  // analysis on the duplicates.
  ATNConfigSet stripped(true);
  for (auto &original : configs->configs) {
    Ref<ATNConfig> copy = std::make_shared<ATNConfig>(*original, SemanticContext::Empty::Instance);
    stripped.add(copy);
  }
  std::vector<antlrcpp::BitSet> subsets = getConflictingAltSubsets(&stripped);
  return hasConflictingAltSet(subsets) && !hasStateAssociatedWithOneAlt(&stripped);
}
/**
 * Returns true as soon as one configuration in `configs` has a state accepted
 * by RuleStopState::is; returns false when none does (including an empty set).
 */
bool PredictionModeClass::hasConfigInRuleStopState(ATNConfigSet *configs) {
  for (const auto &entry : configs->configs) {
    if (!RuleStopState::is(entry->state)) {
      continue;  // keep scanning for a configuration in a rule stop state
    }
    return true;
  }
  return false;
}
/**
 * Returns true when every configuration in `configs` has a state accepted by
 * RuleStopState::is (trivially true for an empty set); returns false at the
 * first configuration that does not.
 */
bool PredictionModeClass::allConfigsInRuleStopStates(ATNConfigSet *configs) {
  for (const auto &entry : configs->configs) {
    if (RuleStopState::is(entry->state)) {
      continue;  // this one qualifies, check the next
    }
    return false;
  }
  return true;
}
/**
 * Forwards to getSingleViableAlt and hands back its result unchanged.
 */
size_t PredictionModeClass::resolvesToJustOneViableAlt(const std::vector<antlrcpp::BitSet>& altsets) {
  const size_t viableAlt = getSingleViableAlt(altsets);
  return viableAlt;
}
/**
 * Logical negation of hasNonConflictingAltSet: true when no subset in
 * `altsets` is reported as non-conflicting.
 */
bool PredictionModeClass::allSubsetsConflict(const std::vector<antlrcpp::BitSet>& altsets) {
  const bool anyNonConflicting = hasNonConflictingAltSet(altsets);
  return !anyNonConflicting;
}
/**
 * Returns true when at least one subset in `altsets` has a count() of exactly
 * 1; returns false otherwise, including for an empty vector.
 */
bool PredictionModeClass::hasNonConflictingAltSet(const std::vector<antlrcpp::BitSet>& altsets) {
  // Bind by const reference: iterating by value copied a whole BitSet per
  // element just to read its count.
  for (const antlrcpp::BitSet &alts : altsets) {
    if (alts.count() == 1) {
      return true;
    }
  }
  return false;
}
/**
 * Returns true when at least one subset in `altsets` has a count() greater
 * than 1; returns false otherwise, including for an empty vector.
 */
bool PredictionModeClass::hasConflictingAltSet(const std::vector<antlrcpp::BitSet>& altsets) {
  // Bind by const reference: iterating by value copied a whole BitSet per
  // element just to read its count.
  for (const antlrcpp::BitSet &alts : altsets) {
    if (alts.count() > 1) {
      return true;
    }
  }
  return false;
}
/**
 * Returns true when every subset in `altsets` compares equal to the first one.
 * An empty vector yields true.
 */
bool PredictionModeClass::allSubsetsEqual(const std::vector<antlrcpp::BitSet>& altsets) {
  if (!altsets.empty()) {
    // Everything is measured against the leading element.
    const antlrcpp::BitSet &reference = altsets.front();
    for (const antlrcpp::BitSet &candidate : altsets) {
      if (candidate != reference) {
        return false;
      }
    }
  }
  return true;
}
/**
 * Unions all subsets in `altsets`. When the union has a count() of exactly 1,
 * returns nextSetBit(0) of that union; otherwise returns
 * ATN::INVALID_ALT_NUMBER.
 */
size_t PredictionModeClass::getUniqueAlt(const std::vector<antlrcpp::BitSet>& altsets) {
  antlrcpp::BitSet combined = getAlts(altsets);
  if (combined.count() != 1) {
    return ATN::INVALID_ALT_NUMBER;
  }
  return combined.nextSetBit(0);
}
/**
 * Returns the bitwise OR of every subset in `altsets`. An empty vector yields
 * a default-constructed BitSet.
 */
antlrcpp::BitSet PredictionModeClass::getAlts(const std::vector<antlrcpp::BitSet>& altsets) {
  antlrcpp::BitSet combined;
  for (const antlrcpp::BitSet &subset : altsets) {
    combined |= subset;
  }
  return combined;
}
/**
 * Builds a BitSet with one bit set per `alt` value found among the
 * configurations in `configs`.
 */
antlrcpp::BitSet PredictionModeClass::getAlts(ATNConfigSet *configs) {
  antlrcpp::BitSet seenAlts;
  for (const auto &entry : configs->configs) {
    seenAlts.set(entry->alt);
  }
  return seenAlts;
}
/**
 * Groups the configurations in `configs` using AltAndContextConfigHasher and
 * AltAndContextConfigComparer as the key policy, collects the `alt` of every
 * member of a group into one BitSet, and returns those BitSets.
 *
 * The order of the returned subsets follows the iteration order of the
 * unordered map used for grouping.
 */
std::vector<antlrcpp::BitSet> PredictionModeClass::getConflictingAltSubsets(ATNConfigSet *configs) {
  using AltsByConfig = std::unordered_map<ATNConfig*, antlrcpp::BitSet,
                                          AltAndContextConfigHasher, AltAndContextConfigComparer>;

  AltsByConfig grouped;
  for (auto &entry : configs->configs) {
    ATNConfig *key = entry.get();
    grouped[key].set(key->alt);
  }

  std::vector<antlrcpp::BitSet> subsets;
  subsets.reserve(grouped.size());
  for (auto it = grouped.cbegin(); it != grouped.cend(); ++it) {
    subsets.push_back(it->second);
  }
  return subsets;
}
/**
 * Returns a map from each state pointer seen in `configs` to a BitSet holding
 * the `alt` values of the configurations that reference that state.
 */
std::unordered_map<ATNState*, antlrcpp::BitSet> PredictionModeClass::getStateToAltMap(ATNConfigSet *configs) {
  std::unordered_map<ATNState*, antlrcpp::BitSet> altsByState;
  for (const auto &entry : configs->configs) {
    // operator[] creates an empty BitSet the first time a state is seen.
    antlrcpp::BitSet &alts = altsByState[entry->state];
    alts.set(entry->alt);
  }
  return altsByState;
}
/**
 * Returns true when some state in the map built by getStateToAltMap has a
 * BitSet whose count() is exactly 1; false otherwise.
 */
bool PredictionModeClass::hasStateAssociatedWithOneAlt(ATNConfigSet *configs) {
  const auto altsByState = getStateToAltMap(configs);
  for (auto it = altsByState.begin(); it != altsByState.end(); ++it) {
    if (it->second.count() == 1) {
      return true;
    }
  }
  return false;
}
/**
 * Takes nextSetBit(0) of every subset in `altsets` (the subset's minimum alt)
 * and records it. As soon as two different values have been recorded the
 * result is ATN::INVALID_ALT_NUMBER; otherwise the result is nextSetBit(0) of
 * the recorded set.
 */
size_t PredictionModeClass::getSingleViableAlt(const std::vector<antlrcpp::BitSet>& altsets) {
  antlrcpp::BitSet minimums;
  for (const auto &subset : altsets) {
    const size_t lowest = subset.nextSetBit(0);
    minimums.set(lowest);
    if (minimums.count() <= 1) {
      continue;
    }
    // More than one viable alt: there is no single answer.
    return ATN::INVALID_ALT_NUMBER;
  }
  return minimums.nextSetBit(0);
}

Some files were not shown because too many files have changed in this diff Show More