Initial commit from sysy-main

This commit is contained in:
Lixuanwang
2025-02-27 23:14:53 +08:00
commit cc523fd30b
1125 changed files with 257793 additions and 0 deletions

View File

@@ -0,0 +1,124 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "atn/ATN.h"
#include "atn/ATNDeserializer.h"
#include "Vocabulary.h"
#include "misc/InterpreterDataReader.h"
using namespace antlr4::dfa;
using namespace antlr4::atn;
using namespace antlr4::misc;
InterpreterData::InterpreterData(std::vector<std::string> const& literalNames, std::vector<std::string> const& symbolicNames)
: vocabulary(literalNames, symbolicNames) {
}
InterpreterData InterpreterDataReader::parseFile(std::string const& fileName) {
// The structure of the data file is very simple. Everything is line based with empty lines
// separating the different parts. For lexers the layout is:
// token literal names:
// ...
//
// token symbolic names:
// ...
//
// rule names:
// ...
//
// channel names:
// ...
//
// mode names:
// ...
//
// atn:
// <a single line with comma separated int values> enclosed in a pair of squared brackets.
//
// Data for a parser does not contain channel and mode names.
std::ifstream input(fileName);
if (!input.good())
return {};
std::vector<std::string> literalNames;
std::vector<std::string> symbolicNames;
std::string line;
std::getline(input, line, '\n');
assert(line == "token literal names:");
while (true) {
std::getline(input, line, '\n');
if (line.empty())
break;
literalNames.push_back(line == "null" ? "" : line);
};
std::getline(input, line, '\n');
assert(line == "token symbolic names:");
while (true) {
std::getline(input, line, '\n');
if (line.empty())
break;
symbolicNames.push_back(line == "null" ? "" : line);
};
InterpreterData result(literalNames, symbolicNames);
std::getline(input, line, '\n');
assert(line == "rule names:");
while (true) {
std::getline(input, line, '\n');
if (line.empty())
break;
result.ruleNames.push_back(line);
};
std::getline(input, line, '\n');
if (line == "channel names:") {
while (true) {
std::getline(input, line, '\n');
if (line.empty())
break;
result.channels.push_back(line);
};
std::getline(input, line, '\n');
assert(line == "mode names:");
while (true) {
std::getline(input, line, '\n');
if (line.empty())
break;
result.modes.push_back(line);
};
}
std::vector<int32_t> serializedATN;
std::getline(input, line, '\n');
assert(line == "atn:");
std::getline(input, line, '\n');
std::stringstream tokenizer(line);
std::string value;
while (tokenizer.good()) {
std::getline(tokenizer, value, ',');
unsigned long number;
if (value[0] == '[')
number = std::strtoul(&value[1], nullptr, 10);
else
number = std::strtoul(value.c_str(), nullptr, 10);
serializedATN.push_back(static_cast<int32_t>(number));
}
ATNDeserializer deserializer;
result.atn = deserializer.deserialize(serializedATN);
return result;
}

View File

@@ -0,0 +1,33 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "antlr4-common.h"
#include "atn/ATN.h"
#include "Vocabulary.h"
namespace antlr4 {
namespace misc {
struct InterpreterData {
std::unique_ptr<atn::ATN> atn;
dfa::Vocabulary vocabulary;
std::vector<std::string> ruleNames;
std::vector<std::string> channels; // Only valid for lexer grammars.
std::vector<std::string> modes; // ditto
InterpreterData() {}; // For invalid content.
InterpreterData(std::vector<std::string> const& literalNames, std::vector<std::string> const& symbolicNames);
};
// A class to read plain text interpreter data produced by ANTLR.
class ANTLR4CPP_PUBLIC InterpreterDataReader {
public:
static InterpreterData parseFile(std::string const& fileName);
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,61 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "misc/Interval.h"
using namespace antlr4::misc;
const Interval Interval::INVALID;
size_t Interval::hashCode() const {
size_t hash = 23;
hash = hash * 31 + static_cast<size_t>(a);
hash = hash * 31 + static_cast<size_t>(b);
return hash;
}
bool Interval::startsBeforeDisjoint(const Interval &other) const {
return a < other.a && b < other.a;
}
bool Interval::startsBeforeNonDisjoint(const Interval &other) const {
return a <= other.a && b >= other.a;
}
bool Interval::startsAfter(const Interval &other) const {
return a > other.a;
}
bool Interval::startsAfterDisjoint(const Interval &other) const {
return a > other.b;
}
bool Interval::startsAfterNonDisjoint(const Interval &other) const {
return a > other.a && a <= other.b; // b >= other.b implied
}
bool Interval::disjoint(const Interval &other) const {
return startsBeforeDisjoint(other) || startsAfterDisjoint(other);
}
bool Interval::adjacent(const Interval &other) const {
return a == other.b + 1 || b == other.a - 1;
}
bool Interval::properlyContains(const Interval &other) const {
return other.a >= a && other.b <= b;
}
Interval Interval::Union(const Interval &other) const {
return Interval(std::min(a, other.a), std::max(b, other.b));
}
Interval Interval::intersection(const Interval &other) const {
return Interval(std::max(a, other.a), std::min(b, other.b));
}
std::string Interval::toString() const {
return std::to_string(a) + ".." + std::to_string(b);
}

View File

@@ -0,0 +1,84 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "antlr4-common.h"
namespace antlr4 {
namespace misc {
// Helpers to convert certain unsigned symbols (e.g. Token::EOF) to their original numeric value (e.g. -1)
// and vice versa. This is needed mostly for intervals to keep their original order and for toString()
// methods to print the original numeric value (e.g. for tests).
constexpr size_t numericToSymbol(ssize_t v) { return static_cast<size_t>(v); }
constexpr ssize_t symbolToNumeric(size_t v) { return static_cast<ssize_t>(v); }
/// An immutable inclusive interval a..b
class ANTLR4CPP_PUBLIC Interval final {
public:
static const Interval INVALID;
// Must stay signed to guarantee the correct sort order.
ssize_t a;
ssize_t b;
constexpr Interval() : Interval(static_cast<ssize_t>(-1), static_cast<ssize_t>(-2)) {}
constexpr explicit Interval(size_t a_, size_t b_) : Interval(symbolToNumeric(a_), symbolToNumeric(b_)) {}
constexpr Interval(ssize_t a_, ssize_t b_) : a(a_), b(b_) {}
/// return number of elements between a and b inclusively. x..x is length 1.
/// if b < a, then length is 0. 9..10 has length 2.
constexpr size_t length() const { return b >= a ? static_cast<size_t>(b - a + 1) : 0; }
constexpr bool operator==(const Interval &other) const { return a == other.a && b == other.b; }
size_t hashCode() const;
/// <summary>
/// Does this start completely before other? Disjoint </summary>
bool startsBeforeDisjoint(const Interval &other) const;
/// <summary>
/// Does this start at or before other? Nondisjoint </summary>
bool startsBeforeNonDisjoint(const Interval &other) const;
/// <summary>
/// Does this.a start after other.b? May or may not be disjoint </summary>
bool startsAfter(const Interval &other) const;
/// <summary>
/// Does this start completely after other? Disjoint </summary>
bool startsAfterDisjoint(const Interval &other) const;
/// <summary>
/// Does this start after other? NonDisjoint </summary>
bool startsAfterNonDisjoint(const Interval &other) const;
/// <summary>
/// Are both ranges disjoint? I.e., no overlap? </summary>
bool disjoint(const Interval &other) const;
/// <summary>
/// Are two intervals adjacent such as 0..41 and 42..42? </summary>
bool adjacent(const Interval &other) const;
bool properlyContains(const Interval &other) const;
/// <summary>
/// Return the interval computed from combining this and other </summary>
Interval Union(const Interval &other) const;
/// <summary>
/// Return the interval in common between this and o </summary>
Interval intersection(const Interval &other) const;
std::string toString() const;
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,508 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include "misc/MurmurHash.h"
#include "Lexer.h"
#include "Exceptions.h"
#include "Vocabulary.h"
#include "misc/IntervalSet.h"
using namespace antlr4;
using namespace antlr4::misc;
IntervalSet const IntervalSet::COMPLETE_CHAR_SET =
IntervalSet::of(Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE);
IntervalSet const IntervalSet::EMPTY_SET;
IntervalSet::IntervalSet() : _intervals() {
}
IntervalSet::IntervalSet(const IntervalSet &set) : IntervalSet() {
_intervals = set._intervals;
}
IntervalSet::IntervalSet(IntervalSet&& set) : IntervalSet(std::move(set._intervals)) {
}
IntervalSet::IntervalSet(std::vector<Interval>&& intervals) : _intervals(std::move(intervals)) {
}
IntervalSet& IntervalSet::operator=(const IntervalSet& other) {
_intervals = other._intervals;
return *this;
}
IntervalSet& IntervalSet::operator=(IntervalSet&& other) {
_intervals = std::move(other._intervals);
return *this;
}
IntervalSet IntervalSet::of(ssize_t a) {
return IntervalSet({ Interval(a, a) });
}
IntervalSet IntervalSet::of(ssize_t a, ssize_t b) {
return IntervalSet({ Interval(a, b) });
}
void IntervalSet::clear() {
_intervals.clear();
}
void IntervalSet::add(ssize_t el) {
add(el, el);
}
void IntervalSet::add(ssize_t a, ssize_t b) {
add(Interval(a, b));
}
void IntervalSet::add(const Interval &addition) {
if (addition.b < addition.a) {
return;
}
// find position in list
for (auto iterator = _intervals.begin(); iterator != _intervals.end(); ++iterator) {
Interval r = *iterator;
if (addition == r) {
return;
}
if (addition.adjacent(r) || !addition.disjoint(r)) {
// next to each other, make a single larger interval
Interval bigger = addition.Union(r);
*iterator = bigger;
// make sure we didn't just create an interval that
// should be merged with next interval in list
while (iterator + 1 != _intervals.end()) {
Interval next = *++iterator;
if (!bigger.adjacent(next) && bigger.disjoint(next)) {
break;
}
// if we bump up against or overlap next, merge
iterator = _intervals.erase(iterator);// remove this one
--iterator; // move backwards to what we just set
*iterator = bigger.Union(next); // set to 3 merged ones
// ml: no need to advance iterator, we do that in the next round anyway. ++iterator; // first call to next after previous duplicates the result
}
return;
}
if (addition.startsBeforeDisjoint(r)) {
// insert before r
//--iterator;
_intervals.insert(iterator, addition);
return;
}
// if disjoint and after r, a future iteration will handle it
}
// ok, must be after last interval (and disjoint from last interval)
// just add it
_intervals.push_back(addition);
}
IntervalSet IntervalSet::Or(const std::vector<IntervalSet> &sets) {
IntervalSet result;
for (const auto &s : sets) {
result.addAll(s);
}
return result;
}
IntervalSet& IntervalSet::addAll(const IntervalSet &set) {
// walk set and add each interval
for (auto const& interval : set._intervals) {
add(interval);
}
return *this;
}
IntervalSet IntervalSet::complement(ssize_t minElement, ssize_t maxElement) const {
return complement(IntervalSet::of(minElement, maxElement));
}
IntervalSet IntervalSet::complement(const IntervalSet &vocabulary) const {
return vocabulary.subtract(*this);
}
IntervalSet IntervalSet::subtract(const IntervalSet &other) const {
return subtract(*this, other);
}
IntervalSet IntervalSet::subtract(const IntervalSet &left, const IntervalSet &right) {
if (left.isEmpty()) {
return IntervalSet();
}
if (right.isEmpty()) {
// right set has no elements; just return the copy of the current set
return left;
}
IntervalSet result(left);
size_t resultI = 0;
size_t rightI = 0;
while (resultI < result._intervals.size() && rightI < right._intervals.size()) {
Interval &resultInterval = result._intervals[resultI];
const Interval &rightInterval = right._intervals[rightI];
// operation: (resultInterval - rightInterval) and update indexes
if (rightInterval.b < resultInterval.a) {
rightI++;
continue;
}
if (rightInterval.a > resultInterval.b) {
resultI++;
continue;
}
Interval beforeCurrent;
Interval afterCurrent;
if (rightInterval.a > resultInterval.a) {
beforeCurrent = Interval(resultInterval.a, rightInterval.a - 1);
}
if (rightInterval.b < resultInterval.b) {
afterCurrent = Interval(rightInterval.b + 1, resultInterval.b);
}
if (beforeCurrent.a > -1) { // -1 is the default value
if (afterCurrent.a > -1) {
// split the current interval into two
result._intervals[resultI] = beforeCurrent;
result._intervals.insert(result._intervals.begin() + resultI + 1, afterCurrent);
resultI++;
rightI++;
} else {
// replace the current interval
result._intervals[resultI] = beforeCurrent;
resultI++;
}
} else {
if (afterCurrent.a > -1) {
// replace the current interval
result._intervals[resultI] = afterCurrent;
rightI++;
} else {
// remove the current interval (thus no need to increment resultI)
result._intervals.erase(result._intervals.begin() + resultI);
}
}
}
// If rightI reached right.intervals.size(), no more intervals to subtract from result.
// If resultI reached result.intervals.size(), we would be subtracting from an empty set.
// Either way, we are done.
return result;
}
IntervalSet IntervalSet::Or(const IntervalSet &a) const {
IntervalSet result;
result.addAll(*this);
result.addAll(a);
return result;
}
IntervalSet IntervalSet::And(const IntervalSet &other) const {
IntervalSet intersection;
size_t i = 0;
size_t j = 0;
// iterate down both interval lists looking for nondisjoint intervals
while (i < _intervals.size() && j < other._intervals.size()) {
Interval mine = _intervals[i];
Interval theirs = other._intervals[j];
if (mine.startsBeforeDisjoint(theirs)) {
// move this iterator looking for interval that might overlap
i++;
} else if (theirs.startsBeforeDisjoint(mine)) {
// move other iterator looking for interval that might overlap
j++;
} else if (mine.properlyContains(theirs)) {
// overlap, add intersection, get next theirs
intersection.add(mine.intersection(theirs));
j++;
} else if (theirs.properlyContains(mine)) {
// overlap, add intersection, get next mine
intersection.add(mine.intersection(theirs));
i++;
} else if (!mine.disjoint(theirs)) {
// overlap, add intersection
intersection.add(mine.intersection(theirs));
// Move the iterator of lower range [a..b], but not
// the upper range as it may contain elements that will collide
// with the next iterator. So, if mine=[0..115] and
// theirs=[115..200], then intersection is 115 and move mine
// but not theirs as theirs may collide with the next range
// in thisIter.
// move both iterators to next ranges
if (mine.startsAfterNonDisjoint(theirs)) {
j++;
} else if (theirs.startsAfterNonDisjoint(mine)) {
i++;
}
}
}
return intersection;
}
bool IntervalSet::contains(size_t el) const {
return contains(symbolToNumeric(el));
}
bool IntervalSet::contains(ssize_t el) const {
if (_intervals.empty() || el < _intervals.front().a || el > _intervals.back().b) {
return false;
}
return std::binary_search(_intervals.begin(), _intervals.end(), Interval(el, el), [](const Interval &lhs, const Interval &rhs) {
return lhs.b < rhs.a;
});
}
bool IntervalSet::isEmpty() const {
return _intervals.empty();
}
ssize_t IntervalSet::getSingleElement() const {
if (_intervals.size() == 1) {
if (_intervals[0].a == _intervals[0].b) {
return _intervals[0].a;
}
}
return Token::INVALID_TYPE; // XXX: this value is 0, but 0 is a valid interval range, how can that work?
}
ssize_t IntervalSet::getMaxElement() const {
if (_intervals.empty()) {
return Token::INVALID_TYPE;
}
return _intervals.back().b;
}
ssize_t IntervalSet::getMinElement() const {
if (_intervals.empty()) {
return Token::INVALID_TYPE;
}
return _intervals.front().a;
}
std::vector<Interval> const& IntervalSet::getIntervals() const {
return _intervals;
}
size_t IntervalSet::hashCode() const {
size_t hash = MurmurHash::initialize();
for (const auto &interval : _intervals) {
hash = MurmurHash::update(hash, interval.a);
hash = MurmurHash::update(hash, interval.b);
}
return MurmurHash::finish(hash, _intervals.size() * 2);
}
bool IntervalSet::operator == (const IntervalSet &other) const {
if (_intervals.empty() && other._intervals.empty())
return true;
if (_intervals.size() != other._intervals.size())
return false;
return std::equal(_intervals.begin(), _intervals.end(), other._intervals.begin());
}
std::string IntervalSet::toString() const {
return toString(false);
}
std::string IntervalSet::toString(bool elemAreChar) const {
if (_intervals.empty()) {
return "{}";
}
std::stringstream ss;
size_t effectiveSize = size();
if (effectiveSize > 1) {
ss << "{";
}
bool firstEntry = true;
for (const auto &interval : _intervals) {
if (!firstEntry)
ss << ", ";
firstEntry = false;
ssize_t a = interval.a;
ssize_t b = interval.b;
if (a == b) {
if (a == -1) {
ss << "<EOF>";
} else if (elemAreChar) {
ss << "'" << static_cast<char>(a) << "'";
} else {
ss << a;
}
} else {
if (elemAreChar) {
ss << "'" << static_cast<char>(a) << "'..'" << static_cast<char>(b) << "'";
} else {
ss << a << ".." << b;
}
}
}
if (effectiveSize > 1) {
ss << "}";
}
return ss.str();
}
std::string IntervalSet::toString(const dfa::Vocabulary &vocabulary) const {
if (_intervals.empty()) {
return "{}";
}
std::stringstream ss;
size_t effectiveSize = size();
if (effectiveSize > 1) {
ss << "{";
}
bool firstEntry = true;
for (const auto &interval : _intervals) {
if (!firstEntry)
ss << ", ";
firstEntry = false;
ssize_t a = interval.a;
ssize_t b = interval.b;
if (a == b) {
ss << elementName(vocabulary, a);
} else {
for (ssize_t i = a; i <= b; i++) {
if (i > a) {
ss << ", ";
}
ss << elementName(vocabulary, i);
}
}
}
if (effectiveSize > 1) {
ss << "}";
}
return ss.str();
}
std::string IntervalSet::elementName(const dfa::Vocabulary &vocabulary, ssize_t a) const {
if (a == -1) {
return "<EOF>";
} else if (a == -2) {
return "<EPSILON>";
} else {
return vocabulary.getDisplayName(a);
}
}
size_t IntervalSet::size() const {
size_t result = 0;
for (const auto &interval : _intervals) {
result += size_t(interval.b - interval.a + 1);
}
return result;
}
std::vector<ssize_t> IntervalSet::toList() const {
std::vector<ssize_t> result;
for (const auto &interval : _intervals) {
ssize_t a = interval.a;
ssize_t b = interval.b;
for (ssize_t v = a; v <= b; v++) {
result.push_back(v);
}
}
return result;
}
std::set<ssize_t> IntervalSet::toSet() const {
std::set<ssize_t> result;
for (const auto &interval : _intervals) {
ssize_t a = interval.a;
ssize_t b = interval.b;
for (ssize_t v = a; v <= b; v++) {
result.insert(v);
}
}
return result;
}
ssize_t IntervalSet::get(size_t i) const {
size_t index = 0;
for (const auto &interval : _intervals) {
ssize_t a = interval.a;
ssize_t b = interval.b;
for (ssize_t v = a; v <= b; v++) {
if (index == i) {
return v;
}
index++;
}
}
return -1;
}
void IntervalSet::remove(size_t el) {
remove(symbolToNumeric(el));
}
void IntervalSet::remove(ssize_t el) {
for (size_t i = 0; i < _intervals.size(); ++i) {
Interval &interval = _intervals[i];
ssize_t a = interval.a;
ssize_t b = interval.b;
if (el < a) {
break; // list is sorted and el is before this interval; not here
}
// if whole interval x..x, rm
if (el == a && el == b) {
_intervals.erase(_intervals.begin() + (long)i);
break;
}
// if on left edge x..b, adjust left
if (el == a) {
interval.a++;
break;
}
// if on right edge a..x, adjust right
if (el == b) {
interval.b--;
break;
}
// if in middle a..x..b, split interval
if (el > a && el < b) { // found in this interval
ssize_t oldb = interval.b;
interval.b = el - 1; // [a..x-1]
add(el + 1, oldb); // add [x+1..b]
break; // ml: not in the Java code but I believe we also should stop searching here, as we found x.
}
}
}

View File

@@ -0,0 +1,190 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "misc/Interval.h"
#include "Exceptions.h"
namespace antlr4 {
namespace misc {
/**
* This class implements the {@link IntSet} backed by a sorted array of
* non-overlapping intervals. It is particularly efficient for representing
* large collections of numbers, where the majority of elements appear as part
* of a sequential range of numbers that are all part of the set. For example,
* the set { 1, 2, 3, 4, 7, 8 } may be represented as { [1, 4], [7, 8] }.
*
* <p>
* This class is able to represent sets containing any combination of values in
* the range {@link Integer#MIN_VALUE} to {@link Integer#MAX_VALUE}
* (inclusive).</p>
*/
class ANTLR4CPP_PUBLIC IntervalSet final {
public:
static IntervalSet const COMPLETE_CHAR_SET;
static IntervalSet const EMPTY_SET;
private:
/// The list of sorted, disjoint intervals.
std::vector<Interval> _intervals;
explicit IntervalSet(std::vector<Interval>&& intervals);
public:
IntervalSet();
IntervalSet(IntervalSet const& set);
IntervalSet(IntervalSet&& set);
template<typename T1, typename... T_NEXT>
IntervalSet(int, T1 t1, T_NEXT&&... next) : IntervalSet() {
// The first int argument is an ignored count for compatibility
// with the previous varargs based interface.
addItems(t1, std::forward<T_NEXT>(next)...);
}
IntervalSet& operator=(IntervalSet const& set);
IntervalSet& operator=(IntervalSet&& set);
/// Create a set with a single element, el.
static IntervalSet of(ssize_t a);
/// Create a set with all ints within range [a..b] (inclusive)
static IntervalSet of(ssize_t a, ssize_t b);
void clear();
/// Add a single element to the set. An isolated element is stored
/// as a range el..el.
void add(ssize_t el);
/// Add interval; i.e., add all integers from a to b to set.
/// If b<a, do nothing.
/// Keep list in sorted order (by left range value).
/// If overlap, combine ranges. For example,
/// If this is {1..5, 10..20}, adding 6..7 yields
/// {1..5, 6..7, 10..20}. Adding 4..8 yields {1..8, 10..20}.
void add(ssize_t a, ssize_t b);
/// combine all sets in the array returned the or'd value
static IntervalSet Or(const std::vector<IntervalSet> &sets);
// Copy on write so we can cache a..a intervals and sets of that.
void add(const Interval &addition);
IntervalSet& addAll(const IntervalSet &set);
template<typename T1, typename... T_NEXT>
void addItems(T1 t1, T_NEXT&&... next) {
add(t1);
addItems(std::forward<T_NEXT>(next)...);
}
IntervalSet complement(ssize_t minElement, ssize_t maxElement) const;
/// Given the set of possible values (rather than, say UNICODE or MAXINT),
/// return a new set containing all elements in vocabulary, but not in
/// this. The computation is (vocabulary - this).
///
/// 'this' is assumed to be either a subset or equal to vocabulary.
IntervalSet complement(const IntervalSet &vocabulary) const;
/// Compute this-other via this&~other.
/// Return a new set containing all elements in this but not in other.
/// other is assumed to be a subset of this;
/// anything that is in other but not in this will be ignored.
IntervalSet subtract(const IntervalSet &other) const;
/**
* Compute the set difference between two interval sets. The specific
* operation is {@code left - right}. If either of the input sets is
* {@code null}, it is treated as though it was an empty set.
*/
static IntervalSet subtract(const IntervalSet &left, const IntervalSet &right);
IntervalSet Or(const IntervalSet &a) const;
/// Return a new set with the intersection of this set with other. Because
/// the intervals are sorted, we can use an iterator for each list and
/// just walk them together. This is roughly O(min(n,m)) for interval
/// list lengths n and m.
IntervalSet And(const IntervalSet &other) const;
/// Is el in any range of this set?
bool contains(size_t el) const; // For mapping of e.g. Token::EOF to -1 etc.
bool contains(ssize_t el) const;
/// return true if this set has no members
bool isEmpty() const;
/// If this set is a single integer, return it otherwise Token.INVALID_TYPE.
ssize_t getSingleElement() const;
/**
* Returns the maximum value contained in the set.
*
* @return the maximum value contained in the set. If the set is empty, this
* method returns {@link Token#INVALID_TYPE}.
*/
ssize_t getMaxElement() const;
/**
* Returns the minimum value contained in the set.
*
* @return the minimum value contained in the set. If the set is empty, this
* method returns {@link Token#INVALID_TYPE}.
*/
ssize_t getMinElement() const;
/// <summary>
/// Return a list of Interval objects. </summary>
std::vector<Interval> const& getIntervals() const;
size_t hashCode() const;
/// Are two IntervalSets equal? Because all intervals are sorted
/// and disjoint, equals is a simple linear walk over both lists
/// to make sure they are the same.
bool operator == (const IntervalSet &other) const;
std::string toString() const;
std::string toString(bool elemAreChar) const;
std::string toString(const dfa::Vocabulary &vocabulary) const;
protected:
std::string elementName(const dfa::Vocabulary &vocabulary, ssize_t a) const;
public:
size_t size() const;
std::vector<ssize_t> toList() const;
std::set<ssize_t> toSet() const;
/// Get the ith element of ordered set. Used only by RandomPhrase so
/// don't bother to implement if you're not doing that for a new
/// ANTLR code gen target.
ssize_t get(size_t i) const;
void remove(size_t el); // For mapping of e.g. Token::EOF to -1 etc.
void remove(ssize_t el);
private:
void addItems() { /* No-op */ }
};
} // namespace atn
} // namespace antlr4
// Hash function for IntervalSet.
namespace std {
using antlr4::misc::IntervalSet;
template <> struct hash<IntervalSet>
{
size_t operator() (const IntervalSet &x) const
{
return x.hashCode();
}
};
}

View File

@@ -0,0 +1,120 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#include <cstddef>
#include <cstdint>
#include <cstring>
#include "misc/MurmurHash.h"
using namespace antlr4::misc;
// A variation of the MurmurHash3 implementation (https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp)
// Here we unrolled the loop used there into individual calls to update(), as we usually hash object fields
// instead of entire buffers.
// Platform-specific functions and macros
// Microsoft Visual Studio
#if defined(_MSC_VER)
#include <stdlib.h>
#define ROTL32(x,y) _rotl(x,y)
#define ROTL64(x,y) _rotl64(x,y)
#elif ANTLR4CPP_HAVE_BUILTIN(__builtin_rotateleft32) && ANTLR4CPP_HAVE_BUILTIN(__builtin_rotateleft64)
#define ROTL32(x, y) __builtin_rotateleft32(x, y)
#define ROTL64(x, y) __builtin_rotateleft64(x, y)
#else // defined(_MSC_VER)
// Other compilers
namespace {
constexpr uint32_t ROTL32(uint32_t x, int r) {
return (x << r) | (x >> (32 - r));
}
constexpr uint64_t ROTL64(uint64_t x, int r) {
return (x << r) | (x >> (64 - r));
}
}
#endif // !defined(_MSC_VER)
#if SIZE_MAX == UINT64_MAX
size_t MurmurHash::update(size_t hash, size_t value) {
size_t k1 = value;
k1 *= UINT64_C(0x87c37b91114253d5);
k1 = ROTL64(k1, 31);
k1 *= UINT64_C(0x4cf5ad432745937f);
hash ^= k1;
hash = ROTL64(hash, 27);
hash = hash * 5 + UINT64_C(0x52dce729);
return hash;
}
size_t MurmurHash::finish(size_t hash, size_t entryCount) {
hash ^= entryCount * 8;
hash ^= hash >> 33;
hash *= UINT64_C(0xff51afd7ed558ccd);
hash ^= hash >> 33;
hash *= UINT64_C(0xc4ceb9fe1a85ec53);
hash ^= hash >> 33;
return hash;
}
#elif SIZE_MAX == UINT32_MAX
size_t MurmurHash::update(size_t hash, size_t value) {
size_t k1 = value;
k1 *= UINT32_C(0xCC9E2D51);
k1 = ROTL32(k1, 15);
k1 *= UINT32_C(0x1B873593);
hash ^= k1;
hash = ROTL32(hash, 13);
hash = hash * 5 + UINT32_C(0xE6546B64);
return hash;
}
size_t MurmurHash::finish(size_t hash, size_t entryCount) {
hash ^= entryCount * 4;
hash ^= hash >> 16;
hash *= UINT32_C(0x85EBCA6B);
hash ^= hash >> 13;
hash *= UINT32_C(0xC2B2AE35);
hash ^= hash >> 16;
return hash;
}
#else
#error "Expected sizeof(size_t) to be 4 or 8."
#endif
size_t MurmurHash::update(size_t hash, const void *data, size_t size) {
size_t value;
const uint8_t *bytes = static_cast<const uint8_t*>(data);
while (size >= sizeof(size_t)) {
std::memcpy(&value, bytes, sizeof(size_t));
hash = update(hash, value);
bytes += sizeof(size_t);
size -= sizeof(size_t);
}
if (size != 0) {
value = 0;
std::memcpy(&value, bytes, size);
hash = update(hash, value);
}
return hash;
}

View File

@@ -0,0 +1,102 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include <cstdint>
#include <type_traits>
#include "antlr4-common.h"
namespace antlr4 {
namespace misc {
class ANTLR4CPP_PUBLIC MurmurHash final {
private:
static constexpr size_t DEFAULT_SEED = 0;
/// Initialize the hash using the default seed value.
/// Returns the intermediate hash value.
public:
static size_t initialize() { return initialize(DEFAULT_SEED); }
/// Initialize the hash using the specified seed.
static size_t initialize(size_t seed) { return seed; }
/// Update the intermediate hash value for the next input {@code value}.
/// <param name="hash"> the intermediate hash value </param>
/// <param name="value"> the value to add to the current hash </param>
/// Returns the updated intermediate hash value.
static size_t update(size_t hash, size_t value);
/**
* Update the intermediate hash value for the next input {@code value}.
*
* @param hash the intermediate hash value
* @param value the value to add to the current hash
* @return the updated intermediate hash value
*/
template <class T>
static size_t update(size_t hash, Ref<T> const& value) {
return update(hash, value != nullptr ? value->hashCode() : 0);
}
template <class T>
static size_t update(size_t hash, T *value) {
return update(hash, value != nullptr ? value->hashCode() : 0);
}
static size_t update(size_t hash, const void *data, size_t size);
template <typename T>
static size_t update(size_t hash, const T *data, size_t size) {
return update(hash, static_cast<const void*>(data), size * sizeof(std::remove_reference_t<T>));
}
/// <summary>
/// Apply the final computation steps to the intermediate value {@code hash}
/// to form the final result of the MurmurHash 3 hash function.
/// </summary>
/// <param name="hash"> the intermediate hash value </param>
/// <param name="entryCount"> the number of calls to update() before calling finish() </param>
/// <returns> the final hash result </returns>
static size_t finish(size_t hash, size_t entryCount);
/// Utility function to compute the hash code of an array using the MurmurHash3 algorithm.
///
/// @param <T> the array element type </param>
/// <param name="data"> the array data </param>
/// <param name="seed"> the seed for the MurmurHash algorithm </param>
/// <returns> the hash code of the data </returns>
template<typename T> // where T is C array type
static size_t hashCode(const std::vector<Ref<T>> &data, size_t seed = DEFAULT_SEED) {
size_t hash = initialize(seed);
for (auto &entry : data) {
hash = update(hash, entry);
}
return finish(hash, data.size());
}
static size_t hashCode(const void *data, size_t size, size_t seed = DEFAULT_SEED) {
size_t hash = initialize(seed);
hash = update(hash, data, size);
return finish(hash, size);
}
template <typename T>
static size_t hashCode(const T *data, size_t size, size_t seed = DEFAULT_SEED) {
return hashCode(static_cast<const void*>(data), size * sizeof(std::remove_reference_t<T>), seed);
}
private:
MurmurHash() = delete;
MurmurHash(const MurmurHash&) = delete;
MurmurHash& operator=(const MurmurHash&) = delete;
};
} // namespace atn
} // namespace antlr4

View File

@@ -0,0 +1,4 @@
#include "misc/Predicate.h"
antlr4::misc::Predicate::~Predicate() {
}

View File

@@ -0,0 +1,21 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "antlr4-common.h"
namespace antlr4 {
namespace misc {
class ANTLR4CPP_PUBLIC Predicate {
public:
virtual ~Predicate();
virtual bool test(tree::ParseTree *t) = 0;
};
} // namespace tree
} // namespace antlr4