Initial commit from sysy-main
This commit is contained in:
508
antlr/antlr4-runtime-4.13.2/runtime/src/misc/IntervalSet.cpp
Executable file
508
antlr/antlr4-runtime-4.13.2/runtime/src/misc/IntervalSet.cpp
Executable file
@@ -0,0 +1,508 @@
|
||||
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
|
||||
* Use of this file is governed by the BSD 3-clause license that
|
||||
* can be found in the LICENSE.txt file in the project root.
|
||||
*/
|
||||
|
||||
#include "misc/MurmurHash.h"
|
||||
#include "Lexer.h"
|
||||
#include "Exceptions.h"
|
||||
#include "Vocabulary.h"
|
||||
|
||||
#include "misc/IntervalSet.h"
|
||||
|
||||
using namespace antlr4;
|
||||
using namespace antlr4::misc;
|
||||
|
||||
IntervalSet const IntervalSet::COMPLETE_CHAR_SET =
|
||||
IntervalSet::of(Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE);
|
||||
|
||||
IntervalSet const IntervalSet::EMPTY_SET;
|
||||
|
||||
IntervalSet::IntervalSet() : _intervals() {
|
||||
}
|
||||
|
||||
IntervalSet::IntervalSet(const IntervalSet &set) : IntervalSet() {
|
||||
_intervals = set._intervals;
|
||||
}
|
||||
|
||||
IntervalSet::IntervalSet(IntervalSet&& set) : IntervalSet(std::move(set._intervals)) {
|
||||
}
|
||||
|
||||
IntervalSet::IntervalSet(std::vector<Interval>&& intervals) : _intervals(std::move(intervals)) {
|
||||
}
|
||||
|
||||
IntervalSet& IntervalSet::operator=(const IntervalSet& other) {
|
||||
_intervals = other._intervals;
|
||||
return *this;
|
||||
}
|
||||
|
||||
IntervalSet& IntervalSet::operator=(IntervalSet&& other) {
|
||||
_intervals = std::move(other._intervals);
|
||||
return *this;
|
||||
}
|
||||
|
||||
IntervalSet IntervalSet::of(ssize_t a) {
|
||||
return IntervalSet({ Interval(a, a) });
|
||||
}
|
||||
|
||||
IntervalSet IntervalSet::of(ssize_t a, ssize_t b) {
|
||||
return IntervalSet({ Interval(a, b) });
|
||||
}
|
||||
|
||||
void IntervalSet::clear() {
|
||||
_intervals.clear();
|
||||
}
|
||||
|
||||
void IntervalSet::add(ssize_t el) {
|
||||
add(el, el);
|
||||
}
|
||||
|
||||
void IntervalSet::add(ssize_t a, ssize_t b) {
|
||||
add(Interval(a, b));
|
||||
}
|
||||
|
||||
void IntervalSet::add(const Interval &addition) {
|
||||
if (addition.b < addition.a) {
|
||||
return;
|
||||
}
|
||||
|
||||
// find position in list
|
||||
for (auto iterator = _intervals.begin(); iterator != _intervals.end(); ++iterator) {
|
||||
Interval r = *iterator;
|
||||
if (addition == r) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (addition.adjacent(r) || !addition.disjoint(r)) {
|
||||
// next to each other, make a single larger interval
|
||||
Interval bigger = addition.Union(r);
|
||||
*iterator = bigger;
|
||||
|
||||
// make sure we didn't just create an interval that
|
||||
// should be merged with next interval in list
|
||||
while (iterator + 1 != _intervals.end()) {
|
||||
Interval next = *++iterator;
|
||||
if (!bigger.adjacent(next) && bigger.disjoint(next)) {
|
||||
break;
|
||||
}
|
||||
|
||||
// if we bump up against or overlap next, merge
|
||||
iterator = _intervals.erase(iterator);// remove this one
|
||||
--iterator; // move backwards to what we just set
|
||||
*iterator = bigger.Union(next); // set to 3 merged ones
|
||||
// ml: no need to advance iterator, we do that in the next round anyway. ++iterator; // first call to next after previous duplicates the result
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (addition.startsBeforeDisjoint(r)) {
|
||||
// insert before r
|
||||
//--iterator;
|
||||
_intervals.insert(iterator, addition);
|
||||
return;
|
||||
}
|
||||
|
||||
// if disjoint and after r, a future iteration will handle it
|
||||
}
|
||||
|
||||
// ok, must be after last interval (and disjoint from last interval)
|
||||
// just add it
|
||||
_intervals.push_back(addition);
|
||||
}
|
||||
|
||||
IntervalSet IntervalSet::Or(const std::vector<IntervalSet> &sets) {
|
||||
IntervalSet result;
|
||||
for (const auto &s : sets) {
|
||||
result.addAll(s);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
IntervalSet& IntervalSet::addAll(const IntervalSet &set) {
|
||||
// walk set and add each interval
|
||||
for (auto const& interval : set._intervals) {
|
||||
add(interval);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
IntervalSet IntervalSet::complement(ssize_t minElement, ssize_t maxElement) const {
|
||||
return complement(IntervalSet::of(minElement, maxElement));
|
||||
}
|
||||
|
||||
IntervalSet IntervalSet::complement(const IntervalSet &vocabulary) const {
|
||||
return vocabulary.subtract(*this);
|
||||
}
|
||||
|
||||
IntervalSet IntervalSet::subtract(const IntervalSet &other) const {
|
||||
return subtract(*this, other);
|
||||
}
|
||||
|
||||
IntervalSet IntervalSet::subtract(const IntervalSet &left, const IntervalSet &right) {
|
||||
if (left.isEmpty()) {
|
||||
return IntervalSet();
|
||||
}
|
||||
|
||||
if (right.isEmpty()) {
|
||||
// right set has no elements; just return the copy of the current set
|
||||
return left;
|
||||
}
|
||||
|
||||
IntervalSet result(left);
|
||||
size_t resultI = 0;
|
||||
size_t rightI = 0;
|
||||
while (resultI < result._intervals.size() && rightI < right._intervals.size()) {
|
||||
Interval &resultInterval = result._intervals[resultI];
|
||||
const Interval &rightInterval = right._intervals[rightI];
|
||||
|
||||
// operation: (resultInterval - rightInterval) and update indexes
|
||||
|
||||
if (rightInterval.b < resultInterval.a) {
|
||||
rightI++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (rightInterval.a > resultInterval.b) {
|
||||
resultI++;
|
||||
continue;
|
||||
}
|
||||
|
||||
Interval beforeCurrent;
|
||||
Interval afterCurrent;
|
||||
if (rightInterval.a > resultInterval.a) {
|
||||
beforeCurrent = Interval(resultInterval.a, rightInterval.a - 1);
|
||||
}
|
||||
|
||||
if (rightInterval.b < resultInterval.b) {
|
||||
afterCurrent = Interval(rightInterval.b + 1, resultInterval.b);
|
||||
}
|
||||
|
||||
if (beforeCurrent.a > -1) { // -1 is the default value
|
||||
if (afterCurrent.a > -1) {
|
||||
// split the current interval into two
|
||||
result._intervals[resultI] = beforeCurrent;
|
||||
result._intervals.insert(result._intervals.begin() + resultI + 1, afterCurrent);
|
||||
resultI++;
|
||||
rightI++;
|
||||
} else {
|
||||
// replace the current interval
|
||||
result._intervals[resultI] = beforeCurrent;
|
||||
resultI++;
|
||||
}
|
||||
} else {
|
||||
if (afterCurrent.a > -1) {
|
||||
// replace the current interval
|
||||
result._intervals[resultI] = afterCurrent;
|
||||
rightI++;
|
||||
} else {
|
||||
// remove the current interval (thus no need to increment resultI)
|
||||
result._intervals.erase(result._intervals.begin() + resultI);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If rightI reached right.intervals.size(), no more intervals to subtract from result.
|
||||
// If resultI reached result.intervals.size(), we would be subtracting from an empty set.
|
||||
// Either way, we are done.
|
||||
return result;
|
||||
}
|
||||
|
||||
IntervalSet IntervalSet::Or(const IntervalSet &a) const {
|
||||
IntervalSet result;
|
||||
result.addAll(*this);
|
||||
result.addAll(a);
|
||||
return result;
|
||||
}
|
||||
|
||||
IntervalSet IntervalSet::And(const IntervalSet &other) const {
|
||||
IntervalSet intersection;
|
||||
size_t i = 0;
|
||||
size_t j = 0;
|
||||
|
||||
// iterate down both interval lists looking for nondisjoint intervals
|
||||
while (i < _intervals.size() && j < other._intervals.size()) {
|
||||
Interval mine = _intervals[i];
|
||||
Interval theirs = other._intervals[j];
|
||||
|
||||
if (mine.startsBeforeDisjoint(theirs)) {
|
||||
// move this iterator looking for interval that might overlap
|
||||
i++;
|
||||
} else if (theirs.startsBeforeDisjoint(mine)) {
|
||||
// move other iterator looking for interval that might overlap
|
||||
j++;
|
||||
} else if (mine.properlyContains(theirs)) {
|
||||
// overlap, add intersection, get next theirs
|
||||
intersection.add(mine.intersection(theirs));
|
||||
j++;
|
||||
} else if (theirs.properlyContains(mine)) {
|
||||
// overlap, add intersection, get next mine
|
||||
intersection.add(mine.intersection(theirs));
|
||||
i++;
|
||||
} else if (!mine.disjoint(theirs)) {
|
||||
// overlap, add intersection
|
||||
intersection.add(mine.intersection(theirs));
|
||||
|
||||
// Move the iterator of lower range [a..b], but not
|
||||
// the upper range as it may contain elements that will collide
|
||||
// with the next iterator. So, if mine=[0..115] and
|
||||
// theirs=[115..200], then intersection is 115 and move mine
|
||||
// but not theirs as theirs may collide with the next range
|
||||
// in thisIter.
|
||||
// move both iterators to next ranges
|
||||
if (mine.startsAfterNonDisjoint(theirs)) {
|
||||
j++;
|
||||
} else if (theirs.startsAfterNonDisjoint(mine)) {
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return intersection;
|
||||
}
|
||||
|
||||
bool IntervalSet::contains(size_t el) const {
|
||||
return contains(symbolToNumeric(el));
|
||||
}
|
||||
|
||||
bool IntervalSet::contains(ssize_t el) const {
|
||||
if (_intervals.empty() || el < _intervals.front().a || el > _intervals.back().b) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return std::binary_search(_intervals.begin(), _intervals.end(), Interval(el, el), [](const Interval &lhs, const Interval &rhs) {
|
||||
return lhs.b < rhs.a;
|
||||
});
|
||||
}
|
||||
|
||||
bool IntervalSet::isEmpty() const {
|
||||
return _intervals.empty();
|
||||
}
|
||||
|
||||
ssize_t IntervalSet::getSingleElement() const {
|
||||
if (_intervals.size() == 1) {
|
||||
if (_intervals[0].a == _intervals[0].b) {
|
||||
return _intervals[0].a;
|
||||
}
|
||||
}
|
||||
|
||||
return Token::INVALID_TYPE; // XXX: this value is 0, but 0 is a valid interval range, how can that work?
|
||||
}
|
||||
|
||||
ssize_t IntervalSet::getMaxElement() const {
|
||||
if (_intervals.empty()) {
|
||||
return Token::INVALID_TYPE;
|
||||
}
|
||||
|
||||
return _intervals.back().b;
|
||||
}
|
||||
|
||||
ssize_t IntervalSet::getMinElement() const {
|
||||
if (_intervals.empty()) {
|
||||
return Token::INVALID_TYPE;
|
||||
}
|
||||
|
||||
return _intervals.front().a;
|
||||
}
|
||||
|
||||
std::vector<Interval> const& IntervalSet::getIntervals() const {
|
||||
return _intervals;
|
||||
}
|
||||
|
||||
size_t IntervalSet::hashCode() const {
|
||||
size_t hash = MurmurHash::initialize();
|
||||
for (const auto &interval : _intervals) {
|
||||
hash = MurmurHash::update(hash, interval.a);
|
||||
hash = MurmurHash::update(hash, interval.b);
|
||||
}
|
||||
|
||||
return MurmurHash::finish(hash, _intervals.size() * 2);
|
||||
}
|
||||
|
||||
bool IntervalSet::operator == (const IntervalSet &other) const {
|
||||
if (_intervals.empty() && other._intervals.empty())
|
||||
return true;
|
||||
|
||||
if (_intervals.size() != other._intervals.size())
|
||||
return false;
|
||||
|
||||
return std::equal(_intervals.begin(), _intervals.end(), other._intervals.begin());
|
||||
}
|
||||
|
||||
std::string IntervalSet::toString() const {
|
||||
return toString(false);
|
||||
}
|
||||
|
||||
std::string IntervalSet::toString(bool elemAreChar) const {
|
||||
if (_intervals.empty()) {
|
||||
return "{}";
|
||||
}
|
||||
|
||||
std::stringstream ss;
|
||||
size_t effectiveSize = size();
|
||||
if (effectiveSize > 1) {
|
||||
ss << "{";
|
||||
}
|
||||
|
||||
bool firstEntry = true;
|
||||
for (const auto &interval : _intervals) {
|
||||
if (!firstEntry)
|
||||
ss << ", ";
|
||||
firstEntry = false;
|
||||
|
||||
ssize_t a = interval.a;
|
||||
ssize_t b = interval.b;
|
||||
if (a == b) {
|
||||
if (a == -1) {
|
||||
ss << "<EOF>";
|
||||
} else if (elemAreChar) {
|
||||
ss << "'" << static_cast<char>(a) << "'";
|
||||
} else {
|
||||
ss << a;
|
||||
}
|
||||
} else {
|
||||
if (elemAreChar) {
|
||||
ss << "'" << static_cast<char>(a) << "'..'" << static_cast<char>(b) << "'";
|
||||
} else {
|
||||
ss << a << ".." << b;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (effectiveSize > 1) {
|
||||
ss << "}";
|
||||
}
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::string IntervalSet::toString(const dfa::Vocabulary &vocabulary) const {
|
||||
if (_intervals.empty()) {
|
||||
return "{}";
|
||||
}
|
||||
|
||||
std::stringstream ss;
|
||||
size_t effectiveSize = size();
|
||||
if (effectiveSize > 1) {
|
||||
ss << "{";
|
||||
}
|
||||
|
||||
bool firstEntry = true;
|
||||
for (const auto &interval : _intervals) {
|
||||
if (!firstEntry)
|
||||
ss << ", ";
|
||||
firstEntry = false;
|
||||
|
||||
ssize_t a = interval.a;
|
||||
ssize_t b = interval.b;
|
||||
if (a == b) {
|
||||
ss << elementName(vocabulary, a);
|
||||
} else {
|
||||
for (ssize_t i = a; i <= b; i++) {
|
||||
if (i > a) {
|
||||
ss << ", ";
|
||||
}
|
||||
ss << elementName(vocabulary, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (effectiveSize > 1) {
|
||||
ss << "}";
|
||||
}
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::string IntervalSet::elementName(const dfa::Vocabulary &vocabulary, ssize_t a) const {
|
||||
if (a == -1) {
|
||||
return "<EOF>";
|
||||
} else if (a == -2) {
|
||||
return "<EPSILON>";
|
||||
} else {
|
||||
return vocabulary.getDisplayName(a);
|
||||
}
|
||||
}
|
||||
|
||||
size_t IntervalSet::size() const {
|
||||
size_t result = 0;
|
||||
for (const auto &interval : _intervals) {
|
||||
result += size_t(interval.b - interval.a + 1);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<ssize_t> IntervalSet::toList() const {
|
||||
std::vector<ssize_t> result;
|
||||
for (const auto &interval : _intervals) {
|
||||
ssize_t a = interval.a;
|
||||
ssize_t b = interval.b;
|
||||
for (ssize_t v = a; v <= b; v++) {
|
||||
result.push_back(v);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
std::set<ssize_t> IntervalSet::toSet() const {
|
||||
std::set<ssize_t> result;
|
||||
for (const auto &interval : _intervals) {
|
||||
ssize_t a = interval.a;
|
||||
ssize_t b = interval.b;
|
||||
for (ssize_t v = a; v <= b; v++) {
|
||||
result.insert(v);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
ssize_t IntervalSet::get(size_t i) const {
|
||||
size_t index = 0;
|
||||
for (const auto &interval : _intervals) {
|
||||
ssize_t a = interval.a;
|
||||
ssize_t b = interval.b;
|
||||
for (ssize_t v = a; v <= b; v++) {
|
||||
if (index == i) {
|
||||
return v;
|
||||
}
|
||||
index++;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
void IntervalSet::remove(size_t el) {
|
||||
remove(symbolToNumeric(el));
|
||||
}
|
||||
|
||||
void IntervalSet::remove(ssize_t el) {
|
||||
for (size_t i = 0; i < _intervals.size(); ++i) {
|
||||
Interval &interval = _intervals[i];
|
||||
ssize_t a = interval.a;
|
||||
ssize_t b = interval.b;
|
||||
if (el < a) {
|
||||
break; // list is sorted and el is before this interval; not here
|
||||
}
|
||||
|
||||
// if whole interval x..x, rm
|
||||
if (el == a && el == b) {
|
||||
_intervals.erase(_intervals.begin() + (long)i);
|
||||
break;
|
||||
}
|
||||
// if on left edge x..b, adjust left
|
||||
if (el == a) {
|
||||
interval.a++;
|
||||
break;
|
||||
}
|
||||
// if on right edge a..x, adjust right
|
||||
if (el == b) {
|
||||
interval.b--;
|
||||
break;
|
||||
}
|
||||
// if in middle a..x..b, split interval
|
||||
if (el > a && el < b) { // found in this interval
|
||||
ssize_t oldb = interval.b;
|
||||
interval.b = el - 1; // [a..x-1]
|
||||
add(el + 1, oldb); // add [x+1..b]
|
||||
|
||||
break; // ml: not in the Java code but I believe we also should stop searching here, as we found x.
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user