Martchus
8a4f7f5b27
It is actually possible to use a normal "/" under Windows as well in ignore patterns, so it makes sense for the matching to support both "/" and "\".
489 lines
19 KiB
C++
489 lines
19 KiB
C++
#include "./syncthingignorepattern.h"
|
|
|
|
namespace Data {
|
|
|
|
/// \cond
|
|
namespace SyncthingIgnorePatternState {
|
|
enum State {
|
|
Escaping, // passed the esacping character "\"
|
|
AppendRangeLower, // passed a "[" marking the start of a character range
|
|
AppendRangeUpper, // passed a "-" within a range marking the start of an upper-bound range character
|
|
AppendAlternative, // passed a "{" marking the start of an alternative set
|
|
MatchVerbatimly, // initial/default state
|
|
MatchRange, // passed a "]" marking the end of a character range
|
|
MatchAlternatives, // passed a "}" marking the end of an alternative set
|
|
MatchAny, // passed the "?" character that allows matching any character but the path separator
|
|
MatchManyAny, // passed the "*" character that allows matching many arbitrary characters except the path separator
|
|
MatchManyAnyIncludingDirSep, // passed a sequence of two "*" characters allowing to match also the path separator
|
|
};
|
|
|
|
struct Asterisk {
|
|
QString::const_iterator pos;
|
|
SyncthingIgnorePatternState::State state;
|
|
bool visited = false;
|
|
};
|
|
|
|
struct CharacterRange {
|
|
QChar lowerBound, upperBound;
|
|
};
|
|
|
|
struct AlternativeRange {
|
|
explicit AlternativeRange(QString::const_iterator beg)
|
|
: beg(beg)
|
|
, end(beg)
|
|
{
|
|
}
|
|
QString::const_iterator beg, end;
|
|
};
|
|
} // namespace SyncthingIgnorePatternState
|
|
/// \endcond
|
|
|
|
/*!
|
|
* \struct SyncthingIgnorePattern
|
|
* \brief The SyncthingIgnorePattern struct allows matching a Syncthing ignore pattern against a path.
|
|
* \remarks
|
|
* - The `#include`-syntax is not supported.
|
|
* - A "/" is always treated as path separator within the pattern. Additionally, the character specified
|
|
* when calling matches() is treated as path separator as well. This means that under Windows where
|
|
* patterns can contain a "\" one *must* specify paths with a "\" as separator when invoking the matches()
|
|
* function to allow patterns using "/" and "\" to match correctly; otherwise ignore patterns containing
|
|
* "\" do not work.
|
|
* \sa
|
|
* - https://docs.syncthing.net/users/ignoring.html
|
|
* - https://docs.syncthing.net/rest/db-ignores-get.html
|
|
*/
|
|
|
|
/*!
|
|
* \brief Parses the specified \a pattern populating the struct's fields.
|
|
*/
|
|
SyncthingIgnorePattern::SyncthingIgnorePattern(QString &&pattern)
|
|
: pattern(std::move(pattern))
|
|
{
|
|
if (this->pattern.startsWith(QLatin1String("//"))) {
|
|
comment = true;
|
|
ignore = false;
|
|
return;
|
|
}
|
|
glob = this->pattern;
|
|
for (;;) {
|
|
if (glob.startsWith(QLatin1String("!"))) {
|
|
ignore = !ignore;
|
|
glob.remove(0, 1);
|
|
} else if (glob.startsWith(QLatin1String("(?i)"))) {
|
|
caseInsensitive = true;
|
|
glob.remove(0, 4);
|
|
} else if (glob.startsWith(QLatin1String("(?d)"))) {
|
|
allowRemovalOnParentDirRemoval = true;
|
|
glob.remove(0, 4);
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*!
|
|
* \brief Moves the ignore pattern.
|
|
*/
|
|
// note: The move constructor is defaulted here, at its out-of-line definition.
// NOTE(review): presumably this is done so the state structs defined at the top of this file are
// complete at this point - confirm against the header.
SyncthingIgnorePattern::SyncthingIgnorePattern(SyncthingIgnorePattern &&) = default;
|
|
|
|
/*!
|
|
* \brief Destroys the ignore pattern.
|
|
*/
|
|
SyncthingIgnorePattern::~SyncthingIgnorePattern()
|
|
{
|
|
}
|
|
|
|
/*!
|
|
* \brief Matches the assigned glob against the specified \a path.
|
|
* \remarks
|
|
* - Returns always false if the pattern is flagged as comment or the glob is empty.
|
|
* - This function tries to follow rules outlined on https://docs.syncthing.net/users/ignoring.html.
|
|
* - The specified \a path is *not* supposed to start with the \a pathSeparator (or a "/"). It must always be a path
|
|
* relative to the root of the Syncthing folder it is contained by. A pattern that is only supposed to match from the
|
|
* root of the Syncthing folder is supposed to start with \a pathSeparator (or a "/"), though.
|
|
* - This function probably doesn't work if the pattern or \a path contain a surrogate pair.
|
|
* - By default, the path separator is "/". If \a path uses a different separator you must specify it as second argument.
|
|
* Note that "/" is still be treated as path separator in this case and the specified \a pathSeparator is only used in
|
|
* addition. This is intended because this way one can specify "\" as \a pathSeparator under Windows but patterns using
|
|
* "/" still work (as they should because "/" and "\" can both be used as path separator under Windows in ignore
|
|
* patterns).
|
|
*/
|
|
bool SyncthingIgnorePattern::matches(const QString &path, QChar pathSeparator) const
|
|
{
|
|
if (comment || glob.isEmpty()) {
|
|
return false;
|
|
}
|
|
|
|
// get iterators
|
|
auto globIter = glob.begin(), globEnd = glob.end();
|
|
auto pathIter = path.begin(), pathEnd = path.end();
|
|
|
|
// handle pattners starting with "/" indicating the pattern must match from the root (see last remark in docstring)
|
|
static
|
|
#if (QT_VERSION >= QT_VERSION_CHECK(6, 0, 0))
|
|
constexpr
|
|
#else
|
|
const
|
|
#endif
|
|
auto genericPathSeparator
|
|
= QChar('/');
|
|
const auto matchFromRoot = *globIter == pathSeparator || *globIter == genericPathSeparator;
|
|
if (matchFromRoot) {
|
|
++globIter;
|
|
}
|
|
|
|
// define variables to track the state when processing the glob pattern
|
|
using namespace SyncthingIgnorePatternState;
|
|
auto state = MatchVerbatimly;
|
|
auto escapedState = MatchVerbatimly;
|
|
auto inAsterisk = false;
|
|
asterisks.clear();
|
|
|
|
// define behavior to handle the current character in the glob pattern not matching the current pattern in the path
|
|
const auto handleMismatch = [&, this] {
|
|
// fail the match immediately if the glob pattern started with a "/" indicating it is supposed to match only from the root
|
|
if (matchFromRoot) {
|
|
return false;
|
|
}
|
|
// deal with the mismatch by trying to match previous asterisks more greedily
|
|
while (!asterisks.empty() && asterisks.back().visited) {
|
|
// do not consider asterisks we have already visited, though (as it would lead to an endless loop)
|
|
asterisks.pop_back();
|
|
}
|
|
if (!asterisks.empty()) {
|
|
// rewind back to when we have passed the last non-visited asterisk
|
|
auto &asterisk = asterisks.back();
|
|
globIter = asterisk.pos;
|
|
inAsterisk = asterisk.visited = true;
|
|
state = asterisk.state;
|
|
return true;
|
|
}
|
|
// deal with the mismatch by checking the path as of the next path element
|
|
for (; pathIter != pathEnd; ++pathIter) {
|
|
// forward to the next path separator
|
|
if (*pathIter != pathSeparator && *pathIter != genericPathSeparator) {
|
|
continue;
|
|
}
|
|
// skip the path separator itself and give up when the end of the path is reached
|
|
if (++pathIter == pathEnd) {
|
|
return false;
|
|
}
|
|
break;
|
|
}
|
|
// give up when the end of the path is reached
|
|
if (pathIter == pathEnd) {
|
|
return false;
|
|
}
|
|
// start matching the glob pattern from the beginning
|
|
globIter = glob.begin();
|
|
asterisks.clear();
|
|
inAsterisk = false;
|
|
return true;
|
|
};
|
|
|
|
// define function to handle single match
|
|
const auto handleSingleMatch = [&, this] {
|
|
// proceed with the next characters on a match
|
|
++globIter;
|
|
inAsterisk = false;
|
|
if (!asterisks.empty()) {
|
|
asterisks.back().visited = false;
|
|
}
|
|
};
|
|
|
|
// define function to match single character against the current character in the path
|
|
static constexpr auto compareSingleCharBasic = [](QChar expectedChar, QChar presentChar, QChar pathSep, bool caseInsensitive) {
|
|
Q_UNUSED(pathSep)
|
|
return caseInsensitive ? presentChar.toCaseFolded() == expectedChar.toCaseFolded() : presentChar == expectedChar;
|
|
};
|
|
const auto compareSingleChar = pathSeparator == genericPathSeparator
|
|
? compareSingleCharBasic
|
|
: [](QChar expectedChar, QChar presentChar, QChar pathSep, bool caseInsensitive) {
|
|
return compareSingleCharBasic(expectedChar, presentChar, pathSep, caseInsensitive)
|
|
|| (expectedChar == genericPathSeparator && presentChar == pathSep);
|
|
};
|
|
const auto matchSingleChar = [&, this](QChar expectedChar) { return compareSingleChar(expectedChar, *pathIter, pathSeparator, caseInsensitive); };
|
|
|
|
// define function to transition to verbatim matching (which makes only sense when not in any of the "Append…"-states)
|
|
const auto transitionToVerbatimMatching = [&] {
|
|
switch (state) {
|
|
case AppendRangeLower:
|
|
case AppendRangeUpper:
|
|
case AppendAlternative:
|
|
break;
|
|
default:
|
|
state = MatchVerbatimly;
|
|
}
|
|
};
|
|
|
|
// try to match each character of the glob against a character in the path
|
|
match:
|
|
while (globIter != globEnd) {
|
|
// decide what to do next depending on the current glob pattern character and state transitioning the state accordingly
|
|
#if (QT_VERSION >= QT_VERSION_CHECK(6, 0, 0))
|
|
static constexpr auto escapeCharacter = QChar('\\');
|
|
static constexpr auto escapeCharacterUnicode = escapeCharacter.unicode();
|
|
#else
|
|
#define escapeCharacterUnicode '\\'
|
|
#define escapeCharacter QChar(escapeCharacterUnicode)
|
|
#endif
|
|
switch (state) {
|
|
case Escaping:
|
|
// treat every character as-is in "escaping" state
|
|
state = escapedState;
|
|
break;
|
|
default:
|
|
// transition state according to special meaning of the current glob pattern character
|
|
switch (globIter->unicode()) {
|
|
case escapeCharacterUnicode:
|
|
if (pathSeparator != escapeCharacter) {
|
|
// transition into "escaping" state
|
|
escapedState = state;
|
|
state = Escaping;
|
|
} else {
|
|
// treat the escape character as normal character if it is the path separator
|
|
// quote from Syncthing documentation: Escaped characters are not supported on Windows, where "\" is the path
|
|
// separator.
|
|
transitionToVerbatimMatching();
|
|
}
|
|
break;
|
|
case '[':
|
|
state = AppendRangeLower;
|
|
characterRange.clear();
|
|
++globIter;
|
|
continue;
|
|
case ']':
|
|
switch (state) {
|
|
case AppendRangeLower:
|
|
case AppendRangeUpper:
|
|
state = MatchRange;
|
|
break;
|
|
default:
|
|
transitionToVerbatimMatching();
|
|
}
|
|
break;
|
|
case '-':
|
|
switch (state) {
|
|
case AppendRangeLower:
|
|
state = AppendRangeUpper;
|
|
++globIter;
|
|
continue;
|
|
default:
|
|
transitionToVerbatimMatching();
|
|
}
|
|
break;
|
|
case '{':
|
|
switch (state) {
|
|
case AppendAlternative:
|
|
continue;
|
|
default:
|
|
state = AppendAlternative;
|
|
alternatives.clear();
|
|
alternatives.emplace_back(++globIter);
|
|
}
|
|
continue;
|
|
case '}':
|
|
switch (state) {
|
|
case AppendAlternative:
|
|
alternatives.back().end = globIter;
|
|
state = MatchAlternatives;
|
|
break;
|
|
default:
|
|
transitionToVerbatimMatching();
|
|
}
|
|
break;
|
|
case ',':
|
|
switch (state) {
|
|
case AppendAlternative:
|
|
alternatives.back().end = globIter;
|
|
alternatives.emplace_back(++globIter);
|
|
continue;
|
|
default:
|
|
transitionToVerbatimMatching();
|
|
}
|
|
break;
|
|
case '?':
|
|
// transition into "match any" state
|
|
state = MatchAny;
|
|
break;
|
|
case '*':
|
|
// transition into one of the "match many any" state (depending on current state)
|
|
switch (state) {
|
|
case MatchManyAny:
|
|
state = MatchManyAnyIncludingDirSep;
|
|
break;
|
|
default:
|
|
state = MatchManyAny;
|
|
}
|
|
break;
|
|
default:
|
|
// try to match/append all other non-special characters as-is
|
|
transitionToVerbatimMatching();
|
|
}
|
|
}
|
|
|
|
// proceed according to state
|
|
switch (state) {
|
|
case Escaping:
|
|
// proceed with the next character in the glob pattern which will be matched as-is (even if it is special)
|
|
[[fallthrough]];
|
|
case AppendAlternative:
|
|
// just move on to the next character (alternatives are populated completely in the previous switch-case)
|
|
++globIter;
|
|
break;
|
|
case AppendRangeLower:
|
|
// add the current character in the glob pattern as start of a new range
|
|
characterRange.emplace_back().lowerBound = *globIter++;
|
|
break;
|
|
case AppendRangeUpper:
|
|
// add the current character in the glob pattern as end of a new or the current range
|
|
(characterRange.empty() ? characterRange.emplace_back() : characterRange.back()).upperBound = *globIter++;
|
|
state = AppendRangeLower;
|
|
break;
|
|
case MatchVerbatimly:
|
|
// match the current character in the glob pattern verbatimly against the current character in the path
|
|
if (pathIter != pathEnd && matchSingleChar(*globIter)) {
|
|
++pathIter;
|
|
handleSingleMatch();
|
|
} else if (inAsterisk
|
|
&& (asterisks.back().state == MatchManyAnyIncludingDirSep
|
|
|| (pathIter == pathEnd || (*pathIter != pathSeparator && *pathIter != genericPathSeparator)))) {
|
|
// consider the path character dealt with despite no match if we have just passed an asterisk in the glob pattern
|
|
if (pathIter != pathEnd) {
|
|
++pathIter;
|
|
} else {
|
|
inAsterisk = false;
|
|
}
|
|
} else if (!handleMismatch()) {
|
|
return false;
|
|
}
|
|
break;
|
|
case MatchRange:
|
|
// match the concluded character range in the glob pattern against the current character in the path
|
|
if (pathIter != pathEnd) {
|
|
auto inRange = false;
|
|
for (const auto &bounds : characterRange) {
|
|
if ((!bounds.upperBound.isNull() && *pathIter >= bounds.lowerBound && *pathIter <= bounds.upperBound)
|
|
|| (bounds.upperBound.isNull() && matchSingleChar(bounds.lowerBound))) {
|
|
inRange = true;
|
|
break;
|
|
}
|
|
}
|
|
if (inRange) {
|
|
characterRange.clear();
|
|
state = MatchVerbatimly;
|
|
++pathIter;
|
|
handleSingleMatch();
|
|
break;
|
|
}
|
|
}
|
|
if (!handleMismatch()) {
|
|
return false;
|
|
}
|
|
break;
|
|
case MatchAlternatives:
|
|
// match the current alternatives as of the current character in the path
|
|
if (pathIter != pathEnd) {
|
|
const auto pathStart = pathIter;
|
|
for (auto &alternative : alternatives) {
|
|
// match characters in the alternative against the path
|
|
// note: Special characters like "*" are matched verbatimly. Is that the correct behavior?
|
|
pathIter = pathStart;
|
|
for (; alternative.beg != alternative.end && pathIter != pathEnd; ++alternative.beg) {
|
|
if (*alternative.beg == escapeCharacter) {
|
|
continue;
|
|
}
|
|
if (!matchSingleChar(*alternative.beg)) {
|
|
break;
|
|
}
|
|
++pathIter;
|
|
}
|
|
// go with the first alternative that fully matched
|
|
// note: What is the correct behavior? Should this be most/least greedy (matching the longest/shortest possible alternative) instead?
|
|
if (alternative.beg == alternative.end) {
|
|
alternatives.clear();
|
|
break;
|
|
}
|
|
}
|
|
if (alternatives.empty()) {
|
|
state = MatchVerbatimly;
|
|
handleSingleMatch();
|
|
break;
|
|
}
|
|
}
|
|
if (!handleMismatch()) {
|
|
return false;
|
|
}
|
|
break;
|
|
case MatchAny:
|
|
// allow the current character in the path to be anything but a path separator; otherwise consider it as mismatch as in the case for an exact match
|
|
if (pathIter == pathEnd || (*pathIter != pathSeparator && *pathIter != genericPathSeparator)) {
|
|
++globIter, ++pathIter;
|
|
} else if (!handleMismatch()) {
|
|
return false;
|
|
}
|
|
break;
|
|
case MatchManyAny: {
|
|
// take record of the asterisks
|
|
auto &glob = asterisks.emplace_back();
|
|
glob.pos = ++globIter;
|
|
glob.state = MatchManyAny;
|
|
inAsterisk = true;
|
|
break;
|
|
}
|
|
case MatchManyAnyIncludingDirSep: {
|
|
// take record of the second asterisks
|
|
auto &glob = asterisks.back();
|
|
glob.pos = ++globIter;
|
|
glob.state = MatchManyAnyIncludingDirSep;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
// check whether all characters of the glob have been matched against all characters of the path
|
|
if (globIter == globEnd) {
|
|
// consider the match a success if all characters of the path were matched or the glob ended with a "**"
|
|
if (pathIter == pathEnd || state == MatchManyAnyIncludingDirSep) {
|
|
return true;
|
|
}
|
|
if (const auto remainingPath = QStringView(pathIter, pathEnd);
|
|
state == MatchManyAny && !(remainingPath.contains(pathSeparator) || remainingPath.contains(genericPathSeparator))) {
|
|
return true;
|
|
}
|
|
|
|
// try again as of the next path segment if the glob fully matched but there are still characters in the path to be matched
|
|
// note: This allows "foo" to match against "foo/foo" even tough the glob characters have already consumed after matching the first path segment.
|
|
if (!matchFromRoot && (*pathIter == pathSeparator || *pathIter == genericPathSeparator)) {
|
|
state = MatchVerbatimly;
|
|
++pathIter;
|
|
globIter = glob.begin();
|
|
asterisks.clear();
|
|
inAsterisk = false;
|
|
goto match;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/*!
|
|
* \brief Makes an ignore pattern for \a path with the specified settings.
|
|
*/
|
|
QString SyncthingIgnorePattern::forPath(const QString &path, bool ignore, bool caseInsensitive, bool allowRemovalOnParentDirRemoval)
|
|
{
|
|
auto res = QString();
|
|
res.reserve(10 + path.size());
|
|
if (!ignore) {
|
|
res += QChar('!');
|
|
}
|
|
if (caseInsensitive) {
|
|
res += QStringLiteral("(?i)");
|
|
}
|
|
if (allowRemovalOnParentDirRemoval) {
|
|
res += QStringLiteral("(?d)");
|
|
}
|
|
return res += path;
|
|
}
|
|
|
|
} // namespace Data
|