#126 - Allow escaping the . (dot) character to remove ambiguity with the ... wildcard

- Naive implementation: the escape lexeme is \\DOT (problematic in the long term because it overlaps with other lexemes)
- Naive implementation: the escape is transformed into a simple regexp (technically, this is not needed at all)
This commit is contained in:
SebastianMC 2024-01-25 17:09:30 +01:00
parent c12ecb5c8c
commit f5fafc184f
2 changed files with 58 additions and 2 deletions

View File

@ -527,6 +527,57 @@ describe('SortingSpecProcessor', () => {
})
})
// Sorting-spec text exercising the `...` wildcard next to literal dots and the escaped-dot lexeme.
// Inside the template literal `\\` is a single backslash, so the parser receives e.g. `\DOT...`.
// NOTE(review): the last line starts with `//` and appears to yield no group in
// expectedSortSpecForThreeDotsCases - presumably it is handled as a comment line; confirm in the parser.
const txtInputThreeDotsCases: string = `
target-folder: AAA
...
\\DOT...
....
...\\DOT
\\DOT...\\DOT
..\\DOT...
//\\......\\.
`
// Expected parse result for txtInputThreeDotsCases, keyed by the target folder path.
// NOTE(review): the groups appear to follow the order of the spec lines in that fixture - confirm.
const expectedSortSpecForThreeDotsCases: { [key: string]: CustomSortSpec } = {
"AAA": {
groups: [{
// presumably for `...`: a bare wildcard
type: CustomSortGroupType.MatchAll
},{
// presumably for `\DOT...`: the escaped dot becomes a regex prefix
regexPrefix: { regex: /^\./i },
type: CustomSortGroupType.ExactPrefix
},{
// presumably for `....`: wildcard followed by a plain (non-regex) dot suffix
exactSuffix: '.',
type: CustomSortGroupType.ExactSuffix
},{
// presumably for `...\DOT`: the escaped dot becomes a regex suffix
regexSuffix: { regex: /\.$/i },
type: CustomSortGroupType.ExactSuffix
},{
// presumably for `\DOT...\DOT`: escaped dots on both sides of the wildcard
regexPrefix: { regex: /^\./i },
regexSuffix: { regex: /\.$/i },
type: CustomSortGroupType.ExactHeadAndTail
},{
// presumably for `..\DOT...`: all three leading dots end up as literal dots in the regex prefix
regexPrefix: { regex: /^\.\.\./i },
type: CustomSortGroupType.ExactPrefix
},{
// sits at index 6, the value of outsidersGroupIdx below
type: CustomSortGroupType.Outsiders
}],
outsidersGroupIdx: 6,
targetFoldersPaths: ['AAA']
}
}
// Parses the three-dots fixture and compares the per-path sort specs with the expected structure.
describe('SortingSpecProcessor', () => {
  let processor: SortingSpecProcessor;

  // A fresh processor for every test case.
  beforeEach(() => {
    processor = new SortingSpecProcessor();
  });

  it('should correctly handle some of three-dots scenarios', () => {
    // The parser takes the spec as an array of lines.
    const specLines: string[] = txtInputThreeDotsCases.split('\n')
    const parsed = processor.parseSortSpecFromText(specLines, 'mock-folder', 'custom-name-note.md')

    expect(parsed?.sortSpecByPath).toEqual(expectedSortSpecForThreeDotsCases)
  })
})
const txtInputTrueAlphabeticalSortAttr: string = `
target-folder: True Alpha
< true a-z

View File

@ -334,6 +334,9 @@ const CompoundNumberDashRegexSymbol: string = '\\-d+' // Compound number with d
const WordInASCIIRegexSymbol: string = '\\a+'
const WordInAnyLanguageRegexSymbol: string = '\\A+'
// The _1_ prefix marks a lexeme whose text contains another lexeme, so it has to be scanned first.
// Here the `\DOT` lexeme starts with `\D`, which is the `\d` lexeme when matched case-insensitively
// (the inline-symbols detection regex is built with the `i` flag).
const _1_InlineRegexSymbol_Dot: string = '\\DOT'
const InlineRegexSymbol_Digit1: string = '\\d'
const InlineRegexSymbol_Digit2: string = '\\[0-9]'
const InlineRegexSymbol_0_to_3: string = '\\[0-3]'
@ -361,11 +364,12 @@ const sortingSymbolsArr: Array<string> = [
// Global, case-insensitive regex matching any entry of sortingSymbolsArr.
// The entries are joined with `|` as-is, so they are assumed to be regex-ready already - TODO confirm.
const sortingSymbolsRegex = new RegExp(sortingSymbolsArr.join('|'), 'gi')
// Source lexemes of the inline regex symbols, each escaped for use inside a RegExp.
// Order matters: the entries are joined into one alternation, and the `\DOT` lexeme has to
// come before `\d`, which would otherwise match its first two characters case-insensitively.
// Each lexeme is listed exactly once; the trailing comma keeps future additions to one-line diffs.
const inlineRegexSymbolsArrEscapedForRegex: Array<string> = [
  escapeRegexUnsafeCharacters(_1_InlineRegexSymbol_Dot),
  escapeRegexUnsafeCharacters(InlineRegexSymbol_Digit1),
  escapeRegexUnsafeCharacters(InlineRegexSymbol_Digit2),
  escapeRegexUnsafeCharacters(InlineRegexSymbol_0_to_3),
  escapeRegexUnsafeCharacters(InlineRegexSymbol_CapitalLetter),
  escapeRegexUnsafeCharacters(InlineRegexSymbol_LowercaseLetter),
]
interface RegexExpr {
@ -376,11 +380,12 @@ interface RegexExpr {
// Maps each inline regex lexeme (as written in the sorting spec) to the regex fragment it stands for.
// Don't be confused if a source lexeme is equal to the resulting regex piece: logically these are
// two distinct spaces.
// NOTE(review): isUnicode / isCaseSensitive are presumably consumed when the final RegExp is
// assembled - see the RegexExpr interface.
const inlineRegexSymbolsToRegexExpressionsArr: { [key: string]: RegexExpr} = {
  [_1_InlineRegexSymbol_Dot]: {regexExpr: '\\.'},
  [InlineRegexSymbol_Digit1]: {regexExpr: '\\d'},
  [InlineRegexSymbol_Digit2]: {regexExpr: '[0-9]'},
  [InlineRegexSymbol_0_to_3]: {regexExpr: '[0-3]'},
  [InlineRegexSymbol_CapitalLetter]: {regexExpr: '[\\p{Lu}\\p{Lt}]', isUnicode: true, isCaseSensitive: true},
  [InlineRegexSymbol_LowercaseLetter]: {regexExpr: '\\p{Ll}', isUnicode: true, isCaseSensitive: true},
}
// Global, case-insensitive regex that detects any inline regex lexeme.
// Alternatives are tried left to right, so the order of inlineRegexSymbolsArrEscapedForRegex
// decides which lexeme wins when several could match at the same position.
const inlineRegexSymbolsDetectionRegex = new RegExp(inlineRegexSymbolsArrEscapedForRegex.join('|'), 'gi')