From f5fafc184ffabe3631091f952b358c688bbe3959 Mon Sep 17 00:00:00 2001 From: SebastianMC <23032356+SebastianMC@users.noreply.github.com> Date: Thu, 25 Jan 2024 17:09:30 +0100 Subject: [PATCH 1/2] #126 - Allow escaping the . (dot) character to remove ambiguity with the ... wildcard - naive implementation, the escape lexeme is \\DOT (problematic long term because of overlaps) - naive implementation, the escape is transformed to simple-regexp (technically this is not needed at all) --- .../sorting-spec-processor.spec.ts | 51 +++++++++++++++++++ src/custom-sort/sorting-spec-processor.ts | 9 +++- 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/src/custom-sort/sorting-spec-processor.spec.ts b/src/custom-sort/sorting-spec-processor.spec.ts index fde99c8..0c8aa76 100644 --- a/src/custom-sort/sorting-spec-processor.spec.ts +++ b/src/custom-sort/sorting-spec-processor.spec.ts @@ -527,6 +527,57 @@ describe('SortingSpecProcessor', () => { }) }) +const txtInputThreeDotsCases: string = ` +target-folder: AAA +... +\\DOT... +.... +...\\DOT +\\DOT...\\DOT +..\\DOT... +//\\......\\. +` + +const expectedSortSpecForThreeDotsCases: { [key: string]: CustomSortSpec } = { + "AAA": { + groups: [{ + type: CustomSortGroupType.MatchAll + },{ + regexPrefix: { regex: /^\./i }, + type: CustomSortGroupType.ExactPrefix + },{ + exactSuffix: '.', + type: CustomSortGroupType.ExactSuffix + },{ + regexSuffix: { regex: /\.$/i }, + type: CustomSortGroupType.ExactSuffix + },{ + regexPrefix: { regex: /^\./i }, + regexSuffix: { regex: /\.$/i }, + type: CustomSortGroupType.ExactHeadAndTail + },{ + regexPrefix: { regex: /^\.\.\./i }, + type: CustomSortGroupType.ExactPrefix + },{ + type: CustomSortGroupType.Outsiders + }], + outsidersGroupIdx: 6, + targetFoldersPaths: ['AAA'] + } +} + +describe('SortingSpecProcessor', () => { + let processor: SortingSpecProcessor; + beforeEach(() => { + processor = new SortingSpecProcessor(); + }); + it('should correctly handle some of three-dots scenarios', () => { + const inputTxtArr: Array = txtInputThreeDotsCases.split('\n') + const result = processor.parseSortSpecFromText(inputTxtArr, 'mock-folder', 'custom-name-note.md') + expect(result?.sortSpecByPath).toEqual(expectedSortSpecForThreeDotsCases) + }) +}) + const txtInputTrueAlphabeticalSortAttr: string = ` target-folder: True Alpha < true a-z diff --git a/src/custom-sort/sorting-spec-processor.ts b/src/custom-sort/sorting-spec-processor.ts index 2109e28..177d170 100644 --- a/src/custom-sort/sorting-spec-processor.ts +++ b/src/custom-sort/sorting-spec-processor.ts @@ -334,6 +334,9 @@ const CompoundNumberDashRegexSymbol: string = '\\-d+' // Compound number with d const WordInASCIIRegexSymbol: string = '\\a+' const WordInAnyLanguageRegexSymbol: string = '\\A+' +// _1_ prefix indicates a lexeme which includes another lexeme and thus has to become first-to-scan +const _1_InlineRegexSymbol_Dot: string = '\\DOT' + const InlineRegexSymbol_Digit1: string = '\\d' const InlineRegexSymbol_Digit2: string = '\\[0-9]' const InlineRegexSymbol_0_to_3: string = '\\[0-3]' @@ -361,11 +364,12 @@ const sortingSymbolsArr: Array = [ const sortingSymbolsRegex = new RegExp(sortingSymbolsArr.join('|'), 'gi') const inlineRegexSymbolsArrEscapedForRegex: Array = [ + escapeRegexUnsafeCharacters(_1_InlineRegexSymbol_Dot), escapeRegexUnsafeCharacters(InlineRegexSymbol_Digit1), escapeRegexUnsafeCharacters(InlineRegexSymbol_Digit2), escapeRegexUnsafeCharacters(InlineRegexSymbol_0_to_3), escapeRegexUnsafeCharacters(InlineRegexSymbol_CapitalLetter), - escapeRegexUnsafeCharacters(InlineRegexSymbol_LowercaseLetter) + escapeRegexUnsafeCharacters(InlineRegexSymbol_LowercaseLetter), ] interface RegexExpr { @@ -376,11 +380,12 @@ interface RegexExpr { // Don't be confused if the source lexeme is equal to the resulting regex piece, logically these two distinct spaces const inlineRegexSymbolsToRegexExpressionsArr: { [key: string]: RegexExpr} = { + [_1_InlineRegexSymbol_Dot]: {regexExpr: '\\.'}, [InlineRegexSymbol_Digit1]: {regexExpr: '\\d'}, [InlineRegexSymbol_Digit2]: {regexExpr: '[0-9]'}, [InlineRegexSymbol_0_to_3]: {regexExpr: '[0-3]'}, [InlineRegexSymbol_CapitalLetter]: {regexExpr: '[\\p{Lu}\\p{Lt}]', isUnicode: true, isCaseSensitive: true}, - [InlineRegexSymbol_LowercaseLetter]: {regexExpr: '\\p{Ll}', isUnicode: true, isCaseSensitive: true} + [InlineRegexSymbol_LowercaseLetter]: {regexExpr: '\\p{Ll}', isUnicode: true, isCaseSensitive: true}, } const inlineRegexSymbolsDetectionRegex = new RegExp(inlineRegexSymbolsArrEscapedForRegex.join('|'), 'gi') From e0208e2793941bd0005e1b7558169ed5cf90672e Mon Sep 17 00:00:00 2001 From: SebastianMC <23032356+SebastianMC@users.noreply.github.com> Date: Thu, 25 Jan 2024 20:54:15 +0100 Subject: [PATCH 2/2] #126 - Allow escaping the . (dot) character to remove ambiguity with the ... wildcard - simplistic implementation - use `./...` syntax to remove ambiguity of `....` --- .../sorting-spec-processor.spec.ts | 111 +++++++++++++++--- src/custom-sort/sorting-spec-processor.ts | 37 ++++-- 2 files changed, 122 insertions(+), 26 deletions(-) diff --git a/src/custom-sort/sorting-spec-processor.spec.ts b/src/custom-sort/sorting-spec-processor.spec.ts index 0c8aa76..c84bdc8 100644 --- a/src/custom-sort/sorting-spec-processor.spec.ts +++ b/src/custom-sort/sorting-spec-processor.spec.ts @@ -530,38 +530,49 @@ describe('SortingSpecProcessor', () => { const txtInputThreeDotsCases: string = ` target-folder: AAA ... -\\DOT... .... -...\\DOT -\\DOT...\\DOT -..\\DOT... -//\\......\\. + // Only in the below scenario the / is treated as empty-separator and swallowed +./... + // Below tricky and not obvious cases +../... +.../.. +../...S +S.../.. +S../...S ` const expectedSortSpecForThreeDotsCases: { [key: string]: CustomSortSpec } = { "AAA": { groups: [{ type: CustomSortGroupType.MatchAll - },{ - regexPrefix: { regex: /^\./i }, - type: CustomSortGroupType.ExactPrefix },{ exactSuffix: '.', type: CustomSortGroupType.ExactSuffix },{ - regexSuffix: { regex: /\.$/i }, + exactPrefix: '.', + type: CustomSortGroupType.ExactPrefix + },{ + exactPrefix: '..', + type: CustomSortGroupType.ExactPrefix + },{ + exactSuffix: '/..', type: CustomSortGroupType.ExactSuffix },{ - regexPrefix: { regex: /^\./i }, - regexSuffix: { regex: /\.$/i }, + exactPrefix: '..', + exactSuffix: 'S', type: CustomSortGroupType.ExactHeadAndTail },{ - regexPrefix: { regex: /^\.\.\./i }, - type: CustomSortGroupType.ExactPrefix + exactPrefix: 'S', + exactSuffix: '/..', + type: CustomSortGroupType.ExactHeadAndTail + },{ + exactPrefix: 'S..', + exactSuffix: 'S', + type: CustomSortGroupType.ExactHeadAndTail },{ type: CustomSortGroupType.Outsiders }], - outsidersGroupIdx: 6, + outsidersGroupIdx: 8, targetFoldersPaths: ['AAA'] } } @@ -2839,6 +2850,78 @@ describe('convertPlainStringSortingGroupSpecToArraySpec', () => { '...', 'tion. !!!' ]) }) + it('should recognize four dots escaper - variant 0', () => { + const s = './...' + expect(processor.convertPlainStringSortingGroupSpecToArraySpec(s)).toEqual([ + '.', '...' + ]) + }) + it('should recognize four dots escaper - variant 1', () => { + const s = '../...' + expect(processor.convertPlainStringSortingGroupSpecToArraySpec(s)).toEqual([ + '..', '...' + ]) + }) + it('should recognize four dots escaper - variant 2', () => { + const s = './...Some' + expect(processor.convertPlainStringSortingGroupSpecToArraySpec(s)).toEqual([ + '.', '...', 'Some' + ]) + }) + it('should recognize four dots escaper - variant 3', () => { + const s = 'Some./...' + expect(processor.convertPlainStringSortingGroupSpecToArraySpec(s)).toEqual([ + 'Some.','...' + ]) + }) + it('should recognize four dots escaper - variant 3a', () => { + const s = 'Some./.....' + expect(processor.convertPlainStringSortingGroupSpecToArraySpec(s)).toEqual([ + 'Some./..','...' + ]) + }) + it('should recognize four dots escaper - variant 3b', () => { + const s = 'Some./.....X' + expect(processor.convertPlainStringSortingGroupSpecToArraySpec(s)).toEqual([ + 'Some.','...', '..X' + ]) + }) + it('should recognize four dots escaper - variant 4', () => { + const s = 'Some./...Some' + expect(processor.convertPlainStringSortingGroupSpecToArraySpec(s)).toEqual([ + 'Some.','...', 'Some' + ]) + }) + it('should recognize four dots escaper - tricky variant 4', () => { + const s = 'Some./... haha ...Some' + expect(processor.convertPlainStringSortingGroupSpecToArraySpec(s)).toEqual([ + 'Some.','...', ' haha ...Some' + ]) + }) + it('should recognize four dots escaper - tricky variant 5', () => { + const s = 'S.../..' + expect(processor.convertPlainStringSortingGroupSpecToArraySpec(s)).toEqual([ + 'S','...', '/..' + ]) + }) + it('should NOT recognize four dots escaper - tricky variant 1', () => { + const s = 'Some... haha ./...Some' + expect(processor.convertPlainStringSortingGroupSpecToArraySpec(s)).toEqual([ + 'Some','...', ' haha ./...Some' + ]) + }) + it('should NOT recognize four dots escaper - tricky variant 2', () => { + const s = 'Some... haha .../...Some' + expect(processor.convertPlainStringSortingGroupSpecToArraySpec(s)).toEqual([ + 'Some', '...', ' haha .../...Some' + ]) + }) + it('should NOT recognize four dots escaper - tricky variant 3', () => { + const s = '.../...' + expect(processor.convertPlainStringSortingGroupSpecToArraySpec(s)).toEqual([ + '...', '/...' + ]) + }) it('should recognize some edge case', () => { const s = 'Edge...... ... ..... ... eee?' expect(processor.convertPlainStringSortingGroupSpecToArraySpec(s)).toEqual([ diff --git a/src/custom-sort/sorting-spec-processor.ts b/src/custom-sort/sorting-spec-processor.ts index 177d170..693e9bf 100644 --- a/src/custom-sort/sorting-spec-processor.ts +++ b/src/custom-sort/sorting-spec-processor.ts @@ -98,6 +98,10 @@ const ContextFreeProblems = new Set([ const ThreeDots = '...'; const ThreeDotsLength = ThreeDots.length; +const AmbigueFourDotsEscaper = './...' +const AmbigueFourDotsEscaperLength = AmbigueFourDotsEscaper.length +const AmbigueFourDotsEscaperOverlap = 1 // Number of leading chars in the Escaper to retain in original string + interface CustomSortOrderAscDescPair { asc: CustomSortOrder desc: CustomSortOrder @@ -334,9 +338,6 @@ const CompoundNumberDashRegexSymbol: string = '\\-d+' // Compound number with d const WordInASCIIRegexSymbol: string = '\\a+' const WordInAnyLanguageRegexSymbol: string = '\\A+' -// _1_ prefix indicates a lexeme which includes another lexeme and thus has to become first-to-scan -const _1_InlineRegexSymbol_Dot: string = '\\DOT' - const InlineRegexSymbol_Digit1: string = '\\d' const InlineRegexSymbol_Digit2: string = '\\[0-9]' const InlineRegexSymbol_0_to_3: string = '\\[0-3]' @@ -364,7 +365,6 @@ const sortingSymbolsArr: Array = [ const sortingSymbolsRegex = new RegExp(sortingSymbolsArr.join('|'), 'gi') const inlineRegexSymbolsArrEscapedForRegex: Array = [ - escapeRegexUnsafeCharacters(_1_InlineRegexSymbol_Dot), escapeRegexUnsafeCharacters(InlineRegexSymbol_Digit1), escapeRegexUnsafeCharacters(InlineRegexSymbol_Digit2), escapeRegexUnsafeCharacters(InlineRegexSymbol_0_to_3), @@ -380,7 +380,6 @@ interface RegexExpr { // Don't be confused if the source lexeme is equal to the resulting regex piece, logically these two distinct spaces const inlineRegexSymbolsToRegexExpressionsArr: { [key: string]: RegexExpr} = { - [_1_InlineRegexSymbol_Dot]: {regexExpr: '\\.'}, [InlineRegexSymbol_Digit1]: {regexExpr: '\\d'}, [InlineRegexSymbol_Digit2]: {regexExpr: '[0-9]'}, [InlineRegexSymbol_0_to_3]: {regexExpr: '[0-3]'}, @@ -1559,7 +1558,7 @@ export class SortingSpecProcessor { [Attribute.OrderUnspecified]: this.validateOrderAttrValue.bind(this) } - convertPlainStringSortingGroupSpecToArraySpec = (spec: string): Array => { + convertPlainStringSortingGroupSpecToArraySpec = (spec: string): Array => { spec = spec.trim() if (isThreeDots(spec)) { return [ThreeDots] @@ -1568,16 +1567,30 @@ export class SortingSpecProcessor { return [ThreeDots, spec.substring(ThreeDotsLength)]; } if (spec.endsWith(ThreeDots)) { - return [spec.substring(0, spec.length - ThreeDotsLength), ThreeDots]; + if (spec.endsWith(AmbigueFourDotsEscaper)) { + return [spec.substring(0, spec.length - AmbigueFourDotsEscaperLength + AmbigueFourDotsEscaperOverlap), ThreeDots]; + } else { + return [spec.substring(0, spec.length - ThreeDotsLength), ThreeDots]; + } } const idx = spec.indexOf(ThreeDots); + const idxOfAmbigueFourDotsEscaper = spec.indexOf(AmbigueFourDotsEscaper) if (idx > 0) { - return [ - spec.substring(0, idx), - ThreeDots, - spec.substring(idx + ThreeDotsLength) - ]; + if (idxOfAmbigueFourDotsEscaper >= 0 && + idxOfAmbigueFourDotsEscaper === idx - (AmbigueFourDotsEscaperLength - ThreeDotsLength) ) { + return [ + spec.substring(0, idxOfAmbigueFourDotsEscaper + AmbigueFourDotsEscaperOverlap), + ThreeDots, + spec.substring(idx + ThreeDotsLength) + ]; + } else { + return [ + spec.substring(0, idx), + ThreeDots, + spec.substring(idx + ThreeDotsLength) + ]; + } } // Unrecognized, treat as exact match