#126 - Allow escaping the . (dot) character to remove ambiguity with the ... wildcard

- naive implementation: the escape lexeme is \\DOT (problematic in the long term because of overlaps with other lexemes)
- naive implementation: the escape is transformed into a simple regexp (technically this is not needed at all)
2024-01-25 17:09:30 +01:00 · 2024-01-25 17:09:30 +01:00 · f5fafc184f
parent c12ecb5c8c
commit f5fafc184f
2 changed files with 58 additions and 2 deletions
--- a/src/custom-sort/sorting-spec-processor.spec.ts
+++ b/src/custom-sort/sorting-spec-processor.spec.ts
@ -527,6 +527,57 @@ describe('SortingSpecProcessor', () => {
 	})
 })

+const txtInputThreeDotsCases: string = `
+target-folder: AAA
+...
+\\DOT...
+....
+...\\DOT
+\\DOT...\\DOT
+..\\DOT...
+//\\......\\.
+`
+
+const expectedSortSpecForThreeDotsCases: { [key: string]: CustomSortSpec } = {
+	"AAA": {
+		groups: [{
+			type: CustomSortGroupType.MatchAll
+		},{
+			regexPrefix: { regex: /^\./i },
+			type: CustomSortGroupType.ExactPrefix
+		},{
+			exactSuffix: '.',
+			type: CustomSortGroupType.ExactSuffix
+		},{
+			regexSuffix: { regex: /\.$/i },
+			type: CustomSortGroupType.ExactSuffix
+		},{
+			regexPrefix: { regex: /^\./i },
+			regexSuffix: { regex: /\.$/i },
+			type: CustomSortGroupType.ExactHeadAndTail
+		},{
+			regexPrefix: { regex: /^\.\.\./i },
+			type: CustomSortGroupType.ExactPrefix
+		},{
+			type: CustomSortGroupType.Outsiders
+		}],
+		outsidersGroupIdx: 6,
+		targetFoldersPaths: ['AAA']
+	}
+}
+
+describe('SortingSpecProcessor', () => {
+	let processor: SortingSpecProcessor;
+	beforeEach(() => {
+		processor = new SortingSpecProcessor();
+	});
+	it('should correctly handle some of three-dots scenarios', () => {
+		const inputTxtArr: Array<string> = txtInputThreeDotsCases.split('\n')
+		const result = processor.parseSortSpecFromText(inputTxtArr, 'mock-folder', 'custom-name-note.md')
+		expect(result?.sortSpecByPath).toEqual(expectedSortSpecForThreeDotsCases)
+	})
+})
+
 const txtInputTrueAlphabeticalSortAttr: string = `
 target-folder: True Alpha
 < true a-z
--- a/src/custom-sort/sorting-spec-processor.ts
+++ b/src/custom-sort/sorting-spec-processor.ts
@ -334,6 +334,9 @@ const CompoundNumberDashRegexSymbol: string = '\\-d+'  // Compound number with d
 const WordInASCIIRegexSymbol: string = '\\a+'
 const WordInAnyLanguageRegexSymbol: string = '\\A+'

+// The _1_ prefix marks a lexeme which starts with another lexeme (\DOT begins with \D, which the case-insensitive scan also matches as \d) and thus has to be first-to-scan
+const _1_InlineRegexSymbol_Dot: string = '\\DOT'
+
 const InlineRegexSymbol_Digit1: string = '\\d'
 const InlineRegexSymbol_Digit2: string = '\\[0-9]'
 const InlineRegexSymbol_0_to_3: string = '\\[0-3]'
@ -361,11 +364,12 @@ const sortingSymbolsArr: Array<string> = [
 const sortingSymbolsRegex = new RegExp(sortingSymbolsArr.join('|'), 'gi')

 const inlineRegexSymbolsArrEscapedForRegex: Array<string> = [
+	escapeRegexUnsafeCharacters(_1_InlineRegexSymbol_Dot),
 	escapeRegexUnsafeCharacters(InlineRegexSymbol_Digit1),
 	escapeRegexUnsafeCharacters(InlineRegexSymbol_Digit2),
 	escapeRegexUnsafeCharacters(InlineRegexSymbol_0_to_3),
 	escapeRegexUnsafeCharacters(InlineRegexSymbol_CapitalLetter),
-	escapeRegexUnsafeCharacters(InlineRegexSymbol_LowercaseLetter)
+	escapeRegexUnsafeCharacters(InlineRegexSymbol_LowercaseLetter),
 ]

 interface RegexExpr {
@ -376,11 +380,12 @@ interface RegexExpr {

 // Don't be confused if the source lexeme is equal to the resulting regex piece; logically these are two distinct spaces
 const inlineRegexSymbolsToRegexExpressionsArr: { [key: string]: RegexExpr} = {
+	[_1_InlineRegexSymbol_Dot]: {regexExpr: '\\.'},
 	[InlineRegexSymbol_Digit1]: {regexExpr: '\\d'},
 	[InlineRegexSymbol_Digit2]: {regexExpr: '[0-9]'},
 	[InlineRegexSymbol_0_to_3]: {regexExpr: '[0-3]'},
 	[InlineRegexSymbol_CapitalLetter]: {regexExpr: '[\\p{Lu}\\p{Lt}]', isUnicode: true, isCaseSensitive: true},
-	[InlineRegexSymbol_LowercaseLetter]: {regexExpr: '\\p{Ll}', isUnicode: true, isCaseSensitive: true}
+	[InlineRegexSymbol_LowercaseLetter]: {regexExpr: '\\p{Ll}', isUnicode: true, isCaseSensitive: true},
 }

 const inlineRegexSymbolsDetectionRegex = new RegExp(inlineRegexSymbolsArrEscapedForRegex.join('|'), 'gi')