From 88327f6314f72a86b27aee8afc243301454d7f14 Mon Sep 17 00:00:00 2001 From: SebastianMC <23032356+SebastianMC@users.noreply.github.com> Date: Fri, 24 Nov 2023 18:52:26 +0100 Subject: [PATCH] #115 - Add inline regex support to match capital letters and lowercase letters explicitly --- src/custom-sort/custom-sort.ts | 2 + .../sorting-spec-processor.spec.ts | 6 ++ src/custom-sort/sorting-spec-processor.ts | 59 ++++++++++++++----- 3 files changed, 53 insertions(+), 14 deletions(-) diff --git a/src/custom-sort/custom-sort.ts b/src/custom-sort/custom-sort.ts index 275f163..5c2557f 100644 --- a/src/custom-sort/custom-sort.ts +++ b/src/custom-sort/custom-sort.ts @@ -372,6 +372,7 @@ export const determineSortingGroup = function (entry: TFile | TFolder, spec: Cus switch (group.type) { case CustomSortGroupType.ExactPrefix: if (group.exactPrefix) { + console.log(`Exact prefix check`) if (nameForMatching.startsWith(group.exactPrefix)) { determined = true; } @@ -379,6 +380,7 @@ export const determineSortingGroup = function (entry: TFile | TFolder, spec: Cus const [matched, matchedGroup] = matchGroupRegex(group.regexPrefix!, nameForMatching) determined = matched derivedText = matchedGroup ?? derivedText + console.log(`Exact regexp prefix check ${group.regexPrefix?.regex?.toString()} vs. ${nameForMatching} = ${matched}`) } break; case CustomSortGroupType.ExactSuffix: diff --git a/src/custom-sort/sorting-spec-processor.spec.ts b/src/custom-sort/sorting-spec-processor.spec.ts index 8656ccd..d9b0ac0 100644 --- a/src/custom-sort/sorting-spec-processor.spec.ts +++ b/src/custom-sort/sorting-spec-processor.spec.ts @@ -2957,17 +2957,23 @@ describe('convertPlainStringWithNumericSortingSymbolToRegex', () => { it('should correctly include regex token for string end', () => { const input1 = 'Part\\-D+:' const input2 = ' \\[0-9]\\-D+' + const input3 = ' \\l\\[0-9]\\-D+' const result1 = convertPlainStringToRegex(input1, RegexpUsedAs.Suffix) const result2 = convertPlainStringToRegex(input2, RegexpUsedAs.Suffix) + const result3 = convertPlainStringToRegex(input3, RegexpUsedAs.Suffix) expect(result1?.regexpSpec.regex).toEqual(/Part *(\d+(?:-\d+)*):$/i) expect(result2?.regexpSpec.regex).toEqual(/ [0-9] *(\d+(?:-\d+)*)$/i) + expect(result3?.regexpSpec.regex).toEqual(/ \p{Ll}[0-9] *(\d+(?:-\d+)*)$/u) }) it('should correctly include regex token for string begin and end', () => { const input1 = 'Part\\.D+:' const input2 = ' \\d \\[0-9] ' + const input3 = ' \\d \\[0-9] \\C' const result1 = convertPlainStringToRegex(input1, RegexpUsedAs.FullMatch) const result2 = convertPlainStringToRegex(input2, RegexpUsedAs.FullMatch) + const result3 = convertPlainStringToRegex(input3, RegexpUsedAs.FullMatch) expect(result1?.regexpSpec.regex).toEqual(/^Part *(\d+(?:\.\d+)*):$/i) expect(result2?.regexpSpec.regex).toEqual(/^ \d [0-9] $/i) + expect(result3?.regexpSpec.regex).toEqual(/^ \d [0-9] [\p{Lu}\p{Lt}]$/u) }) }) diff --git a/src/custom-sort/sorting-spec-processor.ts b/src/custom-sort/sorting-spec-processor.ts index 8a41145..187bbfa 100644 --- a/src/custom-sort/sorting-spec-processor.ts +++ b/src/custom-sort/sorting-spec-processor.ts @@ -325,6 +325,9 @@ const InlineRegexSymbol_Digit1: string = '\\d' const InlineRegexSymbol_Digit2: string = '\\[0-9]' const InlineRegexSymbol_0_to_3: string = '\\[0-3]' +const InlineRegexSymbol_CapitalLetter: string = '\\C' +const InlineRegexSymbol_LowercaseLetter: string = '\\l' + const UnsafeRegexCharsRegex: RegExp = /[\^$.\-+\[\]{}()|*?=!\\]/g export const escapeRegexUnsafeCharacters = (s: string): string => { @@ -347,14 +350,24 @@ const sortingSymbolsRegex = new RegExp(sortingSymbolsArr.join('|'), 'gi') const inlineRegexSymbolsArrEscapedForRegex: Array = [ escapeRegexUnsafeCharacters(InlineRegexSymbol_Digit1), escapeRegexUnsafeCharacters(InlineRegexSymbol_Digit2), - escapeRegexUnsafeCharacters(InlineRegexSymbol_0_to_3) + escapeRegexUnsafeCharacters(InlineRegexSymbol_0_to_3), + escapeRegexUnsafeCharacters(InlineRegexSymbol_CapitalLetter), + escapeRegexUnsafeCharacters(InlineRegexSymbol_LowercaseLetter) ] +interface RegexExpr { + regexExpr: string + isUnicode?: boolean + isCaseSensitive?: boolean +} + // Don't be confused if the source lexeme is equal to the resulting regex piece, logically these two distinct spaces -const inlineRegexSymbolsToRegexExpressionsArr: { [key: string]: string} = { - [InlineRegexSymbol_Digit1]: '\\d', - [InlineRegexSymbol_Digit2]: '[0-9]', - [InlineRegexSymbol_0_to_3]: '[0-3]', +const inlineRegexSymbolsToRegexExpressionsArr: { [key: string]: RegexExpr} = { + [InlineRegexSymbol_Digit1]: {regexExpr: '\\d'}, + [InlineRegexSymbol_Digit2]: {regexExpr: '[0-9]'}, + [InlineRegexSymbol_0_to_3]: {regexExpr: '[0-3]'}, + [InlineRegexSymbol_CapitalLetter]: {regexExpr: '[\\p{Lu}\\p{Lt}]', isUnicode: true, isCaseSensitive: true}, + [InlineRegexSymbol_LowercaseLetter]: {regexExpr: '\\p{Ll}', isUnicode: true, isCaseSensitive: true} } const inlineRegexSymbolsDetectionRegex = new RegExp(inlineRegexSymbolsArrEscapedForRegex.join('|'), 'gi') @@ -500,12 +513,14 @@ export const convertPlainStringToRegex = (s: string, actAs: RegexpUsedAs): Regex const [extractedPrefix, extractedSuffix] = s!.split(detectedSymbol) const regexPrefix: string = regexMatchesStart ? '^' : '' const regexSuffix: string = regexMatchesEnding ? '$' : '' - const escapedProcessedPrefix: string = convertInlineRegexSymbolsAndEscapeTheRest(extractedPrefix) - const escapedProcessedSuffix: string = convertInlineRegexSymbolsAndEscapeTheRest(extractedSuffix) - const regexFlags: string = replacement.unicodeRegex ? 'ui' : 'i' + const escapedProcessedPrefix: RegexAsString = convertInlineRegexSymbolsAndEscapeTheRest(extractedPrefix) + const escapedProcessedSuffix: RegexAsString = convertInlineRegexSymbolsAndEscapeTheRest(extractedSuffix) + const regexUnicode: boolean = !!replacement.unicodeRegex || !!escapedProcessedPrefix.isUnicodeRegex || !!escapedProcessedSuffix.isUnicodeRegex + const regexCaseSensitive: boolean = !!escapedProcessedPrefix.isCaseSensitiveRegex || !!escapedProcessedSuffix.isCaseSensitiveRegex + const regexFlags: string = `${regexUnicode?'u':''}${regexCaseSensitive?'':'i'}` return { regexpSpec: { - regex: new RegExp(`${regexPrefix}${escapedProcessedPrefix}${replacement.regexpStr}${escapedProcessedSuffix}${regexSuffix}`, regexFlags), + regex: new RegExp(`${regexPrefix}${escapedProcessedPrefix.s}${replacement.regexpStr}${escapedProcessedSuffix.s}${regexSuffix}`, regexFlags), normalizerFn: replacement.normalizerFn }, prefix: extractedPrefix, @@ -516,9 +531,10 @@ export const convertPlainStringToRegex = (s: string, actAs: RegexpUsedAs): Regex const replacement: RegexAsString = convertInlineRegexSymbolsAndEscapeTheRest(s)! const regexPrefix: string = regexMatchesStart ? '^' : '' const regexSuffix: string = regexMatchesEnding ? '$' : '' + const regexFlags: string = `${replacement.isUnicodeRegex?'u':''}${replacement.isCaseSensitiveRegex?'':'i'}` return { regexpSpec: { - regex: new RegExp(`${regexPrefix}${replacement}${regexSuffix}`, 'i') + regex: new RegExp(`${regexPrefix}${replacement.s}${regexSuffix}`, regexFlags) }, prefix: '', // shouldn't be used anyway because of the below containsAdvancedRegex: false suffix: '', // ---- // ---- @@ -529,14 +545,22 @@ export const convertPlainStringToRegex = (s: string, actAs: RegexpUsedAs): Regex } } -type RegexAsString = string +export interface RegexAsString { + s: string + isUnicodeRegex?: boolean + isCaseSensitiveRegex?: boolean +} export const convertInlineRegexSymbolsAndEscapeTheRest = (s: string): RegexAsString => { if (s === '') { - return s + return { + s: s + } } let regexAsString: Array = [] + let isUnicode: boolean = false + let isCaseSensitive: boolean = false while (s!.length > 0) { // detect the first inline regex @@ -562,7 +586,10 @@ export const convertInlineRegexSymbolsAndEscapeTheRest = (s: string): RegexAsStr regexAsString.push(escapeRegexUnsafeCharacters(charsBeforeRegexSymbol)) s = s!.substring(earliestRegexSymbolIdx) } - regexAsString.push(inlineRegexSymbolsToRegexExpressionsArr[earliestRegexSymbol!]) + const expr = inlineRegexSymbolsToRegexExpressionsArr[earliestRegexSymbol!] + regexAsString.push(expr.regexExpr) + isUnicode ||= !!expr.isUnicode + isCaseSensitive ||= !!expr.isCaseSensitive s = s!.substring(earliestRegexSymbol!.length) } else { regexAsString.push(escapeRegexUnsafeCharacters(s)) @@ -570,7 +597,11 @@ export const convertInlineRegexSymbolsAndEscapeTheRest = (s: string): RegexAsStr } } - return regexAsString.join('') + return { + s: regexAsString.join(''), + isUnicodeRegex: isUnicode, + isCaseSensitiveRegex: isCaseSensitive + } } export const MatchFolderNameLexeme: string = 'name:'