#115 - Add inline regex support to match capital letters and lowercase letters explicitly

This commit is contained in:
SebastianMC 2023-11-24 18:52:26 +01:00
parent 39cddc301a
commit 88327f6314
3 changed files with 53 additions and 14 deletions

View File

@ -372,6 +372,7 @@ export const determineSortingGroup = function (entry: TFile | TFolder, spec: Cus
switch (group.type) { switch (group.type) {
case CustomSortGroupType.ExactPrefix: case CustomSortGroupType.ExactPrefix:
if (group.exactPrefix) { if (group.exactPrefix) {
console.log(`Exact prefix check`)
if (nameForMatching.startsWith(group.exactPrefix)) { if (nameForMatching.startsWith(group.exactPrefix)) {
determined = true; determined = true;
} }
@ -379,6 +380,7 @@ export const determineSortingGroup = function (entry: TFile | TFolder, spec: Cus
const [matched, matchedGroup] = matchGroupRegex(group.regexPrefix!, nameForMatching) const [matched, matchedGroup] = matchGroupRegex(group.regexPrefix!, nameForMatching)
determined = matched determined = matched
derivedText = matchedGroup ?? derivedText derivedText = matchedGroup ?? derivedText
console.log(`Exact regexp prefix check ${group.regexPrefix?.regex?.toString()} vs. ${nameForMatching} = ${matched}`)
} }
break; break;
case CustomSortGroupType.ExactSuffix: case CustomSortGroupType.ExactSuffix:

View File

@ -2957,17 +2957,23 @@ describe('convertPlainStringWithNumericSortingSymbolToRegex', () => {
it('should correctly include regex token for string end', () => { it('should correctly include regex token for string end', () => {
const input1 = 'Part\\-D+:' const input1 = 'Part\\-D+:'
const input2 = ' \\[0-9]\\-D+' const input2 = ' \\[0-9]\\-D+'
const input3 = ' \\l\\[0-9]\\-D+'
const result1 = convertPlainStringToRegex(input1, RegexpUsedAs.Suffix) const result1 = convertPlainStringToRegex(input1, RegexpUsedAs.Suffix)
const result2 = convertPlainStringToRegex(input2, RegexpUsedAs.Suffix) const result2 = convertPlainStringToRegex(input2, RegexpUsedAs.Suffix)
const result3 = convertPlainStringToRegex(input3, RegexpUsedAs.Suffix)
expect(result1?.regexpSpec.regex).toEqual(/Part *(\d+(?:-\d+)*):$/i) expect(result1?.regexpSpec.regex).toEqual(/Part *(\d+(?:-\d+)*):$/i)
expect(result2?.regexpSpec.regex).toEqual(/ [0-9] *(\d+(?:-\d+)*)$/i) expect(result2?.regexpSpec.regex).toEqual(/ [0-9] *(\d+(?:-\d+)*)$/i)
expect(result3?.regexpSpec.regex).toEqual(/ \p{Ll}[0-9] *(\d+(?:-\d+)*)$/u)
}) })
it('should correctly include regex token for string begin and end', () => { it('should correctly include regex token for string begin and end', () => {
const input1 = 'Part\\.D+:' const input1 = 'Part\\.D+:'
const input2 = ' \\d \\[0-9] ' const input2 = ' \\d \\[0-9] '
const input3 = ' \\d \\[0-9] \\C'
const result1 = convertPlainStringToRegex(input1, RegexpUsedAs.FullMatch) const result1 = convertPlainStringToRegex(input1, RegexpUsedAs.FullMatch)
const result2 = convertPlainStringToRegex(input2, RegexpUsedAs.FullMatch) const result2 = convertPlainStringToRegex(input2, RegexpUsedAs.FullMatch)
const result3 = convertPlainStringToRegex(input3, RegexpUsedAs.FullMatch)
expect(result1?.regexpSpec.regex).toEqual(/^Part *(\d+(?:\.\d+)*):$/i) expect(result1?.regexpSpec.regex).toEqual(/^Part *(\d+(?:\.\d+)*):$/i)
expect(result2?.regexpSpec.regex).toEqual(/^ \d [0-9] $/i) expect(result2?.regexpSpec.regex).toEqual(/^ \d [0-9] $/i)
expect(result3?.regexpSpec.regex).toEqual(/^ \d [0-9] [\p{Lu}\p{Lt}]$/u)
}) })
}) })

View File

@ -325,6 +325,9 @@ const InlineRegexSymbol_Digit1: string = '\\d'
const InlineRegexSymbol_Digit2: string = '\\[0-9]' const InlineRegexSymbol_Digit2: string = '\\[0-9]'
const InlineRegexSymbol_0_to_3: string = '\\[0-3]' const InlineRegexSymbol_0_to_3: string = '\\[0-3]'
// Inline regex symbol for a capital letter; it is expanded to the regex piece '[\p{Lu}\p{Lt}]'
// by the inline-symbols-to-regex-expressions map defined further down in this file.
const InlineRegexSymbol_CapitalLetter: string = '\\C'
// Inline regex symbol for a lowercase letter; it is expanded to the regex piece '\p{Ll}'
// by the same map.
const InlineRegexSymbol_LowercaseLetter: string = '\\l'
const UnsafeRegexCharsRegex: RegExp = /[\^$.\-+\[\]{}()|*?=!\\]/g const UnsafeRegexCharsRegex: RegExp = /[\^$.\-+\[\]{}()|*?=!\\]/g
export const escapeRegexUnsafeCharacters = (s: string): string => { export const escapeRegexUnsafeCharacters = (s: string): string => {
@ -347,14 +350,24 @@ const sortingSymbolsRegex = new RegExp(sortingSymbolsArr.join('|'), 'gi')
const inlineRegexSymbolsArrEscapedForRegex: Array<string> = [ const inlineRegexSymbolsArrEscapedForRegex: Array<string> = [
escapeRegexUnsafeCharacters(InlineRegexSymbol_Digit1), escapeRegexUnsafeCharacters(InlineRegexSymbol_Digit1),
escapeRegexUnsafeCharacters(InlineRegexSymbol_Digit2), escapeRegexUnsafeCharacters(InlineRegexSymbol_Digit2),
escapeRegexUnsafeCharacters(InlineRegexSymbol_0_to_3) escapeRegexUnsafeCharacters(InlineRegexSymbol_0_to_3),
escapeRegexUnsafeCharacters(InlineRegexSymbol_CapitalLetter),
escapeRegexUnsafeCharacters(InlineRegexSymbol_LowercaseLetter)
] ]
/**
 * Describes the regex piece that a single inline regex symbol expands to,
 * together with the requirements that piece puts on the flags of the final RegExp.
 */
interface RegexExpr {
// Regex source text inserted in place of the inline regex symbol
regexExpr: string
// Set when the piece requires the 'u' (unicode) flag, e.g. because it uses \p{...} property escapes
isUnicode?: boolean
// Set when the final regex must be built without the 'i' (ignore case) flag
isCaseSensitive?: boolean
}
// Maps each inline regex symbol (source lexeme) to the regex piece it expands to.
// Don't be confused if the source lexeme is equal to the resulting regex piece: logically these are two distinct spaces.
// The capital-letter and lowercase-letter entries additionally mark the resulting regex as unicode and case-sensitive.
const inlineRegexSymbolsToRegexExpressionsArr: { [key: string]: string} = { const inlineRegexSymbolsToRegexExpressionsArr: { [key: string]: RegexExpr} = {
[InlineRegexSymbol_Digit1]: '\\d', [InlineRegexSymbol_Digit1]: {regexExpr: '\\d'},
[InlineRegexSymbol_Digit2]: '[0-9]', [InlineRegexSymbol_Digit2]: {regexExpr: '[0-9]'},
[InlineRegexSymbol_0_to_3]: '[0-3]', [InlineRegexSymbol_0_to_3]: {regexExpr: '[0-3]'},
[InlineRegexSymbol_CapitalLetter]: {regexExpr: '[\\p{Lu}\\p{Lt}]', isUnicode: true, isCaseSensitive: true},
[InlineRegexSymbol_LowercaseLetter]: {regexExpr: '\\p{Ll}', isUnicode: true, isCaseSensitive: true}
} }
const inlineRegexSymbolsDetectionRegex = new RegExp(inlineRegexSymbolsArrEscapedForRegex.join('|'), 'gi') const inlineRegexSymbolsDetectionRegex = new RegExp(inlineRegexSymbolsArrEscapedForRegex.join('|'), 'gi')
@ -500,12 +513,14 @@ export const convertPlainStringToRegex = (s: string, actAs: RegexpUsedAs): Regex
const [extractedPrefix, extractedSuffix] = s!.split(detectedSymbol) const [extractedPrefix, extractedSuffix] = s!.split(detectedSymbol)
const regexPrefix: string = regexMatchesStart ? '^' : '' const regexPrefix: string = regexMatchesStart ? '^' : ''
const regexSuffix: string = regexMatchesEnding ? '$' : '' const regexSuffix: string = regexMatchesEnding ? '$' : ''
const escapedProcessedPrefix: string = convertInlineRegexSymbolsAndEscapeTheRest(extractedPrefix) const escapedProcessedPrefix: RegexAsString = convertInlineRegexSymbolsAndEscapeTheRest(extractedPrefix)
const escapedProcessedSuffix: string = convertInlineRegexSymbolsAndEscapeTheRest(extractedSuffix) const escapedProcessedSuffix: RegexAsString = convertInlineRegexSymbolsAndEscapeTheRest(extractedSuffix)
const regexFlags: string = replacement.unicodeRegex ? 'ui' : 'i' const regexUnicode: boolean = !!replacement.unicodeRegex || !!escapedProcessedPrefix.isUnicodeRegex || !!escapedProcessedSuffix.isUnicodeRegex
const regexCaseSensitive: boolean = !!escapedProcessedPrefix.isCaseSensitiveRegex || !!escapedProcessedSuffix.isCaseSensitiveRegex
const regexFlags: string = `${regexUnicode?'u':''}${regexCaseSensitive?'':'i'}`
return { return {
regexpSpec: { regexpSpec: {
regex: new RegExp(`${regexPrefix}${escapedProcessedPrefix}${replacement.regexpStr}${escapedProcessedSuffix}${regexSuffix}`, regexFlags), regex: new RegExp(`${regexPrefix}${escapedProcessedPrefix.s}${replacement.regexpStr}${escapedProcessedSuffix.s}${regexSuffix}`, regexFlags),
normalizerFn: replacement.normalizerFn normalizerFn: replacement.normalizerFn
}, },
prefix: extractedPrefix, prefix: extractedPrefix,
@ -516,9 +531,10 @@ export const convertPlainStringToRegex = (s: string, actAs: RegexpUsedAs): Regex
const replacement: RegexAsString = convertInlineRegexSymbolsAndEscapeTheRest(s)! const replacement: RegexAsString = convertInlineRegexSymbolsAndEscapeTheRest(s)!
const regexPrefix: string = regexMatchesStart ? '^' : '' const regexPrefix: string = regexMatchesStart ? '^' : ''
const regexSuffix: string = regexMatchesEnding ? '$' : '' const regexSuffix: string = regexMatchesEnding ? '$' : ''
const regexFlags: string = `${replacement.isUnicodeRegex?'u':''}${replacement.isCaseSensitiveRegex?'':'i'}`
return { return {
regexpSpec: { regexpSpec: {
regex: new RegExp(`${regexPrefix}${replacement}${regexSuffix}`, 'i') regex: new RegExp(`${regexPrefix}${replacement.s}${regexSuffix}`, regexFlags)
}, },
prefix: '', // shouldn't be used anyway because of the below containsAdvancedRegex: false prefix: '', // shouldn't be used anyway because of the below containsAdvancedRegex: false
suffix: '', // ---- // ---- suffix: '', // ---- // ----
@ -529,14 +545,22 @@ export const convertPlainStringToRegex = (s: string, actAs: RegexpUsedAs): Regex
} }
} }
/**
 * Result of convertInlineRegexSymbolsAndEscapeTheRest: the regex source text plus
 * hints about the flags the final RegExp should be created with.
 * NOTE: for an empty input string only `s` is populated; both flags are left undefined.
 */
type RegexAsString = string export interface RegexAsString {
// Regex source text: inline regex symbols expanded, all remaining characters regex-escaped
s: string
// True when at least one expanded inline symbol requires the 'u' flag
isUnicodeRegex?: boolean
// True when at least one expanded inline symbol requires case-sensitive matching (no 'i' flag)
isCaseSensitiveRegex?: boolean
}
export const convertInlineRegexSymbolsAndEscapeTheRest = (s: string): RegexAsString => { export const convertInlineRegexSymbolsAndEscapeTheRest = (s: string): RegexAsString => {
if (s === '') { if (s === '') {
return s return {
s: s
}
} }
let regexAsString: Array<string> = [] let regexAsString: Array<string> = []
let isUnicode: boolean = false
let isCaseSensitive: boolean = false
while (s!.length > 0) { while (s!.length > 0) {
// detect the first inline regex // detect the first inline regex
@ -562,7 +586,10 @@ export const convertInlineRegexSymbolsAndEscapeTheRest = (s: string): RegexAsStr
regexAsString.push(escapeRegexUnsafeCharacters(charsBeforeRegexSymbol)) regexAsString.push(escapeRegexUnsafeCharacters(charsBeforeRegexSymbol))
s = s!.substring(earliestRegexSymbolIdx) s = s!.substring(earliestRegexSymbolIdx)
} }
regexAsString.push(inlineRegexSymbolsToRegexExpressionsArr[earliestRegexSymbol!]) const expr = inlineRegexSymbolsToRegexExpressionsArr[earliestRegexSymbol!]
regexAsString.push(expr.regexExpr)
isUnicode ||= !!expr.isUnicode
isCaseSensitive ||= !!expr.isCaseSensitive
s = s!.substring(earliestRegexSymbol!.length) s = s!.substring(earliestRegexSymbol!.length)
} else { } else {
regexAsString.push(escapeRegexUnsafeCharacters(s)) regexAsString.push(escapeRegexUnsafeCharacters(s))
@ -570,7 +597,11 @@ export const convertInlineRegexSymbolsAndEscapeTheRest = (s: string): RegexAsStr
} }
} }
return regexAsString.join('') return {
s: regexAsString.join(''),
isUnicodeRegex: isUnicode,
isCaseSensitiveRegex: isCaseSensitive
}
} }
export const MatchFolderNameLexeme: string = 'name:' export const MatchFolderNameLexeme: string = 'name:'