#115 - Add inline regex support to match capital letters and lowercase letters explicitly

This commit is contained in:
SebastianMC 2023-11-24 18:52:26 +01:00
parent 39cddc301a
commit 88327f6314
3 changed files with 53 additions and 14 deletions

View File

@ -372,6 +372,7 @@ export const determineSortingGroup = function (entry: TFile | TFolder, spec: Cus
switch (group.type) {
case CustomSortGroupType.ExactPrefix:
if (group.exactPrefix) {
console.log(`Exact prefix check`)
if (nameForMatching.startsWith(group.exactPrefix)) {
determined = true;
}
@ -379,6 +380,7 @@ export const determineSortingGroup = function (entry: TFile | TFolder, spec: Cus
const [matched, matchedGroup] = matchGroupRegex(group.regexPrefix!, nameForMatching)
determined = matched
derivedText = matchedGroup ?? derivedText
console.log(`Exact regexp prefix check ${group.regexPrefix?.regex?.toString()} vs. ${nameForMatching} = ${matched}`)
}
break;
case CustomSortGroupType.ExactSuffix:

View File

@ -2957,17 +2957,23 @@ describe('convertPlainStringWithNumericSortingSymbolToRegex', () => {
it('should correctly include regex token for string end', () => {
const input1 = 'Part\\-D+:'
const input2 = ' \\[0-9]\\-D+'
const input3 = ' \\l\\[0-9]\\-D+'
const result1 = convertPlainStringToRegex(input1, RegexpUsedAs.Suffix)
const result2 = convertPlainStringToRegex(input2, RegexpUsedAs.Suffix)
const result3 = convertPlainStringToRegex(input3, RegexpUsedAs.Suffix)
expect(result1?.regexpSpec.regex).toEqual(/Part *(\d+(?:-\d+)*):$/i)
expect(result2?.regexpSpec.regex).toEqual(/ [0-9] *(\d+(?:-\d+)*)$/i)
expect(result3?.regexpSpec.regex).toEqual(/ \p{Ll}[0-9] *(\d+(?:-\d+)*)$/u)
})
it('should correctly include regex token for string begin and end', () => {
const input1 = 'Part\\.D+:'
const input2 = ' \\d \\[0-9] '
const input3 = ' \\d \\[0-9] \\C'
const result1 = convertPlainStringToRegex(input1, RegexpUsedAs.FullMatch)
const result2 = convertPlainStringToRegex(input2, RegexpUsedAs.FullMatch)
const result3 = convertPlainStringToRegex(input3, RegexpUsedAs.FullMatch)
expect(result1?.regexpSpec.regex).toEqual(/^Part *(\d+(?:\.\d+)*):$/i)
expect(result2?.regexpSpec.regex).toEqual(/^ \d [0-9] $/i)
expect(result3?.regexpSpec.regex).toEqual(/^ \d [0-9] [\p{Lu}\p{Lt}]$/u)
})
})

View File

@ -325,6 +325,9 @@ const InlineRegexSymbol_Digit1: string = '\\d'
const InlineRegexSymbol_Digit2: string = '\\[0-9]'
const InlineRegexSymbol_0_to_3: string = '\\[0-3]'
const InlineRegexSymbol_CapitalLetter: string = '\\C'
const InlineRegexSymbol_LowercaseLetter: string = '\\l'
const UnsafeRegexCharsRegex: RegExp = /[\^$.\-+\[\]{}()|*?=!\\]/g
export const escapeRegexUnsafeCharacters = (s: string): string => {
@ -347,14 +350,24 @@ const sortingSymbolsRegex = new RegExp(sortingSymbolsArr.join('|'), 'gi')
const inlineRegexSymbolsArrEscapedForRegex: Array<string> = [
escapeRegexUnsafeCharacters(InlineRegexSymbol_Digit1),
escapeRegexUnsafeCharacters(InlineRegexSymbol_Digit2),
escapeRegexUnsafeCharacters(InlineRegexSymbol_0_to_3)
escapeRegexUnsafeCharacters(InlineRegexSymbol_0_to_3),
escapeRegexUnsafeCharacters(InlineRegexSymbol_CapitalLetter),
escapeRegexUnsafeCharacters(InlineRegexSymbol_LowercaseLetter)
]
interface RegexExpr {
regexExpr: string
isUnicode?: boolean
isCaseSensitive?: boolean
}
// Don't be confused if the source lexeme is equal to the resulting regex piece, logically these two distinct spaces
const inlineRegexSymbolsToRegexExpressionsArr: { [key: string]: string} = {
[InlineRegexSymbol_Digit1]: '\\d',
[InlineRegexSymbol_Digit2]: '[0-9]',
[InlineRegexSymbol_0_to_3]: '[0-3]',
const inlineRegexSymbolsToRegexExpressionsArr: { [key: string]: RegexExpr} = {
[InlineRegexSymbol_Digit1]: {regexExpr: '\\d'},
[InlineRegexSymbol_Digit2]: {regexExpr: '[0-9]'},
[InlineRegexSymbol_0_to_3]: {regexExpr: '[0-3]'},
[InlineRegexSymbol_CapitalLetter]: {regexExpr: '[\\p{Lu}\\p{Lt}]', isUnicode: true, isCaseSensitive: true},
[InlineRegexSymbol_LowercaseLetter]: {regexExpr: '\\p{Ll}', isUnicode: true, isCaseSensitive: true}
}
const inlineRegexSymbolsDetectionRegex = new RegExp(inlineRegexSymbolsArrEscapedForRegex.join('|'), 'gi')
@ -500,12 +513,14 @@ export const convertPlainStringToRegex = (s: string, actAs: RegexpUsedAs): Regex
const [extractedPrefix, extractedSuffix] = s!.split(detectedSymbol)
const regexPrefix: string = regexMatchesStart ? '^' : ''
const regexSuffix: string = regexMatchesEnding ? '$' : ''
const escapedProcessedPrefix: string = convertInlineRegexSymbolsAndEscapeTheRest(extractedPrefix)
const escapedProcessedSuffix: string = convertInlineRegexSymbolsAndEscapeTheRest(extractedSuffix)
const regexFlags: string = replacement.unicodeRegex ? 'ui' : 'i'
const escapedProcessedPrefix: RegexAsString = convertInlineRegexSymbolsAndEscapeTheRest(extractedPrefix)
const escapedProcessedSuffix: RegexAsString = convertInlineRegexSymbolsAndEscapeTheRest(extractedSuffix)
const regexUnicode: boolean = !!replacement.unicodeRegex || !!escapedProcessedPrefix.isUnicodeRegex || !!escapedProcessedSuffix.isUnicodeRegex
const regexCaseSensitive: boolean = !!escapedProcessedPrefix.isCaseSensitiveRegex || !!escapedProcessedSuffix.isCaseSensitiveRegex
const regexFlags: string = `${regexUnicode?'u':''}${regexCaseSensitive?'':'i'}`
return {
regexpSpec: {
regex: new RegExp(`${regexPrefix}${escapedProcessedPrefix}${replacement.regexpStr}${escapedProcessedSuffix}${regexSuffix}`, regexFlags),
regex: new RegExp(`${regexPrefix}${escapedProcessedPrefix.s}${replacement.regexpStr}${escapedProcessedSuffix.s}${regexSuffix}`, regexFlags),
normalizerFn: replacement.normalizerFn
},
prefix: extractedPrefix,
@ -516,9 +531,10 @@ export const convertPlainStringToRegex = (s: string, actAs: RegexpUsedAs): Regex
const replacement: RegexAsString = convertInlineRegexSymbolsAndEscapeTheRest(s)!
const regexPrefix: string = regexMatchesStart ? '^' : ''
const regexSuffix: string = regexMatchesEnding ? '$' : ''
const regexFlags: string = `${replacement.isUnicodeRegex?'u':''}${replacement.isCaseSensitiveRegex?'':'i'}`
return {
regexpSpec: {
regex: new RegExp(`${regexPrefix}${replacement}${regexSuffix}`, 'i')
regex: new RegExp(`${regexPrefix}${replacement.s}${regexSuffix}`, regexFlags)
},
prefix: '', // shouldn't be used anyway because of the below containsAdvancedRegex: false
suffix: '', // ---- // ----
@ -529,14 +545,22 @@ export const convertPlainStringToRegex = (s: string, actAs: RegexpUsedAs): Regex
}
}
type RegexAsString = string
export interface RegexAsString {
s: string
isUnicodeRegex?: boolean
isCaseSensitiveRegex?: boolean
}
export const convertInlineRegexSymbolsAndEscapeTheRest = (s: string): RegexAsString => {
if (s === '') {
return s
return {
s: s
}
}
let regexAsString: Array<string> = []
let isUnicode: boolean = false
let isCaseSensitive: boolean = false
while (s!.length > 0) {
// detect the first inline regex
@ -562,7 +586,10 @@ export const convertInlineRegexSymbolsAndEscapeTheRest = (s: string): RegexAsStr
regexAsString.push(escapeRegexUnsafeCharacters(charsBeforeRegexSymbol))
s = s!.substring(earliestRegexSymbolIdx)
}
regexAsString.push(inlineRegexSymbolsToRegexExpressionsArr[earliestRegexSymbol!])
const expr = inlineRegexSymbolsToRegexExpressionsArr[earliestRegexSymbol!]
regexAsString.push(expr.regexExpr)
isUnicode ||= !!expr.isUnicode
isCaseSensitive ||= !!expr.isCaseSensitive
s = s!.substring(earliestRegexSymbol!.length)
} else {
regexAsString.push(escapeRegexUnsafeCharacters(s))
@ -570,7 +597,11 @@ export const convertInlineRegexSymbolsAndEscapeTheRest = (s: string): RegexAsStr
}
}
return regexAsString.join('')
return {
s: regexAsString.join(''),
isUnicodeRegex: isUnicode,
isCaseSensitiveRegex: isCaseSensitive
}
}
export const MatchFolderNameLexeme: string = 'name:'