From e788c92543aa4d76da58673bd196e80771716eb4 Mon Sep 17 00:00:00 2001 From: SebastianMC <23032356+SebastianMC@users.noreply.github.com> Date: Tue, 7 Mar 2023 19:01:44 +0100 Subject: [PATCH] #67 - feature alphabetic wildcard - sorting symbol \a+ for ASCII word - sorting symbol \A+ for any modern language word (involved advanced unicode regexp) --- src/custom-sort/custom-sort-types.ts | 1 + src/custom-sort/matchers.spec.ts | 109 ++++++++++++++++-- src/custom-sort/matchers.ts | 27 +++-- .../sorting-spec-processor.spec.ts | 52 ++++++--- src/custom-sort/sorting-spec-processor.ts | 89 ++++++++------ 5 files changed, 203 insertions(+), 75 deletions(-) diff --git a/src/custom-sort/custom-sort-types.ts b/src/custom-sort/custom-sort-types.ts index 0612b00..9a88b7f 100644 --- a/src/custom-sort/custom-sort-types.ts +++ b/src/custom-sort/custom-sort-types.ts @@ -40,6 +40,7 @@ export interface RecognizedOrderValue { } export type NormalizerFn = (s: string) => string | null +export const IdentityNormalizerFn: NormalizerFn = (s: string) => s export interface RegExpSpec { regex: RegExp diff --git a/src/custom-sort/matchers.spec.ts b/src/custom-sort/matchers.spec.ts index a9fffae..823c0f8 100644 --- a/src/custom-sort/matchers.spec.ts +++ b/src/custom-sort/matchers.spec.ts @@ -3,15 +3,22 @@ import { getNormalizedRomanNumber, prependWithZeros, romanToIntStr, - NumberRegex, - CompoundNumberDotRegex, - CompoundNumberDashRegex, - RomanNumberRegex, - CompoundRomanNumberDotRegex, - CompoundRomanNumberDashRegex + NumberRegexStr, + CompoundNumberDotRegexStr, + CompoundNumberDashRegexStr, + RomanNumberRegexStr, + CompoundRomanNumberDotRegexStr, + CompoundRomanNumberDashRegexStr, + WordInASCIIRegexStr, + WordInAnyLanguageRegexStr } from "./matchers"; +import {SortingSpecProcessor} from "./sorting-spec-processor"; describe('Plain numbers regexp', () => { + let regexp: RegExp; + beforeEach(() => { + regexp = new RegExp('^' + NumberRegexStr, 'i'); + }); it.each([ ['', null], [' ', null], @@ 
-23,7 +30,7 @@ describe('Plain numbers regexp', () => { ['9', '9'], ['7328964783268794325496783', '7328964783268794325496783'] ])('%s => %s', (s: string, out: string | null) => { - const match: RegExpMatchArray | null = s.match(NumberRegex) + const match: RegExpMatchArray | null = s.match(regexp) if (out) { expect(match).not.toBeNull() expect(match?.[1]).toBe(out) @@ -34,6 +41,10 @@ describe('Plain numbers regexp', () => { }) describe('Plain compound numbers regexp (dot)', () => { + let regexp: RegExp; + beforeEach(() => { + regexp = new RegExp('^' + CompoundNumberDotRegexStr, 'i'); + }); it.each([ ['', null], [' ', null], @@ -55,7 +66,7 @@ describe('Plain compound numbers regexp (dot)', () => { ['56.78.-.1abc', '56.78'], ['56.78-.1abc', '56.78'], ])('%s => %s', (s: string, out: string | null) => { - const match: RegExpMatchArray | null = s.match(CompoundNumberDotRegex) + const match: RegExpMatchArray | null = s.match(regexp) if (out) { expect(match).not.toBeNull() expect(match?.[1]).toBe(out) @@ -66,6 +77,10 @@ describe('Plain compound numbers regexp (dot)', () => { }) describe('Plain compound numbers regexp (dash)', () => { + let regexp: RegExp; + beforeEach(() => { + regexp = new RegExp('^' + CompoundNumberDashRegexStr, 'i'); + }); it.each([ ['', null], [' ', null], @@ -87,7 +102,7 @@ describe('Plain compound numbers regexp (dash)', () => { ['56-78-.-1abc', '56-78'], ['56-78.-1abc', '56-78'], ])('%s => %s', (s: string, out: string | null) => { - const match: RegExpMatchArray | null = s.match(CompoundNumberDashRegex) + const match: RegExpMatchArray | null = s.match(regexp) if (out) { expect(match).not.toBeNull() expect(match?.[1]).toBe(out) @@ -98,6 +113,10 @@ describe('Plain compound numbers regexp (dash)', () => { }) describe('Plain Roman numbers regexp', () => { + let regexp: RegExp; + beforeEach(() => { + regexp = new RegExp('^' + RomanNumberRegexStr, 'i'); + }); it.each([ ['', null], [' ', null], @@ -109,7 +128,7 @@ describe('Plain Roman numbers regexp', () 
=> { ['iiiii', 'iiiii'], ['viviviv794325496783', 'viviviv'] ])('%s => %s', (s: string, out: string | null) => { - const match: RegExpMatchArray | null = s.match(RomanNumberRegex) + const match: RegExpMatchArray | null = s.match(regexp) if (out) { expect(match).not.toBeNull() expect(match?.[1]).toBe(out) @@ -120,6 +139,10 @@ describe('Plain Roman numbers regexp', () => { }) describe('Roman compound numbers regexp (dot)', () => { + let regexp: RegExp; + beforeEach(() => { + regexp = new RegExp('^' + CompoundRomanNumberDotRegexStr, 'i'); + }); it.each([ ['', null], [' ', null], @@ -143,7 +166,7 @@ describe('Roman compound numbers regexp (dot)', () => { ['xvx.d-.iabc', 'xvx.d'], ['xvx.d..iabc', 'xvx.d'], ])('%s => %s', (s: string, out: string | null) => { - const match: RegExpMatchArray | null = s.match(CompoundRomanNumberDotRegex) + const match: RegExpMatchArray | null = s.match(regexp) if (out) { expect(match).not.toBeNull() expect(match?.[1]).toBe(out) @@ -154,6 +177,10 @@ describe('Roman compound numbers regexp (dot)', () => { }) describe('Roman compound numbers regexp (dash)', () => { + let regexp: RegExp; + beforeEach(() => { + regexp = new RegExp('^' + CompoundRomanNumberDashRegexStr, 'i'); + }); it.each([ ['', null], [' ', null], @@ -177,7 +204,65 @@ describe('Roman compound numbers regexp (dash)', () => { ['xvx-d.-iabc', 'xvx-d'], ['xvx-d--iabc', 'xvx-d'] ])('%s => %s', (s: string, out: string | null) => { - const match: RegExpMatchArray | null = s.match(CompoundRomanNumberDashRegex) + const match: RegExpMatchArray | null = s.match(regexp) + if (out) { + expect(match).not.toBeNull() + expect(match?.[1]).toBe(out) + } else { + expect(match).toBeNull() + } + }) +}) + +describe('ASCII word regexp', () => { + let regexp: RegExp; + beforeEach(() => { + regexp = new RegExp('^' + WordInASCIIRegexStr, 'i'); + }); + it.each([ + ['', null], + [' ', null], + [' I', null], // leading spaces are not swallowed + ['I ', 'I'], // trailing characters are not part of the match + ['Abc', 
'Abc'], + ['Sun', 'Sun'], + ['Hello123', 'Hello'], + ['John_', 'John'], + ['Title.', 'Title'], + ['Deutschstäder', 'Deutschst'], + ['ItalianoàèéìòùÈ', 'Italiano'], + ['PolskićśńĄł', 'Polski'] + ])('%s => %s', (s: string, out: string | null) => { + const match: RegExpMatchArray | null = s.match(regexp) + if (out) { + expect(match).not.toBeNull() + expect(match?.[1]).toBe(out) + } else { + expect(match).toBeNull() + } + }) +}) + +describe('Unicode word regexp', () => { + let regexp: RegExp; + beforeEach(() => { + regexp = new RegExp('^' + WordInAnyLanguageRegexStr, 'ui'); + }); + it.each([ + ['', null], + [' ', null], + [' I', null], // leading spaces are not swallowed + ['I ', 'I'], // trailing characters are ignored in unit test + ['Abc', 'Abc'], + ['Sun', 'Sun'], + ['Hello123', 'Hello'], + ['John_', 'John'], + ['Title.', 'Title'], + ['Deutschstäder_', 'Deutschstäder'], + ['ItalianoàèéìòùÈ', 'ItalianoàèéìòùÈ'], + ['PolskićśńĄł', 'PolskićśńĄł'] + ])('%s => %s', (s: string, out: string | null) => { + const match: RegExpMatchArray | null = s.match(regexp) if (out) { expect(match).not.toBeNull() expect(match?.[1]).toBe(out) diff --git a/src/custom-sort/matchers.ts b/src/custom-sort/matchers.ts index b1fde50..0f0ae37 100644 --- a/src/custom-sort/matchers.ts +++ b/src/custom-sort/matchers.ts @@ -1,16 +1,10 @@ -export const RomanNumberRegex: RegExp = /^ *([MDCLXVI]+)/i; // Roman number -export const RomanNumberRegexStr: string = ' *([MDCLXVI]+)'; -export const CompoundRomanNumberDotRegex: RegExp = /^ *([MDCLXVI]+(?:\.[MDCLXVI]+)*)/i; // Compound Roman number with dot as separator -export const CompoundRomanNumberDotRegexStr: string = ' *([MDCLXVI]+(?:\\.[MDCLXVI]+)*)'; -export const CompoundRomanNumberDashRegex: RegExp = /^ *([MDCLXVI]+(?:-[MDCLXVI]+)*)/i; // Compound Roman number with dash as separator -export const CompoundRomanNumberDashRegexStr: string = ' *([MDCLXVI]+(?:-[MDCLXVI]+)*)'; +export const RomanNumberRegexStr: string = ' *([MDCLXVI]+)'; // Roman number 
+export const CompoundRomanNumberDotRegexStr: string = ' *([MDCLXVI]+(?:\\.[MDCLXVI]+)*)';// Compound Roman number with dot as separator +export const CompoundRomanNumberDashRegexStr: string = ' *([MDCLXVI]+(?:-[MDCLXVI]+)*)'; // Compound Roman number with dash as separator -export const NumberRegex: RegExp = /^ *(\d+)/; // Plain number -export const NumberRegexStr: string = ' *(\\d+)'; -export const CompoundNumberDotRegex: RegExp = /^ *(\d+(?:\.\d+)*)/; // Compound number with dot as separator -export const CompoundNumberDotRegexStr: string = ' *(\\d+(?:\\.\\d+)*)'; -export const CompoundNumberDashRegex: RegExp = /^ *(\d+(?:-\d+)*)/; // Compound number with dash as separator -export const CompoundNumberDashRegexStr: string = ' *(\\d+(?:-\\d+)*)'; +export const NumberRegexStr: string = ' *(\\d+)'; // Plain number +export const CompoundNumberDotRegexStr: string = ' *(\\d+(?:\\.\\d+)*)'; // Compound number with dot as separator +export const CompoundNumberDashRegexStr: string = ' *(\\d+(?:-\\d+)*)'; // Compound number with dash as separator export const DOT_SEPARATOR = '.' 
export const DASH_SEPARATOR = '-' @@ -20,6 +14,15 @@ const PIPE_SEPARATOR = '|' // ASCII 124 export const DEFAULT_NORMALIZATION_PLACES = 8; // Fixed width of a normalized number (with leading zeros) +// Property escapes: +// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Unicode_Property_Escapes +// https://stackoverflow.com/a/48902765 +// +// Using Unicode property escapes to express 'a letter in any modern language' +export const WordInAnyLanguageRegexStr = '(\\p{Letter}+)' // remember about the /u option -> /\p{Letter}+/u + +export const WordInASCIIRegexStr = '([a-zA-Z]+)' + export function prependWithZeros(s: string, minLength: number) { if (s.length < minLength) { const delta: number = minLength - s.length; diff --git a/src/custom-sort/sorting-spec-processor.spec.ts b/src/custom-sort/sorting-spec-processor.spec.ts index e181b36..f774270 100644 --- a/src/custom-sort/sorting-spec-processor.spec.ts +++ b/src/custom-sort/sorting-spec-processor.spec.ts @@ -5,16 +5,16 @@ import { ConsumedFolderMatchingRegexp, consumeFolderByRegexpExpression, convertPlainStringToRegex, - detectNumericSortingSymbols, + detectSortingSymbols, escapeRegexUnsafeCharacters, - extractNumericSortingSymbol, - hasMoreThanOneNumericSortingSymbol, + extractSortingSymbol, + hasMoreThanOneSortingSymbol, NumberNormalizerFn, RegexpUsedAs, RomanNumberNormalizerFn, SortingSpecProcessor } from "./sorting-spec-processor" -import {CustomSortGroupType, CustomSortOrder, CustomSortSpec} from "./custom-sort-types"; +import {CustomSortGroupType, CustomSortOrder, CustomSortSpec, IdentityNormalizerFn} from "./custom-sort-types"; import {FolderMatchingRegexp, FolderMatchingTreeNode} from "./folder-matching-rules"; const txtInputExampleA: string = ` @@ -347,7 +347,7 @@ const expectedSortSpecsExampleA: { [key: string]: CustomSortSpec } = { } } -const expectedSortSpecsExampleNumericSortingSymbols: { [key: string]: CustomSortSpec } = { +const expectedSortSpecsExampleSortingSymbols: 
{ [key: string]: CustomSortSpec } = { "mock-folder": { groups: [{ foldersOnly: true, @@ -388,21 +388,37 @@ const expectedSortSpecsExampleNumericSortingSymbols: { [key: string]: CustomSort regex: / *(\d+)plain syntax\?\?\?$/i, normalizerFn: NumberNormalizerFn } + }, { + order: CustomSortOrder.alphabetical, + type: CustomSortGroupType.ExactName, + regexPrefix: { + regex: /^Here goes ASCII word ([a-zA-Z]+)$/i, + normalizerFn: IdentityNormalizerFn + } + }, { + order: CustomSortOrder.alphabetical, + type: CustomSortGroupType.ExactName, + regexPrefix: { + regex: /^(\p{Letter}+)\. is for any modern language word$/iu, + normalizerFn: IdentityNormalizerFn + } }, { type: CustomSortGroupType.Outsiders, order: CustomSortOrder.alphabetical, }], targetFoldersPaths: ['mock-folder'], - outsidersGroupIdx: 5 + outsidersGroupIdx: 7 } } -const txtInputExampleNumericSortingSymbols: string = ` +const txtInputExampleSortingSymbols: string = ` /folders Chapter \\.d+ ... /:files ...section \\-r+. % Appendix \\-d+ (attachments) Plain syntax\\R+ ... works? And this kind of... \\D+plain syntax??? +Here goes ASCII word \\a+ +\\A+. 
is for any modern language word ` describe('SortingSpecProcessor', () => { @@ -420,10 +436,10 @@ describe('SortingSpecProcessor', () => { const result = processor.parseSortSpecFromText(inputTxtArr, 'mock-folder', 'custom-name-note.md') expect(result?.sortSpecByPath).toEqual(expectedSortSpecsExampleA) }) - it('should generate correct SortSpecs (example with numerical sorting symbols)', () => { - const inputTxtArr: Array = txtInputExampleNumericSortingSymbols.split('\n') + it('should generate correct SortSpecs (example with sorting symbols)', () => { + const inputTxtArr: Array = txtInputExampleSortingSymbols.split('\n') const result = processor.parseSortSpecFromText(inputTxtArr, 'mock-folder', 'custom-name-note.md') - expect(result?.sortSpecByPath).toEqual(expectedSortSpecsExampleNumericSortingSymbols) + expect(result?.sortSpecByPath).toEqual(expectedSortSpecsExampleSortingSymbols) }) }) @@ -1735,7 +1751,7 @@ describe('SortingSpecProcessor error detection and reporting', () => { expect(result).toBeNull() expect(errorsLogger).toHaveBeenCalledTimes(2) expect(errorsLogger).toHaveBeenNthCalledWith(1, - `${ERR_PREFIX} 9:TooManyNumericSortingSymbols Maximum one numeric sorting indicator allowed per line ${ERR_SUFFIX_IN_LINE(2)}`) + `${ERR_PREFIX} 9:TooManySortingSymbols Maximum one sorting symbol allowed per line ${ERR_SUFFIX_IN_LINE(2)}`) expect(errorsLogger).toHaveBeenNthCalledWith(2, ERR_LINE_TXT('% Chapter\\R+ ... page\\d+ ')) }) it('should recognize error: nested standard obsidian sorting attribute', () => { @@ -1916,7 +1932,7 @@ describe('SortingSpecProcessor error detection and reporting', () => { expect(result).toBeNull() expect(errorsLogger).toHaveBeenCalledTimes(2) expect(errorsLogger).toHaveBeenNthCalledWith(1, - `${ERR_PREFIX} 10:NumericalSymbolAdjacentToWildcard Numerical sorting symbol must not be directly adjacent to a wildcard because of potential performance problem. An additional explicit separator helps in such case. 
${ERR_SUFFIX_IN_LINE(1)}`) + `${ERR_PREFIX} 10:SortingSymbolAdjacentToWildcard Sorting symbol must not be directly adjacent to a wildcard because of potential performance problem. An additional explicit separator helps in such case. ${ERR_SUFFIX_IN_LINE(1)}`) expect(errorsLogger).toHaveBeenNthCalledWith(2, ERR_LINE_TXT(s)) }) it.each([ @@ -2092,7 +2108,7 @@ describe('escapeRegexUnsafeCharacters', () => { }) }) -describe('detectNumericSortingSymbols', () => { +describe('detectSortingSymbols', () => { it.each([ ['', false], ['d+', false], @@ -2107,12 +2123,12 @@ describe('detectNumericSortingSymbols', () => { ['\\d+abcd\\d+efgh', true], ['\\d+\\.D+\\-d+\\R+\\.r+\\-R+ \\d+', true] ])('should correctly detect in >%s< (%s) sorting regex symbols', (s: string, b: boolean) => { - const result = detectNumericSortingSymbols(s) + const result = detectSortingSymbols(s) expect(result).toBe(b) }) }) -describe('hasMoreThanOneNumericSortingSymbol', () => { +describe('hasMoreThanOneSortingSymbol', () => { it.each([ ['', false], [' d+', false], @@ -2128,12 +2144,12 @@ describe('hasMoreThanOneNumericSortingSymbol', () => { ['\\R+abcd\\.R+efgh', true], ['\\d+\\.D+\\-d+\\R+\\.r+\\-R+ \\d+', true] ])('should correctly detect in >%s< (%s) sorting regex symbols', (s: string, b: boolean) => { - const result = hasMoreThanOneNumericSortingSymbol(s) + const result = hasMoreThanOneSortingSymbol(s) expect(result).toBe(b) }) }) -describe('extractNumericSortingSymbol', () => { +describe('extractSortingSymbol', () => { it.each([ ['', null], ['d+', null], @@ -2144,7 +2160,7 @@ describe('extractNumericSortingSymbol', () => { ['--\\.D+\\d+', '\\.D+'], ['wdwqwqe\\d+\\.D+\\-d+\\R+\\.r+\\-R+ \\d+', '\\d+'] ])('should correctly extract from >%s< the numeric sorting symbol (%s)', (s: string, ss: string) => { - const result = extractNumericSortingSymbol(s) + const result = extractSortingSymbol(s) expect(result).toBe(ss) }) }) diff --git a/src/custom-sort/sorting-spec-processor.ts 
b/src/custom-sort/sorting-spec-processor.ts index e6c31f0..937f5ba 100644 --- a/src/custom-sort/sorting-spec-processor.ts +++ b/src/custom-sort/sorting-spec-processor.ts @@ -4,6 +4,7 @@ import { CustomSortOrder, CustomSortSpec, DEFAULT_METADATA_FIELD_FOR_SORTING, + IdentityNormalizerFn, NormalizerFn, RecognizedOrderValue, RegExpSpec @@ -19,10 +20,11 @@ import { getNormalizedNumber, getNormalizedRomanNumber, NumberRegexStr, - RomanNumberRegexStr + RomanNumberRegexStr, + WordInAnyLanguageRegexStr, + WordInASCIIRegexStr } from "./matchers"; import { - FolderMatchingRegexp, FolderWildcardMatching, MATCH_ALL_SUFFIX, MATCH_CHILDREN_1_SUFFIX, @@ -62,8 +64,8 @@ export enum ProblemCode { NoSpaceBetweenAttributeAndValue, InvalidAttributeValue, TargetFolderNestedSpec, - TooManyNumericSortingSymbols, - NumericalSymbolAdjacentToWildcard, + TooManySortingSymbols, + SortingSymbolAdjacentToWildcard, ItemToHideExactNameWithExtRequired, ItemToHideNoSupportForThreeDots, DuplicateWildcardSortSpecForSameFolder, @@ -279,6 +281,9 @@ const NumberRegexSymbol: string = '\\d+' // Plain number const CompoundNumberDotRegexSymbol: string = '\\.d+' // Compound number with dot as separator const CompoundNumberDashRegexSymbol: string = '\\-d+' // Compound number with dash as separator +const WordInASCIIRegexSymbol: string = '\\a+' +const WordInAnyLanguageRegexSymbol: string = '\\A+' + const InlineRegexSymbol_Digit1: string = '\\d' const InlineRegexSymbol_Digit2: string = '\\[0-9]' const InlineRegexSymbol_0_to_3: string = '\\[0-3]' @@ -289,16 +294,18 @@ export const escapeRegexUnsafeCharacters = (s: string): string => { return s.replace(UnsafeRegexCharsRegex, '\\$&') } -const numericSortingSymbolsArr: Array = [ +const sortingSymbolsArr: Array = [ escapeRegexUnsafeCharacters(NumberRegexSymbol), escapeRegexUnsafeCharacters(RomanNumberRegexSymbol), escapeRegexUnsafeCharacters(CompoundNumberDotRegexSymbol), escapeRegexUnsafeCharacters(CompoundNumberDashRegexSymbol), 
escapeRegexUnsafeCharacters(CompoundRomanNumberDotRegexSymbol), escapeRegexUnsafeCharacters(CompoundRomanNumberDashRegexSymbol), + escapeRegexUnsafeCharacters(WordInASCIIRegexSymbol), + escapeRegexUnsafeCharacters(WordInAnyLanguageRegexSymbol) ] -const numericSortingSymbolsRegex = new RegExp(numericSortingSymbolsArr.join('|'), 'gi') +const sortingSymbolsRegex = new RegExp(sortingSymbolsArr.join('|'), 'gi') const inlineRegexSymbolsArrEscapedForRegex: Array = [ escapeRegexUnsafeCharacters(InlineRegexSymbol_Digit1), @@ -315,13 +322,13 @@ const inlineRegexSymbolsToRegexExpressionsArr: { [key: string]: string} = { const inlineRegexSymbolsDetectionRegex = new RegExp(inlineRegexSymbolsArrEscapedForRegex.join('|'), 'gi') -export const hasMoreThanOneNumericSortingSymbol = (s: string): boolean => { - numericSortingSymbolsRegex.lastIndex = 0 - return numericSortingSymbolsRegex.test(s) && numericSortingSymbolsRegex.test(s) +export const hasMoreThanOneSortingSymbol = (s: string): boolean => { + sortingSymbolsRegex.lastIndex = 0 + return sortingSymbolsRegex.test(s) && sortingSymbolsRegex.test(s) } -export const detectNumericSortingSymbols = (s: string): boolean => { - numericSortingSymbolsRegex.lastIndex = 0 - return numericSortingSymbolsRegex.test(s) +export const detectSortingSymbols = (s: string): boolean => { + sortingSymbolsRegex.lastIndex = 0 + return sortingSymbolsRegex.test(s) } export const detectInlineRegex = (s?: string): boolean => { @@ -329,10 +336,10 @@ export const detectInlineRegex = (s?: string): boolean => { return s ? 
inlineRegexSymbolsDetectionRegex.test(s) : false } -export const extractNumericSortingSymbol = (s?: string): string | null => { +export const extractSortingSymbol = (s?: string): string | null => { if (s) { - numericSortingSymbolsRegex.lastIndex = 0 - const matches: RegExpMatchArray | null = numericSortingSymbolsRegex.exec(s) + sortingSymbolsRegex.lastIndex = 0 + const matches: RegExpMatchArray | null = sortingSymbolsRegex.exec(s) return matches ? matches[0] : null } else { return null @@ -343,6 +350,7 @@ export interface RegExpSpecStr { regexpStr: string normalizerFn: NormalizerFn advancedRegexType: AdvancedRegexType + unicodeRegex?: boolean } // Exposed as named exports to allow unit testing @@ -360,10 +368,12 @@ export enum AdvancedRegexType { CompoundDashNumber, RomanNumber, CompoundDotRomanNumber, - CompoundDashRomanNumber + CompoundDashRomanNumber, + WordInASCII, + WordInAnyLanguage } -const numericSortingSymbolToRegexpStr: { [key: string]: RegExpSpecStr } = { +const sortingSymbolToRegexpStr: { [key: string]: RegExpSpecStr } = { [RomanNumberRegexSymbol.toLowerCase()]: { regexpStr: RomanNumberRegexStr, normalizerFn: RomanNumberNormalizerFn, @@ -393,6 +403,17 @@ const numericSortingSymbolToRegexpStr: { [key: string]: RegExpSpecStr } = { regexpStr: CompoundNumberDashRegexStr, normalizerFn: CompoundDashNumberNormalizerFn, advancedRegexType: AdvancedRegexType.CompoundDashNumber + }, + [WordInASCIIRegexSymbol]: { // Intentionally retain character case + regexpStr: WordInASCIIRegexStr, + normalizerFn: IdentityNormalizerFn, + advancedRegexType: AdvancedRegexType.WordInASCII + }, + [WordInAnyLanguageRegexSymbol]: { // Intentionally retain character case + regexpStr: WordInAnyLanguageRegexStr, + normalizerFn: IdentityNormalizerFn, + advancedRegexType: AdvancedRegexType.WordInAnyLanguage, + unicodeRegex: true } } @@ -435,17 +456,19 @@ export const convertPlainStringToFullMatchRegex = (s: string): RegexMatcherInfo export const convertPlainStringToRegex = (s: string, 
actAs: RegexpUsedAs): RegexMatcherInfo | null => { const regexMatchesStart: boolean = [RegexpUsedAs.Prefix, RegexpUsedAs.FullMatch].includes(actAs) const regexMatchesEnding: boolean = [RegexpUsedAs.Suffix, RegexpUsedAs.FullMatch].includes(actAs) - const detectedSymbol: string | null = extractNumericSortingSymbol(s) + const detectedSymbol: string | null = extractSortingSymbol(s) if (detectedSymbol) { - const replacement: RegExpSpecStr = numericSortingSymbolToRegexpStr[detectedSymbol.toLowerCase()] + // for some sorting symbols lower- and upper-case syntax has different meaning, for some others not + const replacement: RegExpSpecStr = sortingSymbolToRegexpStr[detectedSymbol] ?? sortingSymbolToRegexpStr[detectedSymbol.toLowerCase()] const [extractedPrefix, extractedSuffix] = s!.split(detectedSymbol) const regexPrefix: string = regexMatchesStart ? '^' : '' const regexSuffix: string = regexMatchesEnding ? '$' : '' const escapedProcessedPrefix: string = convertInlineRegexSymbolsAndEscapeTheRest(extractedPrefix) const escapedProcessedSuffix: string = convertInlineRegexSymbolsAndEscapeTheRest(extractedSuffix) + const regexFlags: string = replacement.unicodeRegex ? 'ui' : 'i' return { regexpSpec: { - regex: new RegExp(`${regexPrefix}${escapedProcessedPrefix}${replacement.regexpStr}${escapedProcessedSuffix}${regexSuffix}`, 'i'), + regex: new RegExp(`${regexPrefix}${escapedProcessedPrefix}${replacement.regexpStr}${escapedProcessedSuffix}${regexSuffix}`, regexFlags), normalizerFn: replacement.normalizerFn }, prefix: extractedPrefix, @@ -680,7 +703,7 @@ const extractIdentifier = (text: string, defaultResult?: string): string | undef return identifier ? identifier : defaultResult } -const ADJACENCY_ERROR: string = "Numerical sorting symbol must not be directly adjacent to a wildcard because of potential performance problem. An additional explicit separator helps in such case." 
+const ADJACENCY_ERROR: string = "Sorting symbol must not be directly adjacent to a wildcard because of potential performance problem. An additional explicit separator helps in such case." export class SortingSpecProcessor { ctx: ProcessingContext @@ -983,8 +1006,8 @@ export class SortingSpecProcessor { private parseSortingGroupSpec = (line: string): ParsedSortingGroup | null => { let s: string = line.trim() - if (hasMoreThanOneNumericSortingSymbol(s)) { - this.problem(ProblemCode.TooManyNumericSortingSymbols, 'Maximum one numeric sorting indicator allowed per line') + if (hasMoreThanOneSortingSymbol(s)) { + this.problem(ProblemCode.TooManySortingSymbols, 'Maximum one sorting symbol allowed per line') return null } @@ -1151,7 +1174,7 @@ export class SortingSpecProcessor { if (group.itemToHide) { if (!this.consumeParsedItemToHide(group)) { - this.problem(ProblemCode.ItemToHideNoSupportForThreeDots, 'For hiding of file or folder, the exact name with ext is required and no numeric sorting indicator allowed') + this.problem(ProblemCode.ItemToHideNoSupportForThreeDots, 'For hiding of file or folder, the exact name with ext is required and no sorting symbols allowed') return false } else { return true @@ -1159,7 +1182,7 @@ export class SortingSpecProcessor { } else { // !group.itemToHide const newGroup: CustomSortGroup | null = this.consumeParsedSortingGroupSpec(group) if (newGroup) { - if (this.adjustSortingGroupForNumericSortingSymbol(newGroup)) { + if (this.adjustSortingGroupForSortingSymbol(newGroup)) { if (this.ctx.currentSpec) { const groupIdx = this.ctx.currentSpec.groups.push(newGroup) - 1 this.ctx.currentSpecGroup = newGroup @@ -1445,7 +1468,7 @@ export class SortingSpecProcessor { if (!isThreeDots(theOnly)) { const nameWithExt: string = theOnly.trim() if (nameWithExt) { // Sanity check - if (!detectNumericSortingSymbols(nameWithExt)) { + if (!detectSortingSymbols(nameWithExt)) { if (this.ctx.currentSpec) { const itemsToHide: Set = 
this.ctx.currentSpec?.itemsToHide ?? new Set() itemsToHide.add(nameWithExt) @@ -1572,17 +1595,17 @@ export class SortingSpecProcessor { // Returns true if no regex will be involved (hence no adjustment) or if correctly adjusted with regex private adjustSortingGroupForRegexBasedMatchers = (group: CustomSortGroup): boolean => { - return this.adjustSortingGroupForNumericSortingSymbol(group) + return this.adjustSortingGroupForSortingSymbol(group) } - // Returns true if no numeric sorting symbol (hence no adjustment) or if correctly adjusted with regex - private adjustSortingGroupForNumericSortingSymbol = (group: CustomSortGroup): boolean => { + // Returns true if no sorting symbol (hence no adjustment) or if correctly adjusted with regex + private adjustSortingGroupForSortingSymbol = (group: CustomSortGroup): boolean => { switch (group.type) { case CustomSortGroupType.ExactPrefix: const regexInPrefix = convertPlainStringToLeftRegex(group.exactPrefix!) if (regexInPrefix) { if (regexInPrefix.containsAdvancedRegex && checkAdjacency(regexInPrefix).noSuffix) { - this.problem(ProblemCode.NumericalSymbolAdjacentToWildcard, ADJACENCY_ERROR) + this.problem(ProblemCode.SortingSymbolAdjacentToWildcard, ADJACENCY_ERROR) return false; } delete group.exactPrefix @@ -1593,7 +1616,7 @@ export class SortingSpecProcessor { const regexInSuffix = convertPlainStringToRightRegex(group.exactSuffix!) if (regexInSuffix) { if (regexInSuffix.containsAdvancedRegex && checkAdjacency(regexInSuffix).noPrefix) { - this.problem(ProblemCode.NumericalSymbolAdjacentToWildcard, ADJACENCY_ERROR) + this.problem(ProblemCode.SortingSymbolAdjacentToWildcard, ADJACENCY_ERROR) return false; } delete group.exactSuffix @@ -1604,7 +1627,7 @@ export class SortingSpecProcessor { const regexInHead = convertPlainStringToLeftRegex(group.exactPrefix!) 
if (regexInHead) { if (regexInHead.containsAdvancedRegex && checkAdjacency(regexInHead).noSuffix) { - this.problem(ProblemCode.NumericalSymbolAdjacentToWildcard, ADJACENCY_ERROR) + this.problem(ProblemCode.SortingSymbolAdjacentToWildcard, ADJACENCY_ERROR) return false; } delete group.exactPrefix @@ -1613,7 +1636,7 @@ export class SortingSpecProcessor { const regexInTail = convertPlainStringToRightRegex(group.exactSuffix!) if (regexInTail) { if (regexInTail.containsAdvancedRegex && checkAdjacency(regexInTail).noPrefix) { - this.problem(ProblemCode.NumericalSymbolAdjacentToWildcard, ADJACENCY_ERROR) + this.problem(ProblemCode.SortingSymbolAdjacentToWildcard, ADJACENCY_ERROR) return false; } delete group.exactSuffix