#67 - feature alphabetic wildcard

- sorting symbol \a+ for ASCII word
- sorting symbol \A+ for a word in any modern language (involves an advanced Unicode regexp)
This commit is contained in:
SebastianMC 2023-03-07 19:01:44 +01:00
parent fe68c554b8
commit e788c92543
5 changed files with 203 additions and 75 deletions

View File

@ -40,6 +40,7 @@ export interface RecognizedOrderValue {
} }
export type NormalizerFn = (s: string) => string | null export type NormalizerFn = (s: string) => string | null
/**
 * Normalizer which hands the received text back exactly as it was given,
 * for matchers whose captured value needs no conversion before comparison.
 */
export const IdentityNormalizerFn: NormalizerFn = (s: string) => {
    return s
}
export interface RegExpSpec { export interface RegExpSpec {
regex: RegExp regex: RegExp

View File

@ -3,15 +3,22 @@ import {
getNormalizedRomanNumber, getNormalizedRomanNumber,
prependWithZeros, prependWithZeros,
romanToIntStr, romanToIntStr,
NumberRegex, NumberRegexStr,
CompoundNumberDotRegex, CompoundNumberDotRegexStr,
CompoundNumberDashRegex, CompoundNumberDashRegexStr,
RomanNumberRegex, RomanNumberRegexStr,
CompoundRomanNumberDotRegex, CompoundRomanNumberDotRegexStr,
CompoundRomanNumberDashRegex CompoundRomanNumberDashRegexStr,
WordInASCIIRegexStr,
WordInAnyLanguageRegexStr
} from "./matchers"; } from "./matchers";
import {SortingSpecProcessor} from "./sorting-spec-processor";
describe('Plain numbers regexp', () => { describe('Plain numbers regexp', () => {
let regexp: RegExp;
beforeEach(() => {
regexp = new RegExp('^' + NumberRegexStr, 'i');
});
it.each([ it.each([
['', null], ['', null],
[' ', null], [' ', null],
@ -23,7 +30,7 @@ describe('Plain numbers regexp', () => {
['9', '9'], ['9', '9'],
['7328964783268794325496783', '7328964783268794325496783'] ['7328964783268794325496783', '7328964783268794325496783']
])('%s => %s', (s: string, out: string | null) => { ])('%s => %s', (s: string, out: string | null) => {
const match: RegExpMatchArray | null = s.match(NumberRegex) const match: RegExpMatchArray | null = s.match(regexp)
if (out) { if (out) {
expect(match).not.toBeNull() expect(match).not.toBeNull()
expect(match?.[1]).toBe(out) expect(match?.[1]).toBe(out)
@ -34,6 +41,10 @@ describe('Plain numbers regexp', () => {
}) })
describe('Plain compound numbers regexp (dot)', () => { describe('Plain compound numbers regexp (dot)', () => {
let regexp: RegExp;
beforeEach(() => {
regexp = new RegExp('^' + CompoundNumberDotRegexStr, 'i');
});
it.each([ it.each([
['', null], ['', null],
[' ', null], [' ', null],
@ -55,7 +66,7 @@ describe('Plain compound numbers regexp (dot)', () => {
['56.78.-.1abc', '56.78'], ['56.78.-.1abc', '56.78'],
['56.78-.1abc', '56.78'], ['56.78-.1abc', '56.78'],
])('%s => %s', (s: string, out: string | null) => { ])('%s => %s', (s: string, out: string | null) => {
const match: RegExpMatchArray | null = s.match(CompoundNumberDotRegex) const match: RegExpMatchArray | null = s.match(regexp)
if (out) { if (out) {
expect(match).not.toBeNull() expect(match).not.toBeNull()
expect(match?.[1]).toBe(out) expect(match?.[1]).toBe(out)
@ -66,6 +77,10 @@ describe('Plain compound numbers regexp (dot)', () => {
}) })
describe('Plain compound numbers regexp (dash)', () => { describe('Plain compound numbers regexp (dash)', () => {
let regexp: RegExp;
beforeEach(() => {
regexp = new RegExp('^' + CompoundNumberDashRegexStr, 'i');
});
it.each([ it.each([
['', null], ['', null],
[' ', null], [' ', null],
@ -87,7 +102,7 @@ describe('Plain compound numbers regexp (dash)', () => {
['56-78-.-1abc', '56-78'], ['56-78-.-1abc', '56-78'],
['56-78.-1abc', '56-78'], ['56-78.-1abc', '56-78'],
])('%s => %s', (s: string, out: string | null) => { ])('%s => %s', (s: string, out: string | null) => {
const match: RegExpMatchArray | null = s.match(CompoundNumberDashRegex) const match: RegExpMatchArray | null = s.match(regexp)
if (out) { if (out) {
expect(match).not.toBeNull() expect(match).not.toBeNull()
expect(match?.[1]).toBe(out) expect(match?.[1]).toBe(out)
@ -98,6 +113,10 @@ describe('Plain compound numbers regexp (dash)', () => {
}) })
describe('Plain Roman numbers regexp', () => { describe('Plain Roman numbers regexp', () => {
let regexp: RegExp;
beforeEach(() => {
regexp = new RegExp('^' + RomanNumberRegexStr, 'i');
});
it.each([ it.each([
['', null], ['', null],
[' ', null], [' ', null],
@ -109,7 +128,7 @@ describe('Plain Roman numbers regexp', () => {
['iiiii', 'iiiii'], ['iiiii', 'iiiii'],
['viviviv794325496783', 'viviviv'] ['viviviv794325496783', 'viviviv']
])('%s => %s', (s: string, out: string | null) => { ])('%s => %s', (s: string, out: string | null) => {
const match: RegExpMatchArray | null = s.match(RomanNumberRegex) const match: RegExpMatchArray | null = s.match(regexp)
if (out) { if (out) {
expect(match).not.toBeNull() expect(match).not.toBeNull()
expect(match?.[1]).toBe(out) expect(match?.[1]).toBe(out)
@ -120,6 +139,10 @@ describe('Plain Roman numbers regexp', () => {
}) })
describe('Roman compound numbers regexp (dot)', () => { describe('Roman compound numbers regexp (dot)', () => {
let regexp: RegExp;
beforeEach(() => {
regexp = new RegExp('^' + CompoundRomanNumberDotRegexStr, 'i');
});
it.each([ it.each([
['', null], ['', null],
[' ', null], [' ', null],
@ -143,7 +166,7 @@ describe('Roman compound numbers regexp (dot)', () => {
['xvx.d-.iabc', 'xvx.d'], ['xvx.d-.iabc', 'xvx.d'],
['xvx.d..iabc', 'xvx.d'], ['xvx.d..iabc', 'xvx.d'],
])('%s => %s', (s: string, out: string | null) => { ])('%s => %s', (s: string, out: string | null) => {
const match: RegExpMatchArray | null = s.match(CompoundRomanNumberDotRegex) const match: RegExpMatchArray | null = s.match(regexp)
if (out) { if (out) {
expect(match).not.toBeNull() expect(match).not.toBeNull()
expect(match?.[1]).toBe(out) expect(match?.[1]).toBe(out)
@ -154,6 +177,10 @@ describe('Roman compound numbers regexp (dot)', () => {
}) })
describe('Roman compound numbers regexp (dash)', () => { describe('Roman compound numbers regexp (dash)', () => {
let regexp: RegExp;
beforeEach(() => {
regexp = new RegExp('^' + CompoundRomanNumberDashRegexStr, 'i');
});
it.each([ it.each([
['', null], ['', null],
[' ', null], [' ', null],
@ -177,7 +204,65 @@ describe('Roman compound numbers regexp (dash)', () => {
['xvx-d.-iabc', 'xvx-d'], ['xvx-d.-iabc', 'xvx-d'],
['xvx-d--iabc', 'xvx-d'] ['xvx-d--iabc', 'xvx-d']
])('%s => %s', (s: string, out: string | null) => { ])('%s => %s', (s: string, out: string | null) => {
const match: RegExpMatchArray | null = s.match(CompoundRomanNumberDashRegex) const match: RegExpMatchArray | null = s.match(regexp)
if (out) {
expect(match).not.toBeNull()
expect(match?.[1]).toBe(out)
} else {
expect(match).toBeNull()
}
})
})
describe('ASCII word regexp', () => {
    let asciiWordRegex: RegExp;

    beforeEach(() => {
        // Anchored at the start of the text, case-insensitive
        asciiWordRegex = new RegExp(`^${WordInASCIIRegexStr}`, 'i');
    });

    // Each row: [input text, expected content of capture group 1 (null = no match expected)]
    const cases: Array<[string, string | null]> = [
        ['', null],
        [' ', null],
        [' I', null],     // a leading space prevents the anchored match
        ['I ', 'I'],      // only the word is captured, the text after it is not inspected
        ['Abc', 'Abc'],
        ['Sun', 'Sun'],
        ['Hello123', 'Hello'],
        ['John_', 'John'],
        ['Title.', 'Title'],
        ['Deutschstäder', 'Deutschst'],      // capture ends at the first non-ASCII letter
        ['ItalianoàèéìòùÈ', 'Italiano'],
        ['PolskićśńĄł', 'Polski']
    ]

    it.each(cases)('%s => %s', (input: string, expectedWord: string | null) => {
        const found: RegExpMatchArray | null = input.match(asciiWordRegex)
        if (!expectedWord) {
            expect(found).toBeNull()
            return
        }
        expect(found).not.toBeNull()
        expect(found?.[1]).toBe(expectedWord)
    })
})
describe('Unicode word regexp', () => {
let regexp: RegExp;
beforeEach(() => {
regexp = new RegExp('^' + WordInAnyLanguageRegexStr, 'ui');
});
it.each([
['', null],
[' ', null],
[' I', null], // leading spaces are not swallowed
['I ', 'I'], // trailing characters are ignored in unit test
['Abc', 'Abc'],
['Sun', 'Sun'],
['Hello123', 'Hello'],
['John_', 'John'],
['Title.', 'Title'],
['Deutschstäder_', 'Deutschstäder'],
['ItalianoàèéìòùÈ', 'ItalianoàèéìòùÈ'],
['PolskićśńĄł', 'PolskićśńĄł']
])('%s => %s', (s: string, out: string | null) => {
const match: RegExpMatchArray | null = s.match(regexp)
if (out) { if (out) {
expect(match).not.toBeNull() expect(match).not.toBeNull()
expect(match?.[1]).toBe(out) expect(match?.[1]).toBe(out)

View File

@ -1,16 +1,10 @@
export const RomanNumberRegex: RegExp = /^ *([MDCLXVI]+)/i; // Roman number export const RomanNumberRegexStr: string = ' *([MDCLXVI]+)'; // Roman number
export const RomanNumberRegexStr: string = ' *([MDCLXVI]+)'; export const CompoundRomanNumberDotRegexStr: string = ' *([MDCLXVI]+(?:\\.[MDCLXVI]+)*)';// Compound Roman number with dot as separator
export const CompoundRomanNumberDotRegex: RegExp = /^ *([MDCLXVI]+(?:\.[MDCLXVI]+)*)/i; // Compound Roman number with dot as separator export const CompoundRomanNumberDashRegexStr: string = ' *([MDCLXVI]+(?:-[MDCLXVI]+)*)'; // Compound Roman number with dash as separator
export const CompoundRomanNumberDotRegexStr: string = ' *([MDCLXVI]+(?:\\.[MDCLXVI]+)*)';
export const CompoundRomanNumberDashRegex: RegExp = /^ *([MDCLXVI]+(?:-[MDCLXVI]+)*)/i; // Compound Roman number with dash as separator
export const CompoundRomanNumberDashRegexStr: string = ' *([MDCLXVI]+(?:-[MDCLXVI]+)*)';
export const NumberRegex: RegExp = /^ *(\d+)/; // Plain number export const NumberRegexStr: string = ' *(\\d+)'; // Plain number
export const NumberRegexStr: string = ' *(\\d+)'; export const CompoundNumberDotRegexStr: string = ' *(\\d+(?:\\.\\d+)*)'; // Compound number with dot as separator
export const CompoundNumberDotRegex: RegExp = /^ *(\d+(?:\.\d+)*)/; // Compound number with dot as separator export const CompoundNumberDashRegexStr: string = ' *(\\d+(?:-\\d+)*)'; // Compound number with dash as separator
export const CompoundNumberDotRegexStr: string = ' *(\\d+(?:\\.\\d+)*)';
export const CompoundNumberDashRegex: RegExp = /^ *(\d+(?:-\d+)*)/; // Compound number with dash as separator
export const CompoundNumberDashRegexStr: string = ' *(\\d+(?:-\\d+)*)';
export const DOT_SEPARATOR = '.' export const DOT_SEPARATOR = '.'
export const DASH_SEPARATOR = '-' export const DASH_SEPARATOR = '-'
@ -20,6 +14,15 @@ const PIPE_SEPARATOR = '|' // ASCII 124
export const DEFAULT_NORMALIZATION_PLACES = 8; // Fixed width of a normalized number (with leading zeros) export const DEFAULT_NORMALIZATION_PLACES = 8; // Fixed width of a normalized number (with leading zeros)
// Unicode property escapes reference:
//    https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Unicode_Property_Escapes
//    https://stackoverflow.com/a/48902765
//
// A word in any language: one or more characters having the Unicode 'Letter' property,
// captured as group 1.
// IMPORTANT: the RegExp built from this string must be created with the 'u' flag
// (e.g. /\p{Letter}+/u). Without that flag \p is not interpreted as a property escape
// and the pattern does not match letters at all.
export const WordInAnyLanguageRegexStr: string = '(\\p{Letter}+)'

// A word limited to ASCII letters (a-z, A-Z), captured as group 1.
// Digits, underscore, punctuation and non-ASCII letters are not part of the word.
export const WordInASCIIRegexStr: string = '([a-zA-Z]+)'
export function prependWithZeros(s: string, minLength: number) { export function prependWithZeros(s: string, minLength: number) {
if (s.length < minLength) { if (s.length < minLength) {
const delta: number = minLength - s.length; const delta: number = minLength - s.length;

View File

@ -5,16 +5,16 @@ import {
ConsumedFolderMatchingRegexp, ConsumedFolderMatchingRegexp,
consumeFolderByRegexpExpression, consumeFolderByRegexpExpression,
convertPlainStringToRegex, convertPlainStringToRegex,
detectNumericSortingSymbols, detectSortingSymbols,
escapeRegexUnsafeCharacters, escapeRegexUnsafeCharacters,
extractNumericSortingSymbol, extractSortingSymbol,
hasMoreThanOneNumericSortingSymbol, hasMoreThanOneSortingSymbol,
NumberNormalizerFn, NumberNormalizerFn,
RegexpUsedAs, RegexpUsedAs,
RomanNumberNormalizerFn, RomanNumberNormalizerFn,
SortingSpecProcessor SortingSpecProcessor
} from "./sorting-spec-processor" } from "./sorting-spec-processor"
import {CustomSortGroupType, CustomSortOrder, CustomSortSpec} from "./custom-sort-types"; import {CustomSortGroupType, CustomSortOrder, CustomSortSpec, IdentityNormalizerFn} from "./custom-sort-types";
import {FolderMatchingRegexp, FolderMatchingTreeNode} from "./folder-matching-rules"; import {FolderMatchingRegexp, FolderMatchingTreeNode} from "./folder-matching-rules";
const txtInputExampleA: string = ` const txtInputExampleA: string = `
@ -347,7 +347,7 @@ const expectedSortSpecsExampleA: { [key: string]: CustomSortSpec } = {
} }
} }
const expectedSortSpecsExampleNumericSortingSymbols: { [key: string]: CustomSortSpec } = { const expectedSortSpecsExampleSortingSymbols: { [key: string]: CustomSortSpec } = {
"mock-folder": { "mock-folder": {
groups: [{ groups: [{
foldersOnly: true, foldersOnly: true,
@ -388,21 +388,37 @@ const expectedSortSpecsExampleNumericSortingSymbols: { [key: string]: CustomSort
regex: / *(\d+)plain syntax\?\?\?$/i, regex: / *(\d+)plain syntax\?\?\?$/i,
normalizerFn: NumberNormalizerFn normalizerFn: NumberNormalizerFn
} }
}, {
order: CustomSortOrder.alphabetical,
type: CustomSortGroupType.ExactName,
regexPrefix: {
regex: /^Here goes ASCII word ([a-zA-Z]+)$/i,
normalizerFn: IdentityNormalizerFn
}
}, {
order: CustomSortOrder.alphabetical,
type: CustomSortGroupType.ExactName,
regexPrefix: {
regex: /^(\p{Letter}+)\. is for any modern language word$/iu,
normalizerFn: IdentityNormalizerFn
}
}, { }, {
type: CustomSortGroupType.Outsiders, type: CustomSortGroupType.Outsiders,
order: CustomSortOrder.alphabetical, order: CustomSortOrder.alphabetical,
}], }],
targetFoldersPaths: ['mock-folder'], targetFoldersPaths: ['mock-folder'],
outsidersGroupIdx: 5 outsidersGroupIdx: 7
} }
} }
const txtInputExampleNumericSortingSymbols: string = ` const txtInputExampleSortingSymbols: string = `
/folders Chapter \\.d+ ... /folders Chapter \\.d+ ...
/:files ...section \\-r+. /:files ...section \\-r+.
% Appendix \\-d+ (attachments) % Appendix \\-d+ (attachments)
Plain syntax\\R+ ... works? Plain syntax\\R+ ... works?
And this kind of... \\D+plain syntax??? And this kind of... \\D+plain syntax???
Here goes ASCII word \\a+
\\A+. is for any modern language word
` `
describe('SortingSpecProcessor', () => { describe('SortingSpecProcessor', () => {
@ -420,10 +436,10 @@ describe('SortingSpecProcessor', () => {
const result = processor.parseSortSpecFromText(inputTxtArr, 'mock-folder', 'custom-name-note.md') const result = processor.parseSortSpecFromText(inputTxtArr, 'mock-folder', 'custom-name-note.md')
expect(result?.sortSpecByPath).toEqual(expectedSortSpecsExampleA) expect(result?.sortSpecByPath).toEqual(expectedSortSpecsExampleA)
}) })
it('should generate correct SortSpecs (example with numerical sorting symbols)', () => { it('should generate correct SortSpecs (example with sorting symbols)', () => {
const inputTxtArr: Array<string> = txtInputExampleNumericSortingSymbols.split('\n') const inputTxtArr: Array<string> = txtInputExampleSortingSymbols.split('\n')
const result = processor.parseSortSpecFromText(inputTxtArr, 'mock-folder', 'custom-name-note.md') const result = processor.parseSortSpecFromText(inputTxtArr, 'mock-folder', 'custom-name-note.md')
expect(result?.sortSpecByPath).toEqual(expectedSortSpecsExampleNumericSortingSymbols) expect(result?.sortSpecByPath).toEqual(expectedSortSpecsExampleSortingSymbols)
}) })
}) })
@ -1735,7 +1751,7 @@ describe('SortingSpecProcessor error detection and reporting', () => {
expect(result).toBeNull() expect(result).toBeNull()
expect(errorsLogger).toHaveBeenCalledTimes(2) expect(errorsLogger).toHaveBeenCalledTimes(2)
expect(errorsLogger).toHaveBeenNthCalledWith(1, expect(errorsLogger).toHaveBeenNthCalledWith(1,
`${ERR_PREFIX} 9:TooManyNumericSortingSymbols Maximum one numeric sorting indicator allowed per line ${ERR_SUFFIX_IN_LINE(2)}`) `${ERR_PREFIX} 9:TooManySortingSymbols Maximum one sorting symbol allowed per line ${ERR_SUFFIX_IN_LINE(2)}`)
expect(errorsLogger).toHaveBeenNthCalledWith(2, ERR_LINE_TXT('% Chapter\\R+ ... page\\d+ ')) expect(errorsLogger).toHaveBeenNthCalledWith(2, ERR_LINE_TXT('% Chapter\\R+ ... page\\d+ '))
}) })
it('should recognize error: nested standard obsidian sorting attribute', () => { it('should recognize error: nested standard obsidian sorting attribute', () => {
@ -1916,7 +1932,7 @@ describe('SortingSpecProcessor error detection and reporting', () => {
expect(result).toBeNull() expect(result).toBeNull()
expect(errorsLogger).toHaveBeenCalledTimes(2) expect(errorsLogger).toHaveBeenCalledTimes(2)
expect(errorsLogger).toHaveBeenNthCalledWith(1, expect(errorsLogger).toHaveBeenNthCalledWith(1,
`${ERR_PREFIX} 10:NumericalSymbolAdjacentToWildcard Numerical sorting symbol must not be directly adjacent to a wildcard because of potential performance problem. An additional explicit separator helps in such case. ${ERR_SUFFIX_IN_LINE(1)}`) `${ERR_PREFIX} 10:SortingSymbolAdjacentToWildcard Sorting symbol must not be directly adjacent to a wildcard because of potential performance problem. An additional explicit separator helps in such case. ${ERR_SUFFIX_IN_LINE(1)}`)
expect(errorsLogger).toHaveBeenNthCalledWith(2, ERR_LINE_TXT(s)) expect(errorsLogger).toHaveBeenNthCalledWith(2, ERR_LINE_TXT(s))
}) })
it.each([ it.each([
@ -2092,7 +2108,7 @@ describe('escapeRegexUnsafeCharacters', () => {
}) })
}) })
describe('detectNumericSortingSymbols', () => { describe('detectSortingSymbols', () => {
it.each([ it.each([
['', false], ['', false],
['d+', false], ['d+', false],
@ -2107,12 +2123,12 @@ describe('detectNumericSortingSymbols', () => {
['\\d+abcd\\d+efgh', true], ['\\d+abcd\\d+efgh', true],
['\\d+\\.D+\\-d+\\R+\\.r+\\-R+ \\d+', true] ['\\d+\\.D+\\-d+\\R+\\.r+\\-R+ \\d+', true]
])('should correctly detect in >%s< (%s) sorting regex symbols', (s: string, b: boolean) => { ])('should correctly detect in >%s< (%s) sorting regex symbols', (s: string, b: boolean) => {
const result = detectNumericSortingSymbols(s) const result = detectSortingSymbols(s)
expect(result).toBe(b) expect(result).toBe(b)
}) })
}) })
describe('hasMoreThanOneNumericSortingSymbol', () => { describe('hasMoreThanOneSortingSymbol', () => {
it.each([ it.each([
['', false], ['', false],
[' d+', false], [' d+', false],
@ -2128,12 +2144,12 @@ describe('hasMoreThanOneNumericSortingSymbol', () => {
['\\R+abcd\\.R+efgh', true], ['\\R+abcd\\.R+efgh', true],
['\\d+\\.D+\\-d+\\R+\\.r+\\-R+ \\d+', true] ['\\d+\\.D+\\-d+\\R+\\.r+\\-R+ \\d+', true]
])('should correctly detect in >%s< (%s) sorting regex symbols', (s: string, b: boolean) => { ])('should correctly detect in >%s< (%s) sorting regex symbols', (s: string, b: boolean) => {
const result = hasMoreThanOneNumericSortingSymbol(s) const result = hasMoreThanOneSortingSymbol(s)
expect(result).toBe(b) expect(result).toBe(b)
}) })
}) })
describe('extractNumericSortingSymbol', () => { describe('extractSortingSymbol', () => {
it.each([ it.each([
['', null], ['', null],
['d+', null], ['d+', null],
@ -2144,7 +2160,7 @@ describe('extractNumericSortingSymbol', () => {
['--\\.D+\\d+', '\\.D+'], ['--\\.D+\\d+', '\\.D+'],
['wdwqwqe\\d+\\.D+\\-d+\\R+\\.r+\\-R+ \\d+', '\\d+'] ['wdwqwqe\\d+\\.D+\\-d+\\R+\\.r+\\-R+ \\d+', '\\d+']
])('should correctly extract from >%s< the numeric sorting symbol (%s)', (s: string, ss: string) => { ])('should correctly extract from >%s< the numeric sorting symbol (%s)', (s: string, ss: string) => {
const result = extractNumericSortingSymbol(s) const result = extractSortingSymbol(s)
expect(result).toBe(ss) expect(result).toBe(ss)
}) })
}) })

View File

@ -4,6 +4,7 @@ import {
CustomSortOrder, CustomSortOrder,
CustomSortSpec, CustomSortSpec,
DEFAULT_METADATA_FIELD_FOR_SORTING, DEFAULT_METADATA_FIELD_FOR_SORTING,
IdentityNormalizerFn,
NormalizerFn, NormalizerFn,
RecognizedOrderValue, RecognizedOrderValue,
RegExpSpec RegExpSpec
@ -19,10 +20,11 @@ import {
getNormalizedNumber, getNormalizedNumber,
getNormalizedRomanNumber, getNormalizedRomanNumber,
NumberRegexStr, NumberRegexStr,
RomanNumberRegexStr RomanNumberRegexStr,
WordInAnyLanguageRegexStr,
WordInASCIIRegexStr
} from "./matchers"; } from "./matchers";
import { import {
FolderMatchingRegexp,
FolderWildcardMatching, FolderWildcardMatching,
MATCH_ALL_SUFFIX, MATCH_ALL_SUFFIX,
MATCH_CHILDREN_1_SUFFIX, MATCH_CHILDREN_1_SUFFIX,
@ -62,8 +64,8 @@ export enum ProblemCode {
NoSpaceBetweenAttributeAndValue, NoSpaceBetweenAttributeAndValue,
InvalidAttributeValue, InvalidAttributeValue,
TargetFolderNestedSpec, TargetFolderNestedSpec,
TooManyNumericSortingSymbols, TooManySortingSymbols,
NumericalSymbolAdjacentToWildcard, SortingSymbolAdjacentToWildcard,
ItemToHideExactNameWithExtRequired, ItemToHideExactNameWithExtRequired,
ItemToHideNoSupportForThreeDots, ItemToHideNoSupportForThreeDots,
DuplicateWildcardSortSpecForSameFolder, DuplicateWildcardSortSpecForSameFolder,
@ -279,6 +281,9 @@ const NumberRegexSymbol: string = '\\d+' // Plain number
const CompoundNumberDotRegexSymbol: string = '\\.d+' // Compound number with dot as separator const CompoundNumberDotRegexSymbol: string = '\\.d+' // Compound number with dot as separator
const CompoundNumberDashRegexSymbol: string = '\\-d+' // Compound number with dash as separator const CompoundNumberDashRegexSymbol: string = '\\-d+' // Compound number with dash as separator
const WordInASCIIRegexSymbol: string = '\\a+' // Word consisting of ASCII letters only
const WordInAnyLanguageRegexSymbol: string = '\\A+' // Word consisting of letters of any language (Unicode-aware)
const InlineRegexSymbol_Digit1: string = '\\d' const InlineRegexSymbol_Digit1: string = '\\d'
const InlineRegexSymbol_Digit2: string = '\\[0-9]' const InlineRegexSymbol_Digit2: string = '\\[0-9]'
const InlineRegexSymbol_0_to_3: string = '\\[0-3]' const InlineRegexSymbol_0_to_3: string = '\\[0-3]'
@ -289,16 +294,18 @@ export const escapeRegexUnsafeCharacters = (s: string): string => {
return s.replace(UnsafeRegexCharsRegex, '\\$&') return s.replace(UnsafeRegexCharsRegex, '\\$&')
} }
const numericSortingSymbolsArr: Array<string> = [ const sortingSymbolsArr: Array<string> = [
escapeRegexUnsafeCharacters(NumberRegexSymbol), escapeRegexUnsafeCharacters(NumberRegexSymbol),
escapeRegexUnsafeCharacters(RomanNumberRegexSymbol), escapeRegexUnsafeCharacters(RomanNumberRegexSymbol),
escapeRegexUnsafeCharacters(CompoundNumberDotRegexSymbol), escapeRegexUnsafeCharacters(CompoundNumberDotRegexSymbol),
escapeRegexUnsafeCharacters(CompoundNumberDashRegexSymbol), escapeRegexUnsafeCharacters(CompoundNumberDashRegexSymbol),
escapeRegexUnsafeCharacters(CompoundRomanNumberDotRegexSymbol), escapeRegexUnsafeCharacters(CompoundRomanNumberDotRegexSymbol),
escapeRegexUnsafeCharacters(CompoundRomanNumberDashRegexSymbol), escapeRegexUnsafeCharacters(CompoundRomanNumberDashRegexSymbol),
escapeRegexUnsafeCharacters(WordInASCIIRegexSymbol),
escapeRegexUnsafeCharacters(WordInAnyLanguageRegexSymbol)
] ]
const numericSortingSymbolsRegex = new RegExp(numericSortingSymbolsArr.join('|'), 'gi') const sortingSymbolsRegex = new RegExp(sortingSymbolsArr.join('|'), 'gi')
const inlineRegexSymbolsArrEscapedForRegex: Array<string> = [ const inlineRegexSymbolsArrEscapedForRegex: Array<string> = [
escapeRegexUnsafeCharacters(InlineRegexSymbol_Digit1), escapeRegexUnsafeCharacters(InlineRegexSymbol_Digit1),
@ -315,13 +322,13 @@ const inlineRegexSymbolsToRegexExpressionsArr: { [key: string]: string} = {
const inlineRegexSymbolsDetectionRegex = new RegExp(inlineRegexSymbolsArrEscapedForRegex.join('|'), 'gi') const inlineRegexSymbolsDetectionRegex = new RegExp(inlineRegexSymbolsArrEscapedForRegex.join('|'), 'gi')
export const hasMoreThanOneNumericSortingSymbol = (s: string): boolean => { export const hasMoreThanOneSortingSymbol = (s: string): boolean => {
numericSortingSymbolsRegex.lastIndex = 0 sortingSymbolsRegex.lastIndex = 0
return numericSortingSymbolsRegex.test(s) && numericSortingSymbolsRegex.test(s) return sortingSymbolsRegex.test(s) && sortingSymbolsRegex.test(s)
} }
export const detectNumericSortingSymbols = (s: string): boolean => { export const detectSortingSymbols = (s: string): boolean => {
numericSortingSymbolsRegex.lastIndex = 0 sortingSymbolsRegex.lastIndex = 0
return numericSortingSymbolsRegex.test(s) return sortingSymbolsRegex.test(s)
} }
export const detectInlineRegex = (s?: string): boolean => { export const detectInlineRegex = (s?: string): boolean => {
@ -329,10 +336,10 @@ export const detectInlineRegex = (s?: string): boolean => {
return s ? inlineRegexSymbolsDetectionRegex.test(s) : false return s ? inlineRegexSymbolsDetectionRegex.test(s) : false
} }
export const extractNumericSortingSymbol = (s?: string): string | null => { export const extractSortingSymbol = (s?: string): string | null => {
if (s) { if (s) {
numericSortingSymbolsRegex.lastIndex = 0 sortingSymbolsRegex.lastIndex = 0
const matches: RegExpMatchArray | null = numericSortingSymbolsRegex.exec(s) const matches: RegExpMatchArray | null = sortingSymbolsRegex.exec(s)
return matches ? matches[0] : null return matches ? matches[0] : null
} else { } else {
return null return null
@ -343,6 +350,7 @@ export interface RegExpSpecStr {
regexpStr: string regexpStr: string
normalizerFn: NormalizerFn normalizerFn: NormalizerFn
advancedRegexType: AdvancedRegexType advancedRegexType: AdvancedRegexType
unicodeRegex?: boolean
} }
// Exposed as named exports to allow unit testing // Exposed as named exports to allow unit testing
@ -360,10 +368,12 @@ export enum AdvancedRegexType {
CompoundDashNumber, CompoundDashNumber,
RomanNumber, RomanNumber,
CompoundDotRomanNumber, CompoundDotRomanNumber,
CompoundDashRomanNumber CompoundDashRomanNumber,
WordInASCII,
WordInAnyLanguage
} }
const numericSortingSymbolToRegexpStr: { [key: string]: RegExpSpecStr } = { const sortingSymbolToRegexpStr: { [key: string]: RegExpSpecStr } = {
[RomanNumberRegexSymbol.toLowerCase()]: { [RomanNumberRegexSymbol.toLowerCase()]: {
regexpStr: RomanNumberRegexStr, regexpStr: RomanNumberRegexStr,
normalizerFn: RomanNumberNormalizerFn, normalizerFn: RomanNumberNormalizerFn,
@ -393,6 +403,17 @@ const numericSortingSymbolToRegexpStr: { [key: string]: RegExpSpecStr } = {
regexpStr: CompoundNumberDashRegexStr, regexpStr: CompoundNumberDashRegexStr,
normalizerFn: CompoundDashNumberNormalizerFn, normalizerFn: CompoundDashNumberNormalizerFn,
advancedRegexType: AdvancedRegexType.CompoundDashNumber advancedRegexType: AdvancedRegexType.CompoundDashNumber
},
[WordInASCIIRegexSymbol]: { // Intentionally retain character case
regexpStr: WordInASCIIRegexStr,
normalizerFn: IdentityNormalizerFn,
advancedRegexType: AdvancedRegexType.WordInASCII
},
[WordInAnyLanguageRegexSymbol]: { // Intentionally retain character case
regexpStr: WordInAnyLanguageRegexStr,
normalizerFn: IdentityNormalizerFn,
advancedRegexType: AdvancedRegexType.WordInAnyLanguage,
unicodeRegex: true
} }
} }
@ -435,17 +456,19 @@ export const convertPlainStringToFullMatchRegex = (s: string): RegexMatcherInfo
export const convertPlainStringToRegex = (s: string, actAs: RegexpUsedAs): RegexMatcherInfo | null => { export const convertPlainStringToRegex = (s: string, actAs: RegexpUsedAs): RegexMatcherInfo | null => {
const regexMatchesStart: boolean = [RegexpUsedAs.Prefix, RegexpUsedAs.FullMatch].includes(actAs) const regexMatchesStart: boolean = [RegexpUsedAs.Prefix, RegexpUsedAs.FullMatch].includes(actAs)
const regexMatchesEnding: boolean = [RegexpUsedAs.Suffix, RegexpUsedAs.FullMatch].includes(actAs) const regexMatchesEnding: boolean = [RegexpUsedAs.Suffix, RegexpUsedAs.FullMatch].includes(actAs)
const detectedSymbol: string | null = extractNumericSortingSymbol(s) const detectedSymbol: string | null = extractSortingSymbol(s)
if (detectedSymbol) { if (detectedSymbol) {
const replacement: RegExpSpecStr = numericSortingSymbolToRegexpStr[detectedSymbol.toLowerCase()] // for some sorting symbols lower- and upper-case syntax has different meaning, for some others not
const replacement: RegExpSpecStr = sortingSymbolToRegexpStr[detectedSymbol] ?? sortingSymbolToRegexpStr[detectedSymbol.toLowerCase()]
const [extractedPrefix, extractedSuffix] = s!.split(detectedSymbol) const [extractedPrefix, extractedSuffix] = s!.split(detectedSymbol)
const regexPrefix: string = regexMatchesStart ? '^' : '' const regexPrefix: string = regexMatchesStart ? '^' : ''
const regexSuffix: string = regexMatchesEnding ? '$' : '' const regexSuffix: string = regexMatchesEnding ? '$' : ''
const escapedProcessedPrefix: string = convertInlineRegexSymbolsAndEscapeTheRest(extractedPrefix) const escapedProcessedPrefix: string = convertInlineRegexSymbolsAndEscapeTheRest(extractedPrefix)
const escapedProcessedSuffix: string = convertInlineRegexSymbolsAndEscapeTheRest(extractedSuffix) const escapedProcessedSuffix: string = convertInlineRegexSymbolsAndEscapeTheRest(extractedSuffix)
const regexFlags: string = replacement.unicodeRegex ? 'ui' : 'i'
return { return {
regexpSpec: { regexpSpec: {
regex: new RegExp(`${regexPrefix}${escapedProcessedPrefix}${replacement.regexpStr}${escapedProcessedSuffix}${regexSuffix}`, 'i'), regex: new RegExp(`${regexPrefix}${escapedProcessedPrefix}${replacement.regexpStr}${escapedProcessedSuffix}${regexSuffix}`, regexFlags),
normalizerFn: replacement.normalizerFn normalizerFn: replacement.normalizerFn
}, },
prefix: extractedPrefix, prefix: extractedPrefix,
@ -680,7 +703,7 @@ const extractIdentifier = (text: string, defaultResult?: string): string | undef
return identifier ? identifier : defaultResult return identifier ? identifier : defaultResult
} }
const ADJACENCY_ERROR: string = "Numerical sorting symbol must not be directly adjacent to a wildcard because of potential performance problem. An additional explicit separator helps in such case." const ADJACENCY_ERROR: string = "Sorting symbol must not be directly adjacent to a wildcard because of potential performance problem. An additional explicit separator helps in such case."
export class SortingSpecProcessor { export class SortingSpecProcessor {
ctx: ProcessingContext ctx: ProcessingContext
@ -983,8 +1006,8 @@ export class SortingSpecProcessor {
private parseSortingGroupSpec = (line: string): ParsedSortingGroup | null => { private parseSortingGroupSpec = (line: string): ParsedSortingGroup | null => {
let s: string = line.trim() let s: string = line.trim()
if (hasMoreThanOneNumericSortingSymbol(s)) { if (hasMoreThanOneSortingSymbol(s)) {
this.problem(ProblemCode.TooManyNumericSortingSymbols, 'Maximum one numeric sorting indicator allowed per line') this.problem(ProblemCode.TooManySortingSymbols, 'Maximum one sorting symbol allowed per line')
return null return null
} }
@ -1151,7 +1174,7 @@ export class SortingSpecProcessor {
if (group.itemToHide) { if (group.itemToHide) {
if (!this.consumeParsedItemToHide(group)) { if (!this.consumeParsedItemToHide(group)) {
this.problem(ProblemCode.ItemToHideNoSupportForThreeDots, 'For hiding of file or folder, the exact name with ext is required and no numeric sorting indicator allowed') this.problem(ProblemCode.ItemToHideNoSupportForThreeDots, 'For hiding of file or folder, the exact name with ext is required and no sorting symbols allowed')
return false return false
} else { } else {
return true return true
@ -1159,7 +1182,7 @@ export class SortingSpecProcessor {
} else { // !group.itemToHide } else { // !group.itemToHide
const newGroup: CustomSortGroup | null = this.consumeParsedSortingGroupSpec(group) const newGroup: CustomSortGroup | null = this.consumeParsedSortingGroupSpec(group)
if (newGroup) { if (newGroup) {
if (this.adjustSortingGroupForNumericSortingSymbol(newGroup)) { if (this.adjustSortingGroupForSortingSymbol(newGroup)) {
if (this.ctx.currentSpec) { if (this.ctx.currentSpec) {
const groupIdx = this.ctx.currentSpec.groups.push(newGroup) - 1 const groupIdx = this.ctx.currentSpec.groups.push(newGroup) - 1
this.ctx.currentSpecGroup = newGroup this.ctx.currentSpecGroup = newGroup
@ -1445,7 +1468,7 @@ export class SortingSpecProcessor {
if (!isThreeDots(theOnly)) { if (!isThreeDots(theOnly)) {
const nameWithExt: string = theOnly.trim() const nameWithExt: string = theOnly.trim()
if (nameWithExt) { // Sanity check if (nameWithExt) { // Sanity check
if (!detectNumericSortingSymbols(nameWithExt)) { if (!detectSortingSymbols(nameWithExt)) {
if (this.ctx.currentSpec) { if (this.ctx.currentSpec) {
const itemsToHide: Set<string> = this.ctx.currentSpec?.itemsToHide ?? new Set<string>() const itemsToHide: Set<string> = this.ctx.currentSpec?.itemsToHide ?? new Set<string>()
itemsToHide.add(nameWithExt) itemsToHide.add(nameWithExt)
@ -1572,17 +1595,17 @@ export class SortingSpecProcessor {
// Returns true if no regex will be involved (hence no adjustment) or if correctly adjusted with regex // Returns true if no regex will be involved (hence no adjustment) or if correctly adjusted with regex
private adjustSortingGroupForRegexBasedMatchers = (group: CustomSortGroup): boolean => { private adjustSortingGroupForRegexBasedMatchers = (group: CustomSortGroup): boolean => {
return this.adjustSortingGroupForNumericSortingSymbol(group) return this.adjustSortingGroupForSortingSymbol(group)
} }
// Returns true if no numeric sorting symbol (hence no adjustment) or if correctly adjusted with regex // Returns true if no sorting symbol (hence no adjustment) or if correctly adjusted with regex
private adjustSortingGroupForNumericSortingSymbol = (group: CustomSortGroup): boolean => { private adjustSortingGroupForSortingSymbol = (group: CustomSortGroup): boolean => {
switch (group.type) { switch (group.type) {
case CustomSortGroupType.ExactPrefix: case CustomSortGroupType.ExactPrefix:
const regexInPrefix = convertPlainStringToLeftRegex(group.exactPrefix!) const regexInPrefix = convertPlainStringToLeftRegex(group.exactPrefix!)
if (regexInPrefix) { if (regexInPrefix) {
if (regexInPrefix.containsAdvancedRegex && checkAdjacency(regexInPrefix).noSuffix) { if (regexInPrefix.containsAdvancedRegex && checkAdjacency(regexInPrefix).noSuffix) {
this.problem(ProblemCode.NumericalSymbolAdjacentToWildcard, ADJACENCY_ERROR) this.problem(ProblemCode.SortingSymbolAdjacentToWildcard, ADJACENCY_ERROR)
return false; return false;
} }
delete group.exactPrefix delete group.exactPrefix
@ -1593,7 +1616,7 @@ export class SortingSpecProcessor {
const regexInSuffix = convertPlainStringToRightRegex(group.exactSuffix!) const regexInSuffix = convertPlainStringToRightRegex(group.exactSuffix!)
if (regexInSuffix) { if (regexInSuffix) {
if (regexInSuffix.containsAdvancedRegex && checkAdjacency(regexInSuffix).noPrefix) { if (regexInSuffix.containsAdvancedRegex && checkAdjacency(regexInSuffix).noPrefix) {
this.problem(ProblemCode.NumericalSymbolAdjacentToWildcard, ADJACENCY_ERROR) this.problem(ProblemCode.SortingSymbolAdjacentToWildcard, ADJACENCY_ERROR)
return false; return false;
} }
delete group.exactSuffix delete group.exactSuffix
@ -1604,7 +1627,7 @@ export class SortingSpecProcessor {
const regexInHead = convertPlainStringToLeftRegex(group.exactPrefix!) const regexInHead = convertPlainStringToLeftRegex(group.exactPrefix!)
if (regexInHead) { if (regexInHead) {
if (regexInHead.containsAdvancedRegex && checkAdjacency(regexInHead).noSuffix) { if (regexInHead.containsAdvancedRegex && checkAdjacency(regexInHead).noSuffix) {
this.problem(ProblemCode.NumericalSymbolAdjacentToWildcard, ADJACENCY_ERROR) this.problem(ProblemCode.SortingSymbolAdjacentToWildcard, ADJACENCY_ERROR)
return false; return false;
} }
delete group.exactPrefix delete group.exactPrefix
@ -1613,7 +1636,7 @@ export class SortingSpecProcessor {
const regexInTail = convertPlainStringToRightRegex(group.exactSuffix!) const regexInTail = convertPlainStringToRightRegex(group.exactSuffix!)
if (regexInTail) { if (regexInTail) {
if (regexInTail.containsAdvancedRegex && checkAdjacency(regexInTail).noPrefix) { if (regexInTail.containsAdvancedRegex && checkAdjacency(regexInTail).noPrefix) {
this.problem(ProblemCode.NumericalSymbolAdjacentToWildcard, ADJACENCY_ERROR) this.problem(ProblemCode.SortingSymbolAdjacentToWildcard, ADJACENCY_ERROR)
return false; return false;
} }
delete group.exactSuffix delete group.exactSuffix