From ec0049302bb90fbdcabfd1b5b664dc4500254e40 Mon Sep 17 00:00:00 2001 From: SebastianMC <23032356+SebastianMC@users.noreply.github.com> Date: Sun, 18 Dec 2022 19:59:58 +0100 Subject: [PATCH] 32 feature: wider support of controlled regexp (#41) #32 - Implementation completed with rich unit tests coverage. - manual.md contains a simple example of the new feature - support for undocumented `\[0-3]` for the requester of the feature ;-) --- docs/manual.md | 103 +++- docs/syntax-reference.md | 4 +- src/custom-sort/custom-sort-types.ts | 5 +- src/custom-sort/custom-sort.spec.ts | 517 +++++++++++++++++- src/custom-sort/custom-sort.ts | 73 ++- .../sorting-spec-processor.spec.ts | 93 +++- src/custom-sort/sorting-spec-processor.ts | 220 ++++++-- 7 files changed, 905 insertions(+), 110 deletions(-) diff --git a/docs/manual.md b/docs/manual.md index b256943..f6c87a9 100644 --- a/docs/manual.md +++ b/docs/manual.md @@ -1,6 +1,6 @@ -Yet to be filled with content ;-) - -See [syntax-reference.md](./syntax-reference.md), maybe that file has already some content? +> Document is partial, creation in progress +> Please refer to [README.md](../README.md) for usage examples +> Check [syntax-reference.md](./syntax-reference.md), maybe that file has already some content? 
--- Some sections added ad-hoc, to be integrated later @@ -67,3 +67,100 @@ For clarity: the three available prefixes `/!` and `/!!` and `/!!!` allow for fu > --- > ``` > The sorting group expressed as `/:files` alone acts as a sorting group 'catch-all-files, which don't match any other sorting rule for the folder' + +## Simple wildcards + +Currently, the below simple wildcard syntax is supported: + +### A single digit (exactly one) + +An expression like `\d` or `\[0-9]` matches a single digit (exactly one) + +**Example 1**: + +A group specification of `/:files Section \d\d`\ +matches notes with names `Section 23` or `Section 01`, yet not a note like `Section 5` + +An opposite example: + +A group specification of `/:files Section \d`\ +matches the note with name `Section 5` and doesn't match notes `Section 23` or `Section 01` + +However, be careful if used in connection with a wildcard `...` - the behavior could be surprising: + +A group specification of `/:files Section \d...`\ +matches all notes like `Section 5`, `Section 23` or `Section 015` + +**Example 2**: + +As described above, the `\d` is equivalent to `\[0-9]` and can be used interchangeably\ +A group specification of `/folders Notes of \[0-9]\[0-9]\[0-9]\[0-9]`\ +matches the notes with titles like `Notes of 2022` or `Notes of 1999` + +## Combining sorting groups + +A prefix of `/+` used in sorting group specification tells the sorting engine +to combine the group with adjacent groups also prefixed with `/+` + +**Example:** + +The below sorting spec: +```yaml +--- +sorting-spec: | + Notes \d\d\d\d + > advanced modified + Notes \d\d\d\d-\d\d + > advanced modified +--- +``` +defines two sorting groups: +- first go the notes or folders with title like `Notes 2022` or `Notes 1999` +- then go notes or folders like `Notes 2022-12` or `Notes 1999-11` + +Both groups sorted by recent modification date, the newest go first\ +Implicitly, all other files or folders go below these two groups + +Using the `/+` prefix you 
can combine the two groups into a logical one: +```yaml +--- +sorting-spec: | + /+ Notes \d\d\d\d + /+ Notes \d\d\d\d-\d\d + > advanced modified +--- +``` +the result is that: +- notes or folders with title like `Notes 2022` or `Notes 1999` +- **AND** +- notes or folders like `Notes 2022-12` or `Notes 1999-11` + +will be pushed to the top in File Explorer, sorted by most recent modification date + +> NOTE: the sorting order is specified only once after the last of combined groups +> and it applies to the whole superset of items of all combined groups + +### An edge case: two adjacent combined sorting groups + +If you want to define two combined groups one after another +you should add a separator line with some artificial value not matching +any of your folders or files. The text `---+---` was used in the below example: + +```yaml +--- +sorting-spec: | + /+ Zeta + /+ % Gamma + /+ /:files Beta + /+ Alpha + < a-z + ---+--- + /+ Notes \d\d\d\d + /+ Notes \d\d\d\d-\d\d + > advanced modified +--- +``` + +The artificial separator `---+---` defines a sorting group, which will not match any folders or files +and is used here to logically separate the series of combined groups into two logical sets + diff --git a/docs/syntax-reference.md b/docs/syntax-reference.md index 63968f5..69c38af 100644 --- a/docs/syntax-reference.md +++ b/docs/syntax-reference.md @@ -1,5 +1,5 @@ > Document is partial, creation in progress -> Please refer to [README.md](../../README.md) for usage examples +> Please refer to [README.md](../README.md) for usage examples > Check [manual.md](./manual.md), maybe that file has already some content? 
# Table of contents @@ -97,6 +97,8 @@ Lines starting with `//` are ignored - `< a-z` - alphabetical - `> a-z` - alphabetical reverse, aka alphabetical descending, 'z' goes before 'a' +- `< true a-z` - true alphabetical, to understand the difference between this one and alphabetical refer to [Alphabetical, Natural and True Alphabetical sorting orders](../README.md#alphabetical-natural-and-true-alphabetical-sorting-orders) +- `> true a-z` - true alphabetical reverse, aka true alphabetical descending, 'z' goes before 'a' - `< modified` - by modified time, the long untouched item goes first (modified time of folder is assumed the beginning of the world, so folders go first and alphabetical) - `> modified` - by modified time reverse, the most recently modified item goes first (modified time of folder is assumed the beginning of the world, so folders land in the bottom and alphabetical) - `< created` - by created time, the oldest item goes first (modified time of folder is assumed the beginning of the world, so folders go first and alphabetical) diff --git a/src/custom-sort/custom-sort-types.ts b/src/custom-sort/custom-sort-types.ts index dd39d72..04235b2 100644 --- a/src/custom-sort/custom-sort-types.ts +++ b/src/custom-sort/custom-sort-types.ts @@ -41,15 +41,16 @@ export type NormalizerFn = (s: string) => string | null export interface RegExpSpec { regex: RegExp - normalizerFn: NormalizerFn + normalizerFn?: NormalizerFn } export interface CustomSortGroup { type: CustomSortGroupType - regexSpec?: RegExpSpec exactText?: string exactPrefix?: string + regexPrefix?: RegExpSpec exactSuffix?: string + regexSuffix?: RegExpSpec order?: CustomSortOrder byMetadataField?: string // for 'by-metadata:' sorting if the order is by metadata alphabetical or reverse secondaryOrder?: CustomSortOrder diff --git a/src/custom-sort/custom-sort.spec.ts b/src/custom-sort/custom-sort.spec.ts index 8495416..a482b30 100644 --- a/src/custom-sort/custom-sort.spec.ts +++ 
b/src/custom-sort/custom-sort.spec.ts @@ -5,10 +5,11 @@ import { determineFolderDatesIfNeeded, determineSortingGroup, FolderItemForSorting, + matchGroupRegex, SorterFn, Sorters } from './custom-sort'; -import {CustomSortGroupType, CustomSortOrder, CustomSortSpec} from './custom-sort-types'; +import {CustomSortGroupType, CustomSortOrder, CustomSortSpec, RegExpSpec} from './custom-sort-types'; import {CompoundDashNumberNormalizerFn, CompoundDotRomanNumberNormalizerFn} from "./sorting-spec-processor"; const mockTFile = (basename: string, ext: string, size?: number, ctime?: number, mtime?: number): TFile => { @@ -103,7 +104,7 @@ describe('determineSortingGroup', () => { // then expect(result).toEqual({ - groupIdx: 1, // This indicates the last+1 idx + groupIdx: 1, // This indicates the last+1 idx (no match) isFolder: false, sortString: "References.md", ctimeNewest: MOCK_TIMESTAMP + 555, @@ -112,14 +113,42 @@ describe('determineSortingGroup', () => { path: 'Some parent folder/References.md' }); }) - it('should not allow overlap of head and tail, when regexp in head', () => { + it('should not allow overlap of head and tail, when simple regexp in head', () => { // given const file: TFile = mockTFile('Part123:-icle', 'md', 444, MOCK_TIMESTAMP + 555, MOCK_TIMESTAMP + 666); const sortSpec: CustomSortSpec = { targetFoldersPaths: ['Some parent folder'], groups: [{ type: CustomSortGroupType.ExactHeadAndTail, - regexSpec: { + regexPrefix: { + regex: /^Part\d\d\d:/i + }, + exactSuffix: ':-icle' + }] + } + + // when + const result = determineSortingGroup(file, sortSpec) + + // then + expect(result).toEqual({ + groupIdx: 1, // This indicates the last+1 idx (no match) + isFolder: false, + sortString: "Part123:-icle.md", + ctimeNewest: MOCK_TIMESTAMP + 555, + ctimeOldest: MOCK_TIMESTAMP + 555, + mtime: MOCK_TIMESTAMP + 666, + path: 'Some parent folder/Part123:-icle.md' + }); + }) + it('should not allow overlap of head and tail, when advanced regexp in head', () => { + // given + 
const file: TFile = mockTFile('Part123:-icle', 'md', 444, MOCK_TIMESTAMP + 555, MOCK_TIMESTAMP + 666); + const sortSpec: CustomSortSpec = { + targetFoldersPaths: ['Some parent folder'], + groups: [{ + type: CustomSortGroupType.ExactHeadAndTail, + regexPrefix: { regex: /^Part *(\d+(?:-\d+)*):/i, normalizerFn: CompoundDashNumberNormalizerFn }, @@ -141,14 +170,43 @@ describe('determineSortingGroup', () => { path: 'Some parent folder/Part123:-icle.md' }); }) - it('should match head and tail, when regexp in head', () => { + it('should match head and tail, when simple regexp in head', () => { // given const file: TFile = mockTFile('Part123:-icle', 'md', 444, MOCK_TIMESTAMP + 555, MOCK_TIMESTAMP + 666); const sortSpec: CustomSortSpec = { targetFoldersPaths: ['Some parent folder'], groups: [{ type: CustomSortGroupType.ExactHeadAndTail, - regexSpec: { + regexPrefix: { + regex: /^Part\d\d\d:/i, + normalizerFn: CompoundDashNumberNormalizerFn + }, + exactSuffix: '-icle' + }] + } + + // when + const result = determineSortingGroup(file, sortSpec) + + // then + expect(result).toEqual({ + groupIdx: 0, // Matched! 
+ isFolder: false, + sortString: "Part123:-icle.md", + ctimeNewest: MOCK_TIMESTAMP + 555, + ctimeOldest: MOCK_TIMESTAMP + 555, + mtime: MOCK_TIMESTAMP + 666, + path: 'Some parent folder/Part123:-icle.md' + }); + }) + it('should match head and tail, when advanced regexp in head', () => { + // given + const file: TFile = mockTFile('Part123:-icle', 'md', 444, MOCK_TIMESTAMP + 555, MOCK_TIMESTAMP + 666); + const sortSpec: CustomSortSpec = { + targetFoldersPaths: ['Some parent folder'], + groups: [{ + type: CustomSortGroupType.ExactHeadAndTail, + regexPrefix: { regex: /^Part *(\d+(?:-\d+)*):/i, normalizerFn: CompoundDashNumberNormalizerFn }, @@ -179,7 +237,7 @@ describe('determineSortingGroup', () => { groups: [{ type: CustomSortGroupType.ExactHeadAndTail, exactPrefix: 'Part:', - regexSpec: { + regexSuffix: { regex: /: *(\d+(?:-\d+)*)-icle$/i, normalizerFn: CompoundDashNumberNormalizerFn } @@ -200,7 +258,69 @@ describe('determineSortingGroup', () => { path: 'Some parent folder/Part:123-icle.md' }); }); - it('should match head and tail, when regexp in tail', () => { + it('should match head and tail, when simple regexp in head and tail', () => { + // given + const file: TFile = mockTFile('Part:123-icle', 'md', 444, MOCK_TIMESTAMP + 555, MOCK_TIMESTAMP + 666); + const sortSpec: CustomSortSpec = { + targetFoldersPaths: ['Some parent folder'], + groups: [{ + type: CustomSortGroupType.ExactHeadAndTail, + regexPrefix: { + regex: /^Part:\d/i + }, + regexSuffix: { + regex: /\d-icle$/i + } + }] + } + + // when + const result = determineSortingGroup(file, sortSpec) + + // then + expect(result).toEqual({ + groupIdx: 0, // Matched! 
+ isFolder: false, + sortString: "Part:123-icle.md", + ctimeNewest: MOCK_TIMESTAMP + 555, + ctimeOldest: MOCK_TIMESTAMP + 555, + mtime: MOCK_TIMESTAMP + 666, + path: 'Some parent folder/Part:123-icle.md' + }); + }); + it('should match head and tail, when simple regexp in head and and mixed in tail', () => { + // given + const file: TFile = mockTFile('Part:1 1-23.456-icle', 'md', 444, MOCK_TIMESTAMP + 555, MOCK_TIMESTAMP + 666); + const sortSpec: CustomSortSpec = { + targetFoldersPaths: ['Some parent folder'], + groups: [{ + type: CustomSortGroupType.ExactHeadAndTail, + regexPrefix: { + regex: /^Part:\d/i + }, + regexSuffix: { + regex: / *(\d+(?:-\d+)*).\d\d\d-icle$/i, + normalizerFn: CompoundDashNumberNormalizerFn + } + }] + } + + // when + const result = determineSortingGroup(file, sortSpec) + + // then + expect(result).toEqual({ + groupIdx: 0, // Matched! + isFolder: false, + sortString: "00000001|00000023////Part:1 1-23.456-icle.md", + matchGroup: '00000001|00000023//', + ctimeNewest: MOCK_TIMESTAMP + 555, + ctimeOldest: MOCK_TIMESTAMP + 555, + mtime: MOCK_TIMESTAMP + 666, + path: 'Some parent folder/Part:1 1-23.456-icle.md' + }); + }); + it('should match head and tail, when advanced regexp in tail', () => { // given const file: TFile = mockTFile('Part:123-icle', 'md', 444, MOCK_TIMESTAMP + 555, MOCK_TIMESTAMP + 666); const sortSpec: CustomSortSpec = { @@ -208,7 +328,7 @@ describe('determineSortingGroup', () => { groups: [{ type: CustomSortGroupType.ExactHeadAndTail, exactPrefix: 'Part', - regexSpec: { + regexSuffix: { regex: /: *(\d+(?:-\d+)*)-icle$/i, normalizerFn: CompoundDashNumberNormalizerFn } @@ -257,14 +377,41 @@ describe('determineSortingGroup', () => { path: 'Some parent folder/References.md' }); }) - it('should correctly recognize exact prefix, regex variant', () => { + it('should correctly recognize exact simple regex prefix', () => { + // given + const file: TFile = mockTFile('Ref2erences', 'md', 111, MOCK_TIMESTAMP + 222, MOCK_TIMESTAMP + 333); + 
const sortSpec: CustomSortSpec = { + targetFoldersPaths: ['/'], + groups: [{ + type: CustomSortGroupType.ExactPrefix, + regexPrefix: { + regex: /Ref[0-9]/i + } + }] + } + + // when + const result = determineSortingGroup(file, sortSpec) + + // then + expect(result).toEqual({ + groupIdx: 0, + isFolder: false, + sortString: "Ref2erences.md", + ctimeNewest: MOCK_TIMESTAMP + 222, + ctimeOldest: MOCK_TIMESTAMP + 222, + mtime: MOCK_TIMESTAMP + 333, + path: 'Some parent folder/Ref2erences.md' + }); + }) + it('should correctly recognize exact prefix, regexL variant', () => { // given const file: TFile = mockTFile('Reference i.xxx.vi.mcm', 'md', 111, MOCK_TIMESTAMP + 222, MOCK_TIMESTAMP + 333); const sortSpec: CustomSortSpec = { targetFoldersPaths: ['/'], groups: [{ type: CustomSortGroupType.ExactPrefix, - regexSpec: { + regexPrefix: { regex: /^Reference *([MDCLXVI]+(?:\.[MDCLXVI]+)*)/i, normalizerFn: CompoundDotRomanNumberNormalizerFn } @@ -311,6 +458,272 @@ describe('determineSortingGroup', () => { }); }) }) + describe('CustomSortGroupType.ExactSuffix', () => { + it('should correctly recognize exact suffix', () => { + // given + const file: TFile = mockTFile('References', 'md', 111, MOCK_TIMESTAMP + 222, MOCK_TIMESTAMP + 333); + const sortSpec: CustomSortSpec = { + targetFoldersPaths: ['/'], + groups: [{ + type: CustomSortGroupType.ExactSuffix, + exactSuffix: 'ces' + }] + } + + // when + const result = determineSortingGroup(file, sortSpec) + + // then + expect(result).toEqual({ + groupIdx: 0, + isFolder: false, + sortString: "References.md", + ctimeNewest: MOCK_TIMESTAMP + 222, + ctimeOldest: MOCK_TIMESTAMP + 222, + mtime: MOCK_TIMESTAMP + 333, + path: 'Some parent folder/References.md' + }); + }) + it('should correctly recognize exact simple regex suffix', () => { + // given + const file: TFile = mockTFile('References 12', 'md', 111, MOCK_TIMESTAMP + 222, MOCK_TIMESTAMP + 333); + const sortSpec: CustomSortSpec = { + targetFoldersPaths: ['/'], + groups: [{ + type: 
CustomSortGroupType.ExactSuffix, + regexSuffix: { + regex: /ces [0-9][0-9]$/i + } + }] + } + + // when + const result = determineSortingGroup(file, sortSpec) + + // then + expect(result).toEqual({ + groupIdx: 0, + isFolder: false, + sortString: "References 12.md", + ctimeNewest: MOCK_TIMESTAMP + 222, + ctimeOldest: MOCK_TIMESTAMP + 222, + mtime: MOCK_TIMESTAMP + 333, + path: 'Some parent folder/References 12.md' + }); + }) + it('should correctly recognize exact suffix, regexL variant', () => { + // given + const file: TFile = mockTFile('Reference i.xxx.vi.mcm', 'md', 111, MOCK_TIMESTAMP + 222, MOCK_TIMESTAMP + 333); + const sortSpec: CustomSortSpec = { + targetFoldersPaths: ['/'], + groups: [{ + type: CustomSortGroupType.ExactSuffix, + regexSuffix: { + regex: / *([MDCLXVI]+(?:\.[MDCLXVI]+)*)$/i, + normalizerFn: CompoundDotRomanNumberNormalizerFn + } + }] + } + + // when + const result = determineSortingGroup(file, sortSpec) + + // then + expect(result).toEqual({ + groupIdx: 0, + isFolder: false, + sortString: '00000001|00000030|00000006|00001900////Reference i.xxx.vi.mcm.md', + matchGroup: "00000001|00000030|00000006|00001900//", + ctimeNewest: MOCK_TIMESTAMP + 222, + ctimeOldest: MOCK_TIMESTAMP + 222, + mtime: MOCK_TIMESTAMP + 333, + path: 'Some parent folder/Reference i.xxx.vi.mcm.md' + }); + }) + it('should correctly process not matching suffix', () => { + // given + const file: TFile = mockTFile('References', 'md', 111, MOCK_TIMESTAMP + 222, MOCK_TIMESTAMP + 333); + const sortSpec: CustomSortSpec = { + targetFoldersPaths: ['/'], + groups: [{ + type: CustomSortGroupType.ExactSuffix, + exactSuffix: 'ence' + }] + } + // when + const result = determineSortingGroup(file, sortSpec) + + // then + expect(result).toEqual({ + groupIdx: 1, // This indicates the last+1 idx + isFolder: false, + sortString: "References.md", + ctimeNewest: MOCK_TIMESTAMP + 222, + ctimeOldest: MOCK_TIMESTAMP + 222, + mtime: MOCK_TIMESTAMP + 333, + path: 'Some parent folder/References.md' + }); 
+ }) + it('should correctly process not matching regex suffix', () => { + // given + const file: TFile = mockTFile('References', 'md', 111, MOCK_TIMESTAMP + 222, MOCK_TIMESTAMP + 333); + const sortSpec: CustomSortSpec = { + targetFoldersPaths: ['/'], + groups: [{ + type: CustomSortGroupType.ExactSuffix, + regexSuffix: { + regex: /ence$/i + } + }] + } + // when + const result = determineSortingGroup(file, sortSpec) + + // then + expect(result).toEqual({ + groupIdx: 1, // This indicates the last+1 idx + isFolder: false, + sortString: "References.md", + ctimeNewest: MOCK_TIMESTAMP + 222, + ctimeOldest: MOCK_TIMESTAMP + 222, + mtime: MOCK_TIMESTAMP + 333, + path: 'Some parent folder/References.md' + }); + }) + }) + describe('CustomSortGroupType.ExactName', () => { + it('should correctly recognize exact name', () => { + // given + const file: TFile = mockTFile('References', 'md', 111, MOCK_TIMESTAMP + 222, MOCK_TIMESTAMP + 333); + const sortSpec: CustomSortSpec = { + targetFoldersPaths: ['/'], + groups: [{ + type: CustomSortGroupType.ExactName, + exactText: 'References' + }] + } + + // when + const result = determineSortingGroup(file, sortSpec) + + // then + expect(result).toEqual({ + groupIdx: 0, + isFolder: false, + sortString: "References.md", + ctimeNewest: MOCK_TIMESTAMP + 222, + ctimeOldest: MOCK_TIMESTAMP + 222, + mtime: MOCK_TIMESTAMP + 333, + path: 'Some parent folder/References.md' + }); + }) + it('should correctly recognize exact simple regex-based name', () => { + // given + const file: TFile = mockTFile('References 12', 'md', 111, MOCK_TIMESTAMP + 222, MOCK_TIMESTAMP + 333); + const sortSpec: CustomSortSpec = { + targetFoldersPaths: ['/'], + groups: [{ + type: CustomSortGroupType.ExactName, + regexPrefix: { + regex: /^References [0-9][0-9]$/i + } + }] + } + + // when + const result = determineSortingGroup(file, sortSpec) + + // then + expect(result).toEqual({ + groupIdx: 0, + isFolder: false, + sortString: "References 12.md", + ctimeNewest: MOCK_TIMESTAMP + 
222, + ctimeOldest: MOCK_TIMESTAMP + 222, + mtime: MOCK_TIMESTAMP + 333, + path: 'Some parent folder/References 12.md' + }); + }) + it('should correctly recognize exact name, regexL variant', () => { + // given + const file: TFile = mockTFile('Reference i.xxx.vi.mcm', 'md', 111, MOCK_TIMESTAMP + 222, MOCK_TIMESTAMP + 333); + const sortSpec: CustomSortSpec = { + targetFoldersPaths: ['/'], + groups: [{ + type: CustomSortGroupType.ExactName, + regexPrefix: { + regex: /^Reference *([MDCLXVI]+(?:\.[MDCLXVI]+)*)$/i, + normalizerFn: CompoundDotRomanNumberNormalizerFn + } + }] + } + + // when + const result = determineSortingGroup(file, sortSpec) + + // then + expect(result).toEqual({ + groupIdx: 0, + isFolder: false, + sortString: '00000001|00000030|00000006|00001900////Reference i.xxx.vi.mcm.md', + matchGroup: "00000001|00000030|00000006|00001900//", + ctimeNewest: MOCK_TIMESTAMP + 222, + ctimeOldest: MOCK_TIMESTAMP + 222, + mtime: MOCK_TIMESTAMP + 333, + path: 'Some parent folder/Reference i.xxx.vi.mcm.md' + }); + }) + it('should correctly process not matching name', () => { + // given + const file: TFile = mockTFile('References', 'md', 111, MOCK_TIMESTAMP + 222, MOCK_TIMESTAMP + 333); + const sortSpec: CustomSortSpec = { + targetFoldersPaths: ['/'], + groups: [{ + type: CustomSortGroupType.ExactName, + exactText: 'ence' + }] + } + // when + const result = determineSortingGroup(file, sortSpec) + + // then + expect(result).toEqual({ + groupIdx: 1, // This indicates the last+1 idx + isFolder: false, + sortString: "References.md", + ctimeNewest: MOCK_TIMESTAMP + 222, + ctimeOldest: MOCK_TIMESTAMP + 222, + mtime: MOCK_TIMESTAMP + 333, + path: 'Some parent folder/References.md' + }); + }) + it('should correctly process not matching regex name', () => { + // given + const file: TFile = mockTFile('References', 'md', 111, MOCK_TIMESTAMP + 222, MOCK_TIMESTAMP + 333); + const sortSpec: CustomSortSpec = { + targetFoldersPaths: ['/'], + groups: [{ + type: 
CustomSortGroupType.ExactName, + regexPrefix: { + regex: /^Reference$/i + } + }] + } + // when + const result = determineSortingGroup(file, sortSpec) + + // then + expect(result).toEqual({ + groupIdx: 1, // This indicates the last+1 idx + isFolder: false, + sortString: "References.md", + ctimeNewest: MOCK_TIMESTAMP + 222, + ctimeOldest: MOCK_TIMESTAMP + 222, + mtime: MOCK_TIMESTAMP + 333, + path: 'Some parent folder/References.md' + }); + }) + }) describe('CustomSortGroupType.byMetadataFieldAlphabetical', () => { it('should ignore the file item if it has no direct metadata', () => { // given @@ -1013,6 +1426,88 @@ describe('determineFolderDatesIfNeeded', () => { }) }) +describe('matchGroupRegex', () => { + it( 'should correctly handle no match', () => { + // given + const regExpSpec: RegExpSpec = { + regex: /a(b)c/i + } + const name: string = 'Abbc' + + // when + const [matched, matchedGroup, entireMatch] = matchGroupRegex(regExpSpec, name) + + // then + expect(matched).toBe(false) + expect(matchedGroup).toBeUndefined() + expect(entireMatch).toBeUndefined() + }) + it('should correctly handle no matching group match and normalizer absent', () => { + // given + const regExpSpec: RegExpSpec = { + regex: /ab+c/i + } + const name: string = 'Abbbc' + + // when + const [matched, matchedGroup, entireMatch] = matchGroupRegex(regExpSpec, name) + + // then + expect(matched).toBe(true) + expect(matchedGroup).toBeUndefined() + expect(entireMatch).toBe('Abbbc') + }) + it('should correctly handle no matching group match and normalizer present', () => { + // given + const regExpSpec: RegExpSpec = { + regex: /ab+c/i, + normalizerFn: jest.fn() + } + const name: string = 'Abc' + + // when + const [matched, matchedGroup, entireMatch] = matchGroupRegex(regExpSpec, name) + + // then + expect(matched).toBe(true) + expect(matchedGroup).toBeUndefined() + expect(entireMatch).toBe('Abc') + expect(regExpSpec.normalizerFn).not.toHaveBeenCalled() + }) + it('should correctly handle matching 
group match and normalizer absent', () => { + // given + const regExpSpec: RegExpSpec = { + regex: /a(b+)c/i + } + const name: string = 'Abbbc' + + // when + const [matched, matchedGroup, entireMatch] = matchGroupRegex(regExpSpec, name) + + // then + expect(matched).toBe(true) + expect(matchedGroup).toBe('bbb') + expect(entireMatch).toBe('Abbbc') + }) + it('should correctly handle matching group match and normalizer present', () => { + // given + const regExpSpec: RegExpSpec = { + regex: /a(b+)c/i, + normalizerFn: jest.fn((s) => `>>${s}<<`) + } + const name: string = 'Abc' + + // when + const [matched, matchedGroup, entireMatch] = matchGroupRegex(regExpSpec, name) + + // then + expect(matched).toBe(true) + expect(matchedGroup).toBe('>>b<<') + expect(entireMatch).toBe('Abc') + expect(regExpSpec.normalizerFn).toHaveBeenCalledTimes(1) + }) +}) + const SORT_FIRST_GOES_EARLIER: number = -1 const SORT_FIRST_GOES_LATER: number = 1 const SORT_ITEMS_ARE_EQUAL: number = 0 diff --git a/src/custom-sort/custom-sort.ts b/src/custom-sort/custom-sort.ts index 91d7b1b..9ecf4f1 100644 --- a/src/custom-sort/custom-sort.ts +++ b/src/custom-sort/custom-sort.ts @@ -4,7 +4,9 @@ import { CustomSortGroupType, CustomSortOrder, CustomSortSpec, - DEFAULT_METADATA_FIELD_FOR_SORTING + DEFAULT_METADATA_FIELD_FOR_SORTING, + NormalizerFn, + RegExpSpec } from "./custom-sort-types"; import {isDefined} from "../utils/utils"; @@ -26,8 +28,8 @@ export interface FolderItemForSorting { sortString: string // fragment (or full name) to be used for sorting metadataFieldValue?: string // relevant to metadata-based sorting only matchGroup?: string // advanced - used for secondary sorting rule, to recognize 'same regex match' - ctimeOldest: number // for a file, both ctime values are the same. For folder they can be different: - ctimeNewest: number // ctimeOldest = ctime of oldest child file, ctimeNewest = ctime of newest child file + ctimeOldest: number // for a file, both ctime values are the same. 
For folder, they can be different: + ctimeNewest: number // ctimeOldest = ctime of the oldest child file, ctimeNewest = ctime of the newest child file mtime: number isFolder: boolean folder?: TFolder @@ -90,7 +92,8 @@ function compareTwoItems(itA: FolderItemForSorting, itB: FolderItemForSorting, s if (itA.groupIdx != undefined && itB.groupIdx != undefined) { if (itA.groupIdx === itB.groupIdx) { const group: CustomSortGroup | undefined = sortSpec.groups[itA.groupIdx] - if (group?.regexSpec && group.secondaryOrder && itA.matchGroup === itB.matchGroup) { + const matchingGroupPresentOnBothSidesAndEqual: boolean = itA.matchGroup !== undefined && itA.matchGroup === itB.matchGroup + if (matchingGroupPresentOnBothSidesAndEqual && group.secondaryOrder) { return Sorters[group.secondaryOrder ?? CustomSortOrder.default](itA, itB) } else { return Sorters[group?.order ?? CustomSortOrder.default](itA, itB) @@ -119,6 +122,24 @@ const isByMetadata = (order: CustomSortOrder | undefined) => { export const DEFAULT_FOLDER_MTIME: number = 0 export const DEFAULT_FOLDER_CTIME: number = 0 +type RegexMatchedGroup = string | undefined +type RegexFullMatch = string | undefined +type Matched = boolean + +export const matchGroupRegex = (theRegex: RegExpSpec, nameForMatching: string): [Matched, RegexMatchedGroup, RegexFullMatch] => { + const match: RegExpMatchArray | null | undefined = theRegex.regex.exec(nameForMatching); + if (match) { + const normalizer: NormalizerFn | undefined = theRegex.normalizerFn + const regexMatchedGroup: string | undefined = match[1] + if (regexMatchedGroup) { + return [true, normalizer ? normalizer!(regexMatchedGroup)! 
: regexMatchedGroup, match[0]] + } else { + return [true, undefined, match[0]] + } + } + return [false, undefined, undefined] +} + export const determineSortingGroup = function (entry: TFile | TFolder, spec: CustomSortSpec): FolderItemForSorting { let groupIdx: number let determined: boolean = false @@ -147,11 +168,7 @@ export const determineSortingGroup = function (entry: TFile | TFolder, spec: Cus determined = true; } } else { // regexp is involved - const match: RegExpMatchArray | null | undefined = group.regexSpec?.regex.exec(nameForMatching); - if (match) { - determined = true - matchedGroup = group.regexSpec?.normalizerFn(match[1]); - } + [determined, matchedGroup] = matchGroupRegex(group.regexPrefix!, nameForMatching) } break; case CustomSortGroupType.ExactSuffix: @@ -160,11 +177,7 @@ export const determineSortingGroup = function (entry: TFile | TFolder, spec: Cus determined = true; } } else { // regexp is involved - const match: RegExpMatchArray | null | undefined = group.regexSpec?.regex.exec(nameForMatching); - if (match) { - determined = true - matchedGroup = group.regexSpec?.normalizerFn(match[1]); - } + [determined, matchedGroup] = matchGroupRegex(group.regexSuffix!, nameForMatching) } break; case CustomSortGroupType.ExactHeadAndTail: @@ -174,22 +187,30 @@ export const determineSortingGroup = function (entry: TFile | TFolder, spec: Cus determined = true; } } - } else { // regexp is involved as the prefix or as the suffix + } else if (group.exactPrefix || group.exactSuffix) { // regexp is involved as the prefix or as the suffix (not both) if ((group.exactPrefix && nameForMatching.startsWith(group.exactPrefix)) || (group.exactSuffix && nameForMatching.endsWith(group.exactSuffix))) { - const match: RegExpMatchArray | null | undefined = group.regexSpec?.regex.exec(nameForMatching); - if (match) { - const fullMatch: string = match[0] - matchedGroup = group.regexSpec?.normalizerFn(match[1]); + let fullMatch: string | undefined + [determined, matchedGroup, 
fullMatch] = matchGroupRegex(group.exactPrefix ? group.regexSuffix! : group.regexPrefix!, nameForMatching) + if (determined) { // check for overlapping of prefix and suffix match (not allowed) - if ((fullMatch.length + (group.exactPrefix?.length ?? 0) + (group.exactSuffix?.length ?? 0)) <= nameForMatching.length) { - determined = true - } else { + if ((fullMatch!.length + (group.exactPrefix?.length ?? 0) + (group.exactSuffix?.length ?? 0)) > nameForMatching.length) { + determined = false matchedGroup = null // if it falls into Outsiders group, let it use title to sort } } } - } + } else { // regexp is involved both as the prefix and as the suffix + const [matchedLeft, matchedGroupLeft, fullMatchLeft] = matchGroupRegex(group.regexPrefix!, nameForMatching) + const [matchedRight, matchedGroupRight, fullMatchRight] = matchGroupRegex(group.regexSuffix!, nameForMatching) + if (matchedLeft && matchedRight) { + // check for overlapping of prefix and suffix match (not allowed) + if ((fullMatchLeft!.length + fullMatchRight!.length) <= nameForMatching.length) { + determined = true + matchedGroup = matchedGroupLeft ?? 
matchedGroupRight + } + } + } break; case CustomSortGroupType.ExactName: if (group.exactText) { @@ -197,11 +218,7 @@ export const determineSortingGroup = function (entry: TFile | TFolder, spec: Cus determined = true; } } else { // regexp is involved - const match: RegExpMatchArray | null | undefined = group.regexSpec?.regex.exec(nameForMatching); - if (match) { - determined = true - matchedGroup = group.regexSpec?.normalizerFn(match[1]); - } + [determined, matchedGroup] = matchGroupRegex(group.regexPrefix!, nameForMatching) } break case CustomSortGroupType.HasMetadataField: diff --git a/src/custom-sort/sorting-spec-processor.spec.ts b/src/custom-sort/sorting-spec-processor.spec.ts index a7bce01..88726da 100644 --- a/src/custom-sort/sorting-spec-processor.spec.ts +++ b/src/custom-sort/sorting-spec-processor.spec.ts @@ -2,7 +2,7 @@ import { CompoundDashNumberNormalizerFn, CompoundDashRomanNumberNormalizerFn, CompoundDotNumberNormalizerFn, - convertPlainStringWithNumericSortingSymbolToRegex, + convertPlainStringToRegex, detectNumericSortingSymbols, escapeRegexUnsafeCharacters, extractNumericSortingSymbol, @@ -322,7 +322,7 @@ const expectedSortSpecsExampleNumericSortingSymbols: { [key: string]: CustomSort foldersOnly: true, order: CustomSortOrder.alphabetical, type: CustomSortGroupType.ExactPrefix, - regexSpec: { + regexPrefix: { regex: /^Chapter *(\d+(?:\.\d+)*) /i, normalizerFn: CompoundDotNumberNormalizerFn } @@ -330,14 +330,14 @@ const expectedSortSpecsExampleNumericSortingSymbols: { [key: string]: CustomSort filesOnly: true, order: CustomSortOrder.alphabetical, type: CustomSortGroupType.ExactSuffix, - regexSpec: { + regexSuffix: { regex: /section *([MDCLXVI]+(?:-[MDCLXVI]+)*)\.$/i, normalizerFn: CompoundDashRomanNumberNormalizerFn } }, { order: CustomSortOrder.alphabetical, type: CustomSortGroupType.ExactName, - regexSpec: { + regexPrefix: { regex: /^Appendix *(\d+(?:-\d+)*) \(attachments\)$/i, normalizerFn: CompoundDashNumberNormalizerFn } @@ -345,7 +345,7 @@ 
const expectedSortSpecsExampleNumericSortingSymbols: { [key: string]: CustomSort order: CustomSortOrder.alphabetical, type: CustomSortGroupType.ExactHeadAndTail, exactSuffix: ' works?', - regexSpec: { + regexPrefix: { regex: /^Plain syntax *([MDCLXVI]+) /i, normalizerFn: RomanNumberNormalizerFn } @@ -353,7 +353,7 @@ const expectedSortSpecsExampleNumericSortingSymbols: { [key: string]: CustomSort order: CustomSortOrder.alphabetical, type: CustomSortGroupType.ExactHeadAndTail, exactPrefix: 'And this kind of', - regexSpec: { + regexSuffix: { regex: / *(\d+)plain syntax\?\?\?$/i, normalizerFn: NumberNormalizerFn } @@ -1365,11 +1365,11 @@ const txtInputErrorSpaceAsValueOfAscendingAttr: string = ` ORDER-ASC: ` const txtInputErrorInvalidValueOfDescendingAttr: string = ` -/Folders: +/folders > definitely not correct ` const txtInputErrorNoSpaceDescendingAttr: string = ` -/files: Chapter ... +/:files Chapter ... Order-DESC:MODIFIED ` const txtInputErrorItemToHideWithNoValue: string = ` @@ -1666,6 +1666,17 @@ describe('SortingSpecProcessor error detection and reporting', () => { `${ERR_PREFIX} 10:NumericalSymbolAdjacentToWildcard Numerical sorting symbol must not be directly adjacent to a wildcard because of potential performance problem. An additional explicit separator helps in such case. ${ERR_SUFFIX_IN_LINE(1)}`) expect(errorsLogger).toHaveBeenNthCalledWith(2, ERR_LINE_TXT(s)) }) + it.each([ + '% \\.d+\\d...', + '% ...[0-9]\\d+', + '% Chapter\\R+\\d... 
page', + '% Section ...[0-9]\\-r+page' + ])('should not recognize adjacency error in >%s<', (s: string) => { + const inputTxtArr: Array = s.split('\n') + const result = processor.parseSortSpecFromText(inputTxtArr, 'mock-folder', 'custom-name-note.md') + expect(result).not.toBeNull() + expect(errorsLogger).not.toHaveBeenCalled() + }) }) const txtInputTargetFolderCCC: string = ` @@ -1832,6 +1843,7 @@ describe('extractNumericSortingSymbol', () => { ['', null], ['d+', null], [' \\d +', null], + [' [0-9]', null], ['\\ d +', null], [' \\d+', '\\d+'], ['--\\.D+\\d+', '\\.D+'], @@ -1844,38 +1856,71 @@ describe('extractNumericSortingSymbol', () => { describe('convertPlainStringWithNumericSortingSymbolToRegex', () => { it.each([ + // Advanced numeric symbols [' \\d+ ', / *(\d+) /i], - ['--\\.D+\\d+', /\-\- *(\d+(?:\.\d+)*)\\d\+/i], ['Chapter \\D+:', /Chapter *(\d+):/i], ['Section \\.D+ of', /Section *(\d+(?:\.\d+)*) of/i], ['Part\\-D+:', /Part *(\d+(?:-\d+)*):/i], ['Lorem ipsum\\r+:', /Lorem ipsum *([MDCLXVI]+):/i], ['\\.r+', / *([MDCLXVI]+(?:\.[MDCLXVI]+)*)/i], ['\\-r+:Lorem', / *([MDCLXVI]+(?:-[MDCLXVI]+)*):Lorem/i], - ['abc\\d+efg\\d+hij', /abc *(\d+)efg/i], // Double numerical sorting symbol, error case, covered for clarity of implementation detail + // Simple regex + ['\\d-\\[0-9];-)', /\d\-[0-9];\-\)/i], + ['[0-9]\\d[0-9]', /\[0\-9\]\d\[0\-9\]/i], + ['\\[0-9]', /[0-9]/i], + ['[0-9] \\d', /\[0\-9\] \d/i], + [' \\dd ', / \dd /i], + [' \\d\\d \\[0-9] ', / \d\d [0-9] /i], + [' \\d 123 \\[0-9] ', / \d 123 [0-9] /i], + // Advanced numeric symbols in connection with simple regex + ['\\dLorem ipsum\\r+:', /\dLorem ipsum *([MDCLXVI]+):/i], + ['W\\dLorem ipsum\\r+:', /W\dLorem ipsum *([MDCLXVI]+):/i], + ['Lorem \\d\\r+\\dipsum:', /Lorem \d *([MDCLXVI]+)\dipsum:/i], + ['Lorem \\d\\D+\\dipsum:', /Lorem \d *(\d+)\dipsum:/i], + // Edge case to act as spec - actually the three dots ... 
should never reach conversion to regex + ['% \\.d+\\d...', /% *(\d+(?:\.\d+)*)\d\.\.\./i], + ['% ...[0-9]\\d+', /% \.\.\.\[0\-9\] *(\d+)/i], + ['% Chapter\\R+\\d... page', /% Chapter *([MDCLXVI]+)\d\.\.\. page/i], + ['% Section ...[0-9]\\-r+page', /% Section \.\.\.\[0\-9\] *([MDCLXVI]+(?:-[MDCLXVI]+)*)page/i], + // Edge and error cases, behavior covered by tests to act as specification of the engine here + // even if at run-time the error checking prevents some such expressions + ['abc\\d+efg\\d+hij', /abc *(\d+)efg/i], // Double advanced numerical sorting symbol, error case + ['--\\.D+\\d+', /\-\- *(\d+(?:\.\d+)*)\d\+/i], // Two advanced numerical symbols ])('should correctly extract from >%s< the numeric sorting symbol (%s)', (s: string, regex: RegExp) => { - const result = convertPlainStringWithNumericSortingSymbolToRegex(s, RegexpUsedAs.InUnitTest) + const result = convertPlainStringToRegex(s, RegexpUsedAs.InUnitTest) expect(result?.regexpSpec.regex).toEqual(regex) // No need to examine prefix and suffix fields of result, they are secondary and derived from the returned regexp }) - it('should not process string not containing numeric sorting symbol', () => { - const input = 'abc' - const result = convertPlainStringWithNumericSortingSymbolToRegex(input, RegexpUsedAs.InUnitTest) - expect(result).toBeNull() + it('should not process string not containing numeric sorting symbol nor regex', () => { + const input1 = 'abc' + const input2 = '[0-9]' + const result1 = convertPlainStringToRegex(input1, RegexpUsedAs.InUnitTest) + const result2 = convertPlainStringToRegex(input2, RegexpUsedAs.InUnitTest) + expect(result1).toBeNull() + expect(result2).toBeNull() }) it('should correctly include regex token for string begin', () => { - const input = 'Part\\-D+:' - const result = convertPlainStringWithNumericSortingSymbolToRegex(input, RegexpUsedAs.Prefix) - expect(result?.regexpSpec.regex).toEqual(/^Part *(\d+(?:-\d+)*):/i) + const input1 = 'Part\\-D+:' + const input2 = 
'\\dPart' + const result1 = convertPlainStringToRegex(input1, RegexpUsedAs.Prefix) + const result2 = convertPlainStringToRegex(input2, RegexpUsedAs.Prefix) + expect(result1?.regexpSpec.regex).toEqual(/^Part *(\d+(?:-\d+)*):/i) + expect(result2?.regexpSpec.regex).toEqual(/^\dPart/i) }) it('should correctly include regex token for string end', () => { - const input = 'Part\\-D+:' - const result = convertPlainStringWithNumericSortingSymbolToRegex(input, RegexpUsedAs.Suffix) - expect(result?.regexpSpec.regex).toEqual(/Part *(\d+(?:-\d+)*):$/i) + const input1 = 'Part\\-D+:' + const input2 = ' \\[0-9]\\-D+' + const result1 = convertPlainStringToRegex(input1, RegexpUsedAs.Suffix) + const result2 = convertPlainStringToRegex(input2, RegexpUsedAs.Suffix) + expect(result1?.regexpSpec.regex).toEqual(/Part *(\d+(?:-\d+)*):$/i) + expect(result2?.regexpSpec.regex).toEqual(/ [0-9] *(\d+(?:-\d+)*)$/i) }) it('should correctly include regex token for string begin and end', () => { - const input = 'Part\\.D+:' - const result = convertPlainStringWithNumericSortingSymbolToRegex(input, RegexpUsedAs.FullMatch) - expect(result?.regexpSpec.regex).toEqual(/^Part *(\d+(?:\.\d+)*):$/i) + const input1 = 'Part\\.D+:' + const input2 = ' \\d \\[0-9] ' + const result1 = convertPlainStringToRegex(input1, RegexpUsedAs.FullMatch) + const result2 = convertPlainStringToRegex(input2, RegexpUsedAs.FullMatch) + expect(result1?.regexpSpec.regex).toEqual(/^Part *(\d+(?:\.\d+)*):$/i) + expect(result2?.regexpSpec.regex).toEqual(/^ \d [0-9] $/i) }) }) diff --git a/src/custom-sort/sorting-spec-processor.ts b/src/custom-sort/sorting-spec-processor.ts index 72c6906..da239e8 100644 --- a/src/custom-sort/sorting-spec-processor.ts +++ b/src/custom-sort/sorting-spec-processor.ts @@ -74,7 +74,8 @@ export enum ProblemCode { OnlyLastCombinedGroupCanSpecifyOrder, TooManyGroupTypePrefixes, PriorityPrefixAfterGroupTypePrefix, - CombinePrefixAfterGroupTypePrefix + CombinePrefixAfterGroupTypePrefix, + 
InlineRegexInPrefixAndSuffix } const ContextFreeProblems = new Set([ @@ -252,6 +253,10 @@ const NumberRegexSymbol: string = '\\d+' // Plain number const CompoundNumberDotRegexSymbol: string = '\\.d+' // Compound number with dot as separator const CompoundNumberDashRegexSymbol: string = '\\-d+' // Compound number with dash as separator +const InlineRegexSymbol_Digit1: string = '\\d' +const InlineRegexSymbol_Digit2: string = '\\[0-9]' +const InlineRegexSymbol_0_to_3: string = '\\[0-3]' + const UnsafeRegexCharsRegex: RegExp = /[\^$.\-+\[\]{}()|*?=!\\]/g export const escapeRegexUnsafeCharacters = (s: string): string => { @@ -269,6 +274,21 @@ const numericSortingSymbolsArr: Array = [ const numericSortingSymbolsRegex = new RegExp(numericSortingSymbolsArr.join('|'), 'gi') +const inlineRegexSymbolsArrEscapedForRegex: Array = [ + escapeRegexUnsafeCharacters(InlineRegexSymbol_Digit1), + escapeRegexUnsafeCharacters(InlineRegexSymbol_Digit2), + escapeRegexUnsafeCharacters(InlineRegexSymbol_0_to_3) +] + +// Don't be confused if the source lexeme is equal to the resulting regex piece; logically these are two distinct spaces +const inlineRegexSymbolsToRegexExpressionsArr: { [key: string]: string} = { + [InlineRegexSymbol_Digit1]: '\\d', + [InlineRegexSymbol_Digit2]: '[0-9]', + [InlineRegexSymbol_0_to_3]: '[0-3]', +} + +const inlineRegexSymbolsDetectionRegex = new RegExp(inlineRegexSymbolsArrEscapedForRegex.join('|'), 'gi') + export const hasMoreThanOneNumericSortingSymbol = (s: string): boolean => { numericSortingSymbolsRegex.lastIndex = 0 return numericSortingSymbolsRegex.test(s) && numericSortingSymbolsRegex.test(s) @@ -278,6 +298,11 @@ export const detectNumericSortingSymbols = (s: string): boolean => { return numericSortingSymbolsRegex.test(s) } +export const detectInlineRegex = (s?: string): boolean => { + inlineRegexSymbolsDetectionRegex.lastIndex = 0 + return s ?
inlineRegexSymbolsDetectionRegex.test(s) : false +} + export const extractNumericSortingSymbol = (s?: string): string | null => { if (s) { numericSortingSymbolsRegex.lastIndex = 0 @@ -291,6 +316,7 @@ export const extractNumericSortingSymbol = (s?: string): string | null => { export interface RegExpSpecStr { regexpStr: string normalizerFn: NormalizerFn + advancedRegexType: AdvancedRegexType } // Exposed as named exports to allow unit testing @@ -301,37 +327,64 @@ export const NumberNormalizerFn: NormalizerFn = (s: string) => getNormalizedNumb export const CompoundDotNumberNormalizerFn: NormalizerFn = (s: string) => getNormalizedNumber(s, DOT_SEPARATOR) export const CompoundDashNumberNormalizerFn: NormalizerFn = (s: string) => getNormalizedNumber(s, DASH_SEPARATOR) +export enum AdvancedRegexType { + None, // to allow if (advancedRegex) + Number, + CompoundDotNumber, + CompoundDashNumber, + RomanNumber, + CompoundDotRomanNumber, + CompoundDashRomanNumber +} + const numericSortingSymbolToRegexpStr: { [key: string]: RegExpSpecStr } = { [RomanNumberRegexSymbol.toLowerCase()]: { regexpStr: RomanNumberRegexStr, - normalizerFn: RomanNumberNormalizerFn + normalizerFn: RomanNumberNormalizerFn, + advancedRegexType: AdvancedRegexType.RomanNumber }, [CompoundRomanNumberDotRegexSymbol.toLowerCase()]: { regexpStr: CompoundRomanNumberDotRegexStr, - normalizerFn: CompoundDotRomanNumberNormalizerFn + normalizerFn: CompoundDotRomanNumberNormalizerFn, + advancedRegexType: AdvancedRegexType.CompoundDotRomanNumber }, [CompoundRomanNumberDashRegexSymbol.toLowerCase()]: { regexpStr: CompoundRomanNumberDashRegexStr, - normalizerFn: CompoundDashRomanNumberNormalizerFn + normalizerFn: CompoundDashRomanNumberNormalizerFn, + advancedRegexType: AdvancedRegexType.CompoundDashRomanNumber }, [NumberRegexSymbol.toLowerCase()]: { regexpStr: NumberRegexStr, - normalizerFn: NumberNormalizerFn + normalizerFn: NumberNormalizerFn, + advancedRegexType: AdvancedRegexType.Number }, 
[CompoundNumberDotRegexSymbol.toLowerCase()]: { regexpStr: CompoundNumberDotRegexStr, - normalizerFn: CompoundDotNumberNormalizerFn + normalizerFn: CompoundDotNumberNormalizerFn, + advancedRegexType: AdvancedRegexType.CompoundDotNumber }, [CompoundNumberDashRegexSymbol.toLowerCase()]: { regexpStr: CompoundNumberDashRegexStr, - normalizerFn: CompoundDashNumberNormalizerFn + normalizerFn: CompoundDashNumberNormalizerFn, + advancedRegexType: AdvancedRegexType.CompoundDashNumber } } -export interface ExtractedNumericSortingSymbolInfo { +// advanced regex is a regex, which: +// - includes a matching group, which is then extracted for sorting needs +// - AND +// - contains a variable-length matching regex, e.g. [0-9]+ +// - thus requires the prefix and suffix information to check adjacency (to detect and avoid regex backtracking problems) +// by comparison, a non-advanced regex (aka simple regex) is a constant-length wildcard, e.g. +// - a single digit +// - a single alphanumeric character (not implemented yet) +// - a fixed-length number (not implemented yet) +// - overall, guaranteed not to have zero-length matches +export interface RegexMatcherInfo { regexpSpec: RegExpSpec - prefix: string - suffix: string + prefix: string // NOTE!
This can also contain regex string, yet w/o matching groups and w/o optional matches + suffix: string // in other words, if there is a regex in prefix or suffix, it is guaranteed to not have zero-length matches + containsAdvancedRegex: AdvancedRegexType } export enum RegexpUsedAs { @@ -341,26 +394,99 @@ export enum RegexpUsedAs { FullMatch } -export const convertPlainStringWithNumericSortingSymbolToRegex = (s?: string, actAs?: RegexpUsedAs): ExtractedNumericSortingSymbolInfo | null => { +export const convertPlainStringToLeftRegex = (s: string): RegexMatcherInfo | null => { + return convertPlainStringToRegex(s, RegexpUsedAs.Prefix) +} + +export const convertPlainStringToRightRegex = (s: string): RegexMatcherInfo | null => { + return convertPlainStringToRegex(s, RegexpUsedAs.Suffix) +} + +export const convertPlainStringToFullMatchRegex = (s: string): RegexMatcherInfo | null => { + return convertPlainStringToRegex(s, RegexpUsedAs.FullMatch) +} + +export const convertPlainStringToRegex = (s: string, actAs: RegexpUsedAs): RegexMatcherInfo | null => { + const regexMatchesStart: boolean = [RegexpUsedAs.Prefix, RegexpUsedAs.FullMatch].includes(actAs) + const regexMatchesEnding: boolean = [RegexpUsedAs.Suffix, RegexpUsedAs.FullMatch].includes(actAs) const detectedSymbol: string | null = extractNumericSortingSymbol(s) if (detectedSymbol) { const replacement: RegExpSpecStr = numericSortingSymbolToRegexpStr[detectedSymbol.toLowerCase()] const [extractedPrefix, extractedSuffix] = s!.split(detectedSymbol) - const regexPrefix: string = actAs === RegexpUsedAs.Prefix || actAs === RegexpUsedAs.FullMatch ? '^' : '' - const regexSuffix: string = actAs === RegexpUsedAs.Suffix || actAs === RegexpUsedAs.FullMatch ? '$' : '' + const regexPrefix: string = regexMatchesStart ? '^' : '' + const regexSuffix: string = regexMatchesEnding ? 
'$' : '' + const escapedProcessedPrefix: string = convertInlineRegexSymbolsAndEscapeTheRest(extractedPrefix) + const escapedProcessedSuffix: string = convertInlineRegexSymbolsAndEscapeTheRest(extractedSuffix) return { regexpSpec: { - regex: new RegExp(`${regexPrefix}${escapeRegexUnsafeCharacters(extractedPrefix)}${replacement.regexpStr}${escapeRegexUnsafeCharacters(extractedSuffix)}${regexSuffix}`, 'i'), + regex: new RegExp(`${regexPrefix}${escapedProcessedPrefix}${replacement.regexpStr}${escapedProcessedSuffix}${regexSuffix}`, 'i'), normalizerFn: replacement.normalizerFn }, prefix: extractedPrefix, - suffix: extractedSuffix + suffix: extractedSuffix, + containsAdvancedRegex: replacement.advancedRegexType + } + } else if (detectInlineRegex(s)) { + const replacement: RegexAsString = convertInlineRegexSymbolsAndEscapeTheRest(s)! + const regexPrefix: string = regexMatchesStart ? '^' : '' + const regexSuffix: string = regexMatchesEnding ? '$' : '' + return { + regexpSpec: { + regex: new RegExp(`${regexPrefix}${replacement}${regexSuffix}`, 'i') + }, + prefix: '', // shouldn't be used anyway because of the below containsAdvancedRegex: false + suffix: '', // ---- // ---- + containsAdvancedRegex: AdvancedRegexType.None } } else { return null } } +type RegexAsString = string + +export const convertInlineRegexSymbolsAndEscapeTheRest = (s: string): RegexAsString => { + if (s === '') { + return s + } + + let regexAsString: Array = [] + + while (s!.length > 0) { + // detect the first inline regex + let earliestRegexSymbolIdx: number | undefined = undefined + let earliestRegexSymbol: string | undefined = undefined + for (let inlineRegexSymbol of Object.keys(inlineRegexSymbolsToRegexExpressionsArr)) { + const index: number = s!.indexOf(inlineRegexSymbol) + if (index >= 0) { + if (earliestRegexSymbolIdx !== undefined) { + if (index < earliestRegexSymbolIdx) { + earliestRegexSymbolIdx = index + earliestRegexSymbol = inlineRegexSymbol + } + } else { + earliestRegexSymbolIdx = index 
+ earliestRegexSymbol = inlineRegexSymbol + } + } + } + if (earliestRegexSymbolIdx !== undefined) { + if (earliestRegexSymbolIdx > 0) { + const charsBeforeRegexSymbol: string = s!.substring(0, earliestRegexSymbolIdx) + regexAsString.push(escapeRegexUnsafeCharacters(charsBeforeRegexSymbol)) + s = s!.substring(earliestRegexSymbolIdx) + } + regexAsString.push(inlineRegexSymbolsToRegexExpressionsArr[earliestRegexSymbol!]) + s = s!.substring(earliestRegexSymbol!.length) + } else { + regexAsString.push(escapeRegexUnsafeCharacters(s)) + s = '' + } + } + + return regexAsString.join('') +} + export interface FolderPathToSortSpecMap { [key: string]: CustomSortSpec } @@ -375,7 +501,7 @@ interface AdjacencyInfo { noSuffix: boolean } -const checkAdjacency = (sortingSymbolInfo: ExtractedNumericSortingSymbolInfo): AdjacencyInfo => { +const checkAdjacency = (sortingSymbolInfo: RegexMatcherInfo): AdjacencyInfo => { return { noPrefix: sortingSymbolInfo.prefix.length === 0, noSuffix: sortingSymbolInfo.suffix.length === 0 @@ -708,6 +834,14 @@ export class SortingSpecProcessor { return null } + if (containsThreeDots(s)) { + const [prefix, suffix] = s.split(ThreeDots) + if (containsThreeDots(prefix) && containsThreeDots(suffix)) { + this.problem(ProblemCode.InlineRegexInPrefixAndSuffix, 'In current version, inline regex symbols are not allowed both in prefix and suffix.') + return null + } + } + let groupPriority: number | undefined = undefined let groupPriorityPrefixesCount: number = 0 let combineGroup: boolean | undefined = undefined @@ -1266,57 +1400,61 @@ export class SortingSpecProcessor { return null; } + // Returns true if no regex will be involved (hence no adjustment) or if correctly adjusted with regex + private adjustSortingGroupForRegexBasedMatchers = (group: CustomSortGroup): boolean => { + return this.adjustSortingGroupForNumericSortingSymbol(group) + } + // Returns true if no numeric sorting symbol (hence no adjustment) or if correctly adjusted with regex - private 
adjustSortingGroupForNumericSortingSymbol = (group: CustomSortGroup) => { + private adjustSortingGroupForNumericSortingSymbol = (group: CustomSortGroup): boolean => { switch (group.type) { case CustomSortGroupType.ExactPrefix: - const numSymbolInPrefix = convertPlainStringWithNumericSortingSymbolToRegex(group.exactPrefix, RegexpUsedAs.Prefix) - if (numSymbolInPrefix) { - if (checkAdjacency(numSymbolInPrefix).noSuffix) { + const regexInPrefix = convertPlainStringToLeftRegex(group.exactPrefix!) + if (regexInPrefix) { + if (regexInPrefix.containsAdvancedRegex && checkAdjacency(regexInPrefix).noSuffix) { this.problem(ProblemCode.NumericalSymbolAdjacentToWildcard, ADJACENCY_ERROR) return false; } delete group.exactPrefix - group.regexSpec = numSymbolInPrefix.regexpSpec + group.regexPrefix = regexInPrefix.regexpSpec } break; case CustomSortGroupType.ExactSuffix: - const numSymbolInSuffix = convertPlainStringWithNumericSortingSymbolToRegex(group.exactSuffix, RegexpUsedAs.Suffix) - if (numSymbolInSuffix) { - if (checkAdjacency(numSymbolInSuffix).noPrefix) { + const regexInSuffix = convertPlainStringToRightRegex(group.exactSuffix!) + if (regexInSuffix) { + if (regexInSuffix.containsAdvancedRegex && checkAdjacency(regexInSuffix).noPrefix) { this.problem(ProblemCode.NumericalSymbolAdjacentToWildcard, ADJACENCY_ERROR) return false; } delete group.exactSuffix - group.regexSpec = numSymbolInSuffix.regexpSpec + group.regexSuffix = regexInSuffix.regexpSpec } break; case CustomSortGroupType.ExactHeadAndTail: - const numSymbolInHead = convertPlainStringWithNumericSortingSymbolToRegex(group.exactPrefix, RegexpUsedAs.Prefix) - if (numSymbolInHead) { - if (checkAdjacency(numSymbolInHead).noSuffix) { + const regexInHead = convertPlainStringToLeftRegex(group.exactPrefix!) 
+ if (regexInHead) { + if (regexInHead.containsAdvancedRegex && checkAdjacency(regexInHead).noSuffix) { this.problem(ProblemCode.NumericalSymbolAdjacentToWildcard, ADJACENCY_ERROR) return false; } delete group.exactPrefix - group.regexSpec = numSymbolInHead.regexpSpec - } else { - const numSymbolInTail = convertPlainStringWithNumericSortingSymbolToRegex(group.exactSuffix, RegexpUsedAs.Suffix) - if (numSymbolInTail) { - if (checkAdjacency(numSymbolInTail).noPrefix) { - this.problem(ProblemCode.NumericalSymbolAdjacentToWildcard, ADJACENCY_ERROR) - return false; - } - delete group.exactSuffix - group.regexSpec = numSymbolInTail.regexpSpec + group.regexPrefix = regexInHead.regexpSpec + } + const regexInTail = convertPlainStringToRightRegex(group.exactSuffix!) + if (regexInTail) { + if (regexInTail.containsAdvancedRegex && checkAdjacency(regexInTail).noPrefix) { + this.problem(ProblemCode.NumericalSymbolAdjacentToWildcard, ADJACENCY_ERROR) + return false; } + delete group.exactSuffix + group.regexSuffix = regexInTail.regexpSpec } break; case CustomSortGroupType.ExactName: - const numSymbolInExactMatch = convertPlainStringWithNumericSortingSymbolToRegex(group.exactText, RegexpUsedAs.FullMatch) - if (numSymbolInExactMatch) { + const regexInExactMatch = convertPlainStringToFullMatchRegex(group.exactText!) + if (regexInExactMatch) { delete group.exactText - group.regexSpec = numSymbolInExactMatch.regexpSpec + group.regexPrefix = regexInExactMatch.regexpSpec } break; }