#178 - week-number based date extraction patterns for titles

- more unit tests
#191 - added two obvious date formats yyyy-mm-dd and yyyy-dd-mm
This commit is contained in:
SebastianMC 2025-01-14 17:37:02 +01:00
parent 975f6ee26e
commit b142d1951e
4 changed files with 132 additions and 13 deletions

View File

@ -10,11 +10,15 @@ export const NumberRegexStr: string = ' *(\\d+)'; // Plain number
export const CompoundNumberDotRegexStr: string = ' *(\\d+(?:\\.\\d+)*)'; // Compound number with dot as separator export const CompoundNumberDotRegexStr: string = ' *(\\d+(?:\\.\\d+)*)'; // Compound number with dot as separator
export const CompoundNumberDashRegexStr: string = ' *(\\d+(?:-\\d+)*)'; // Compound number with dash as separator export const CompoundNumberDashRegexStr: string = ' *(\\d+(?:-\\d+)*)'; // Compound number with dash as separator
export const Date_yyyy_mm_dd_RegexStr: string = ' *(\\d{4}-\\d{2}-\\d{2})' // All-numeric date like 2020-12-30: optional leading spaces, then 4-2-2 digit groups
export const Date_yyyy_dd_mm_RegexStr: string = Date_yyyy_mm_dd_RegexStr // Date like 2020-31-12: same textual shape as yyyy-mm-dd, so the regex is shared; the field order differs only in the paired normalizer
export const Date_dd_Mmm_yyyy_RegexStr: string = ' *([0-3]*[0-9]-(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-\\d{4})'; // Date like 01-Jan-2020 export const Date_dd_Mmm_yyyy_RegexStr: string = ' *([0-3]*[0-9]-(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-\\d{4})'; // Date like 01-Jan-2020
export const Date_Mmm_dd_yyyy_RegexStr: string = ' *((?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-[0-3]*[0-9]-\\d{4})'; // Date like Jan-01-2020 export const Date_Mmm_dd_yyyy_RegexStr: string = ' *((?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-[0-3]*[0-9]-\\d{4})'; // Date like Jan-01-2020
export const Date_yyyy_Www_mm_dd_RegexStr: string = ' *(\\d{4}-W\\d{1,2} \\(\\d{2}-\\d{2}\\))' export const Date_yyyy_Www_mm_dd_RegexStr: string = ' *(\\d{4}-W\\d{1,2} \\(\\d{2}-\\d{2}\\))'
export const Date_yyyy_Www_RegexStr: string = ' *(\\d{4}-W\\d{1,2})' export const Date_yyyy_WwwISO_RegexStr: string = ' *(\\d{4}-W\\d{1,2})'
export const Date_yyyy_Www_RegexStr: string = Date_yyyy_WwwISO_RegexStr // Week date like 2021-W51: textually identical to the ISO variant, hence the alias. NOTE(review): the week-numbering difference presumably lives in the separate normalizers - confirm
export const DOT_SEPARATOR = '.' export const DOT_SEPARATOR = '.'
export const DASH_SEPARATOR = '-' export const DASH_SEPARATOR = '-'
@ -128,6 +132,8 @@ export function getNormalizedDate_NormalizerFn_for(separator: string, dayIdx: nu
} }
} }
// Normalizers for the all-numeric date formats. The three numbers are the positions of the
// day, month and year among the '-'-separated fields of the matched text (same argument
// order as the Mmm-based normalizers below). No month-name table is passed because the
// month is already numeric.
export const getNormalizedDate_yyyy_mm_dd_NormalizerFn = getNormalizedDate_NormalizerFn_for('-', 2, 1, 0)
export const getNormalizedDate_yyyy_dd_mm_NormalizerFn = getNormalizedDate_NormalizerFn_for('-', 1, 2, 0)
export const getNormalizedDate_dd_Mmm_yyyy_NormalizerFn = getNormalizedDate_NormalizerFn_for('-', 0, 1, 2, MONTHS) export const getNormalizedDate_dd_Mmm_yyyy_NormalizerFn = getNormalizedDate_NormalizerFn_for('-', 0, 1, 2, MONTHS)
export const getNormalizedDate_Mmm_dd_yyyy_NormalizerFn = getNormalizedDate_NormalizerFn_for('-', 1, 0, 2, MONTHS) export const getNormalizedDate_Mmm_dd_yyyy_NormalizerFn = getNormalizedDate_NormalizerFn_for('-', 1, 0, 2, MONTHS)

View File

@ -24,6 +24,8 @@ import {
DOT_SEPARATOR, DOT_SEPARATOR,
getNormalizedDate_dd_Mmm_yyyy_NormalizerFn, getNormalizedDate_dd_Mmm_yyyy_NormalizerFn,
getNormalizedDate_Mmm_dd_yyyy_NormalizerFn, getNormalizedDate_Mmm_dd_yyyy_NormalizerFn,
getNormalizedDate_yyyy_mm_dd_NormalizerFn,
getNormalizedDate_yyyy_dd_mm_NormalizerFn,
getNormalizedDate_yyyy_Www_mm_dd_NormalizerFn, getNormalizedDate_yyyy_Www_mm_dd_NormalizerFn,
getNormalizedDate_yyyy_WwwISO_NormalizerFn, getNormalizedDate_yyyy_WwwISO_NormalizerFn,
getNormalizedDate_yyyy_Www_NormalizerFn, getNormalizedDate_yyyy_Www_NormalizerFn,
@ -32,7 +34,7 @@ import {
NumberRegexStr, NumberRegexStr,
RomanNumberRegexStr, RomanNumberRegexStr,
WordInAnyLanguageRegexStr, WordInAnyLanguageRegexStr,
WordInASCIIRegexStr WordInASCIIRegexStr, Date_yyyy_WwwISO_RegexStr, Date_yyyy_mm_dd_RegexStr, Date_yyyy_dd_mm_RegexStr
} from "./matchers"; } from "./matchers";
import { import {
FolderWildcardMatching, FolderWildcardMatching,
@ -357,6 +359,8 @@ const InlineRegexSymbol_Digit1: string = '\\d'
const InlineRegexSymbol_Digit2: string = '\\[0-9]' const InlineRegexSymbol_Digit2: string = '\\[0-9]'
const InlineRegexSymbol_0_to_3: string = '\\[0-3]' const InlineRegexSymbol_0_to_3: string = '\\[0-3]'
// Sorting-spec symbols for the two all-numeric date formats (written as \[yyyy-mm-dd] and \[yyyy-dd-mm] in a spec)
const Date_yyyy_mm_dd_RegexSymbol: string = '\\[yyyy-mm-dd]'
const Date_yyyy_dd_mm_RegexSymbol: string = '\\[yyyy-dd-mm]'
const Date_dd_Mmm_yyyy_RegexSymbol: string = '\\[dd-Mmm-yyyy]' const Date_dd_Mmm_yyyy_RegexSymbol: string = '\\[dd-Mmm-yyyy]'
const Date_Mmm_dd_yyyy_RegexSymbol: string = '\\[Mmm-dd-yyyy]' const Date_Mmm_dd_yyyy_RegexSymbol: string = '\\[Mmm-dd-yyyy]'
const Date_yyyy_Www_mm_dd_RegexSymbol: string = '\\[yyyy-Www (mm-dd)]' const Date_yyyy_Www_mm_dd_RegexSymbol: string = '\\[yyyy-Www (mm-dd)]'
@ -381,6 +385,8 @@ const sortingSymbolsArr: Array<string> = [
escapeRegexUnsafeCharacters(CompoundRomanNumberDashRegexSymbol), escapeRegexUnsafeCharacters(CompoundRomanNumberDashRegexSymbol),
escapeRegexUnsafeCharacters(WordInASCIIRegexSymbol), escapeRegexUnsafeCharacters(WordInASCIIRegexSymbol),
escapeRegexUnsafeCharacters(WordInAnyLanguageRegexSymbol), escapeRegexUnsafeCharacters(WordInAnyLanguageRegexSymbol),
escapeRegexUnsafeCharacters(Date_yyyy_mm_dd_RegexSymbol),
escapeRegexUnsafeCharacters(Date_yyyy_dd_mm_RegexSymbol),
escapeRegexUnsafeCharacters(Date_dd_Mmm_yyyy_RegexSymbol), escapeRegexUnsafeCharacters(Date_dd_Mmm_yyyy_RegexSymbol),
escapeRegexUnsafeCharacters(Date_Mmm_dd_yyyy_RegexSymbol), escapeRegexUnsafeCharacters(Date_Mmm_dd_yyyy_RegexSymbol),
escapeRegexUnsafeCharacters(Date_yyyy_Www_mm_dd_RegexSymbol), escapeRegexUnsafeCharacters(Date_yyyy_Www_mm_dd_RegexSymbol),
@ -453,6 +459,8 @@ export const CompoundDashRomanNumberNormalizerFn: NormalizerFn = (s: string) =>
export const NumberNormalizerFn: NormalizerFn = (s: string) => getNormalizedNumber(s) export const NumberNormalizerFn: NormalizerFn = (s: string) => getNormalizedNumber(s)
export const CompoundDotNumberNormalizerFn: NormalizerFn = (s: string) => getNormalizedNumber(s, DOT_SEPARATOR) export const CompoundDotNumberNormalizerFn: NormalizerFn = (s: string) => getNormalizedNumber(s, DOT_SEPARATOR)
export const CompoundDashNumberNormalizerFn: NormalizerFn = (s: string) => getNormalizedNumber(s, DASH_SEPARATOR) export const CompoundDashNumberNormalizerFn: NormalizerFn = (s: string) => getNormalizedNumber(s, DASH_SEPARATOR)
// NormalizerFn-typed wrappers around the matchers-module normalizers for the all-numeric date formats
export const Date_yyyy_mm_dd_NormalizerFn: NormalizerFn = (s: string) => getNormalizedDate_yyyy_mm_dd_NormalizerFn(s)
export const Date_yyyy_dd_mm_NormalizerFn: NormalizerFn = (s: string) => getNormalizedDate_yyyy_dd_mm_NormalizerFn(s)
export const Date_dd_Mmm_yyyy_NormalizerFn: NormalizerFn = (s: string) => getNormalizedDate_dd_Mmm_yyyy_NormalizerFn(s) export const Date_dd_Mmm_yyyy_NormalizerFn: NormalizerFn = (s: string) => getNormalizedDate_dd_Mmm_yyyy_NormalizerFn(s)
export const Date_Mmm_dd_yyyy_NormalizerFn: NormalizerFn = (s: string) => getNormalizedDate_Mmm_dd_yyyy_NormalizerFn(s) export const Date_Mmm_dd_yyyy_NormalizerFn: NormalizerFn = (s: string) => getNormalizedDate_Mmm_dd_yyyy_NormalizerFn(s)
export const Date_yyyy_Www_mm_dd_NormalizerFn: NormalizerFn = (s: string) => getNormalizedDate_yyyy_Www_mm_dd_NormalizerFn(s) export const Date_yyyy_Www_mm_dd_NormalizerFn: NormalizerFn = (s: string) => getNormalizedDate_yyyy_Www_mm_dd_NormalizerFn(s)
@ -469,6 +477,8 @@ export enum AdvancedRegexType {
CompoundDashRomanNumber, CompoundDashRomanNumber,
WordInASCII, WordInASCII,
WordInAnyLanguage, WordInAnyLanguage,
Date_yyyy_mm_dd,
Date_yyyy_dd_mm,
Date_dd_Mmm_yyyy, Date_dd_Mmm_yyyy,
Date_Mmm_dd_yyyy, Date_Mmm_dd_yyyy,
Date_yyyy_Www_mm_dd_yyyy, Date_yyyy_Www_mm_dd_yyyy,
@ -518,6 +528,16 @@ const sortingSymbolToRegexpStr: { [key: string]: RegExpSpecStr } = {
advancedRegexType: AdvancedRegexType.WordInAnyLanguage, advancedRegexType: AdvancedRegexType.WordInAnyLanguage,
unicodeRegex: true unicodeRegex: true
}, },
[Date_yyyy_mm_dd_RegexSymbol]: { // Intentionally retain character case
regexpStr: Date_yyyy_mm_dd_RegexStr,
normalizerFn: Date_yyyy_mm_dd_NormalizerFn,
advancedRegexType: AdvancedRegexType.Date_yyyy_mm_dd
},
[Date_yyyy_dd_mm_RegexSymbol]: { // Intentionally retain character case
regexpStr: Date_yyyy_dd_mm_RegexStr,
normalizerFn: Date_yyyy_dd_mm_NormalizerFn,
advancedRegexType: AdvancedRegexType.Date_yyyy_dd_mm
},
[Date_dd_Mmm_yyyy_RegexSymbol]: { // Intentionally retain character case [Date_dd_Mmm_yyyy_RegexSymbol]: { // Intentionally retain character case
regexpStr: Date_dd_Mmm_yyyy_RegexStr, regexpStr: Date_dd_Mmm_yyyy_RegexStr,
normalizerFn: Date_dd_Mmm_yyyy_NormalizerFn, normalizerFn: Date_dd_Mmm_yyyy_NormalizerFn,
@ -534,7 +554,7 @@ const sortingSymbolToRegexpStr: { [key: string]: RegExpSpecStr } = {
advancedRegexType: AdvancedRegexType.Date_yyyy_Www_mm_dd_yyyy advancedRegexType: AdvancedRegexType.Date_yyyy_Www_mm_dd_yyyy
}, },
[Date_yyyy_WwwISO_RegexSymbol]: { // Intentionally retain character case [Date_yyyy_WwwISO_RegexSymbol]: { // Intentionally retain character case
regexpStr: Date_yyyy_Www_RegexStr, regexpStr: Date_yyyy_WwwISO_RegexStr,
normalizerFn: Date_yyyy_WwwISO_NormalizerFn, normalizerFn: Date_yyyy_WwwISO_NormalizerFn,
advancedRegexType: AdvancedRegexType.Date_yyyy_WwwISO advancedRegexType: AdvancedRegexType.Date_yyyy_WwwISO
}, },

View File

@ -7,7 +7,11 @@ import {
DEFAULT_FOLDER_CTIME, DEFAULT_FOLDER_CTIME,
determineFolderDatesIfNeeded, determineFolderDatesIfNeeded,
determineSortingGroup, determineSortingGroup,
FolderItemForSorting, OS_alphabetical, OS_byCreatedTime, ProcessingContext, sortFolderItems FolderItemForSorting,
OS_alphabetical,
OS_byCreatedTime,
ProcessingContext,
sortFolderItems
} from "../../custom-sort/custom-sort"; } from "../../custom-sort/custom-sort";
import { import {
CustomSortGroupType, CustomSortGroupType,
@ -29,11 +33,12 @@ import {
} from "../../custom-sort/sorting-spec-processor"; } from "../../custom-sort/sorting-spec-processor";
describe('sortFolderItems', () => { describe('sortFolderItems', () => {
it('should correctly handle Mmm-dd-yyyy pattern in file names', () => { it('should correctly handle Mmm-dd-yyyy pattern in file and folder names', () => {
// given // given
const processor: SortingSpecProcessor = new SortingSpecProcessor() const processor: SortingSpecProcessor = new SortingSpecProcessor()
const sortSpecTxt = const sortSpecTxt =
` ... \\[Mmm-dd-yyyy] `
... \\[Mmm-dd-yyyy]
> a-z > a-z
` `
const PARENT_PATH = 'parent/folder/path' const PARENT_PATH = 'parent/folder/path'
@ -60,11 +65,12 @@ describe('sortFolderItems', () => {
'AAA Jan-01-2012' 'AAA Jan-01-2012'
]) ])
}) })
it('should correctly handle yyyy-Www (mm-dd) pattern in file names', () => { it('should correctly handle yyyy-Www (mm-dd) pattern in file and folder names', () => {
// given // given
const processor: SortingSpecProcessor = new SortingSpecProcessor() const processor: SortingSpecProcessor = new SortingSpecProcessor()
const sortSpecTxt = const sortSpecTxt =
` ... \\[yyyy-Www (mm-dd)] `
... \\[yyyy-Www (mm-dd)]
< a-z < a-z
------ ------
` `
@ -94,11 +100,12 @@ describe('sortFolderItems', () => {
"------.md" "------.md"
]) ])
}) })
it('should correctly handle yyyy-WwwISO pattern in file names', () => { it('should correctly handle yyyy-WwwISO pattern in file and folder names', () => {
// given // given
const processor: SortingSpecProcessor = new SortingSpecProcessor() const processor: SortingSpecProcessor = new SortingSpecProcessor()
const sortSpecTxt = const sortSpecTxt =
` /+ ... \\[yyyy-Www (mm-dd)] `
/+ ... \\[yyyy-Www (mm-dd)]
/+ ... \\[yyyy-WwwISO] /+ ... \\[yyyy-WwwISO]
< a-z < a-z
` `
@ -132,13 +139,15 @@ describe('sortFolderItems', () => {
"------.md" "------.md"
]) ])
}) })
it('should correctly handle yyyy-Www pattern in file names', () => { it('should correctly handle yyyy-Www pattern in file and folder names', () => {
// given // given
const processor: SortingSpecProcessor = new SortingSpecProcessor() const processor: SortingSpecProcessor = new SortingSpecProcessor()
const sortSpecTxt = const sortSpecTxt =
` /+ ... \\[yyyy-Www (mm-dd)] `
/+ ... \\[yyyy-Www (mm-dd)]
/+ ... \\[yyyy-Www] /+ ... \\[yyyy-Www]
> a-z > a-z
... \\-d+
` `
const PARENT_PATH = 'parent/folder/path' const PARENT_PATH = 'parent/folder/path'
const sortSpecsCollection = processor.parseSortSpecFromText( const sortSpecsCollection = processor.parseSortSpecFromText(
@ -170,6 +179,63 @@ describe('sortFolderItems', () => {
"------.md" "------.md"
]) ])
}) })
// Verifies that several date-extraction patterns can be combined in one sorting spec and
// that the dates extracted by different patterns are ordered against each other.
it('should correctly mix for sorting different date formats in file and folder names', () => {
// given
const processor: SortingSpecProcessor = new SortingSpecProcessor()
// One group line per date pattern. The literal 'mm-dd ' / 'dd-mm ' prefixes pick between the
// two all-numeric formats, which share the same textual shape; the plain \[yyyy-mm-dd] line
// comes after them in the spec.
const sortSpecTxt =
`
/+ ... \\[yyyy-Www (mm-dd)]
/+ ... \\[yyyy-Www]
/+ ... mm-dd \\[yyyy-mm-dd]
/+ ... dd-mm \\[yyyy-dd-mm]
/+ ... \\[yyyy-mm-dd]
/+ ... \\[Mmm-dd-yyyy]
/+ \\[dd-Mmm-yyyy] ...
> a-z
`
const PARENT_PATH = 'parent/folder/path'
const sortSpecsCollection = processor.parseSortSpecFromText(
sortSpecTxt.split('\n'),
PARENT_PATH,
'file name with the sorting, irrelevant here'
)
// Start from the week-named mock children and add files that use the non-week date formats
const folder: TFolder = mockTFolderWithDateWeekNamedChildrenForISOvsUSweekNumberingTest(PARENT_PATH)
folder.children.push(...[
mockTFile('File 2021-12-14', 'md'),
mockTFile('File mm-dd 2020-12-30', 'md'), // mm-dd
mockTFile('File dd-mm 2020-31-12', 'md'), // dd-mm
mockTFile('File Mar-08-2021', 'md'),
mockTFile('18-Dec-2021 file', 'md'),
])
const sortSpec: CustomSortSpec = sortSpecsCollection?.sortSpecByPath![PARENT_PATH]!
const ctx: ProcessingContext = {}
// when
const result: Array<TAbstractFile> = sortFolderItems(folder, folder.children, sortSpec, ctx, OS_alphabetical)
// then
// Expected order runs from the latest date to the earliest. Week-only names (yyyy-Www) are
// placed by the U.S. week numbering, i.e. by the 'US:' date embedded in each mock name.
// Names without an extension are presumably folders from the mock - confirm.
const orderedNames = result.map(f => f.name)
expect(orderedNames).toEqual([
'FFF1 ISO:2022-01-03 US:2021-12-27 2021-W53.md',
'FFF2 ISO:2021-12-27 US:2021-12-20 2021-W52.md',
"18-Dec-2021 file.md",
'C 2021-W51 (12-17).md',
"File 2021-12-14.md",
'D ISO:2021-12-20 US:2021-12-13 2021-W51.md',
"File Mar-08-2021.md",
'A 2021-W10 (03-05).md',
'B ISO:2021-03-08 US:2021-03-01 2021-W10',
'E 2021-W1 (01-01)',
"File dd-mm 2020-31-12.md",
"File mm-dd 2020-12-30.md",
'F ISO:2021-01-04 US:2020-12-28 2021-W1',
"------.md" // presumably matched by none of the date groups, hence last - confirm
])
})
}) })

View File

@ -10,7 +10,12 @@ import {
CompoundRomanNumberDotRegexStr, CompoundRomanNumberDotRegexStr,
CompoundRomanNumberDashRegexStr, CompoundRomanNumberDashRegexStr,
WordInASCIIRegexStr, WordInASCIIRegexStr,
WordInAnyLanguageRegexStr, getNormalizedDate_dd_Mmm_yyyy_NormalizerFn WordInAnyLanguageRegexStr,
getNormalizedDate_dd_Mmm_yyyy_NormalizerFn,
getNormalizedDate_yyyy_Www_NormalizerFn,
getNormalizedDate_yyyy_Www_mm_dd_NormalizerFn,
getNormalizedDate_yyyy_dd_mm_NormalizerFn,
getNormalizedDate_yyyy_mm_dd_NormalizerFn
} from "../../custom-sort/matchers"; } from "../../custom-sort/matchers";
describe('Plain numbers regexp', () => { describe('Plain numbers regexp', () => {
@ -431,3 +436,25 @@ describe('getNormalizedDate_dd_Mmm_yyyy_NormalizerFn', () => {
expect(getNormalizedDate_dd_Mmm_yyyy_NormalizerFn(s)).toBe(out) expect(getNormalizedDate_dd_Mmm_yyyy_NormalizerFn(s)).toBe(out)
}) })
}) })
// Exercises both all-numeric date normalizers on the same inputs: the two formats share one
// textual shape, so each input is normalized once as yyyy-dd-mm and once as yyyy-mm-dd.
describe('getNormalizedDate_yyyy_dd_mm_NormalizerFn', () => {
    // Row layout: [input, expected when read as yyyy-dd-mm, expected when read as yyyy-mm-dd].
    // The last row expects an over-long field to be carried through as-is.
    const params = [
        ['2012-13-01', '2012-01-13//', '2012-13-01//'],
        ['0001-03-02', '0001-02-03//', '0001-03-02//'],
        ['7777-09-1234', '7777-1234-09//', '7777-09-1234//'],
    ];
    // The title has one placeholder per row column, so both expected values show up in the
    // test name (previously the third column was silently dropped from the title).
    it.each(params)('>%s< should become %s (yyyy-dd-mm) or %s (yyyy-mm-dd)', (s: string, outForDDMM: string, outForMMDD: string) => {
        expect(getNormalizedDate_yyyy_dd_mm_NormalizerFn(s)).toBe(outForDDMM)
        expect(getNormalizedDate_yyyy_mm_dd_NormalizerFn(s)).toBe(outForMMDD)
    })
})
// Checks the normalizer for titles of the form "yyyy-Www (mm-dd)": the expected output is
// built from the year and the parenthesized month-day; the week number does not appear in it.
describe('getNormalizedDate_yyyy_Www_mm_dd_NormalizerFn', () => {
    // Row layout: [matched text, expected normalized date]
    it.each([
        ['2012-W0 (01-13)', '2012-01-13//'],
        ['0002-W12 (02-03)', '0002-02-03//'],
    ])('>%s< should become %s', (matchedText: string, expected: string) => {
        const normalized = getNormalizedDate_yyyy_Www_mm_dd_NormalizerFn(matchedText)
        expect(normalized).toBe(expected)
    })
})