#178 - week-number based date extraction patterns for titles

- more unit tests
#191 - added two obvious date formats yyyy-mm-dd and yyyy-dd-mm
This commit is contained in:
SebastianMC 2025-01-14 17:37:02 +01:00
parent 975f6ee26e
commit b142d1951e
4 changed files with 132 additions and 13 deletions

View File

@ -10,11 +10,15 @@ export const NumberRegexStr: string = ' *(\\d+)'; // Plain number
export const CompoundNumberDotRegexStr: string = ' *(\\d+(?:\\.\\d+)*)'; // Compound number with dot as separator
export const CompoundNumberDashRegexStr: string = ' *(\\d+(?:-\\d+)*)'; // Compound number with dash as separator
// Date like 2020-01-31: a 4-digit group followed by two 2-digit groups, dash-separated
export const Date_yyyy_mm_dd_RegexStr: string = ' *(\\d{4}-\\d{2}-\\d{2})'
// Reuses the yyyy-mm-dd pattern: both formats share the same 4-2-2 digit layout,
// so the regex alone cannot distinguish them - the field order has to be decided elsewhere
export const Date_yyyy_dd_mm_RegexStr: string = Date_yyyy_mm_dd_RegexStr
export const Date_dd_Mmm_yyyy_RegexStr: string = ' *([0-3]*[0-9]-(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-\\d{4})'; // Date like 01-Jan-2020
export const Date_Mmm_dd_yyyy_RegexStr: string = ' *((?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-[0-3]*[0-9]-\\d{4})'; // Date like Jan-01-2020
export const Date_yyyy_Www_mm_dd_RegexStr: string = ' *(\\d{4}-W\\d{1,2} \\(\\d{2}-\\d{2}\\))'
export const Date_yyyy_Www_RegexStr: string = ' *(\\d{4}-W\\d{1,2})'
export const Date_yyyy_WwwISO_RegexStr: string = ' *(\\d{4}-W\\d{1,2})'
export const Date_yyyy_Www_RegexStr: string = Date_yyyy_WwwISO_RegexStr
export const DOT_SEPARATOR = '.'
export const DASH_SEPARATOR = '-'
@ -128,6 +132,8 @@ export function getNormalizedDate_NormalizerFn_for(separator: string, dayIdx: nu
}
}
// Normalizers for dash-separated dates, built by getNormalizedDate_NormalizerFn_for().
// The first argument is the separator and the second is dayIdx; the two following numbers are
// presumably the month and year positions within the split date - TODO confirm against the factory.
export const getNormalizedDate_yyyy_mm_dd_NormalizerFn = getNormalizedDate_NormalizerFn_for('-', 2, 1, 0)
// Same textual layout as yyyy-mm-dd, but the day is read from position 1 instead of position 2
export const getNormalizedDate_yyyy_dd_mm_NormalizerFn = getNormalizedDate_NormalizerFn_for('-', 1, 2, 0)
// The formats with a month name additionally pass MONTHS - presumably a month-name lookup; verify in the factory
export const getNormalizedDate_dd_Mmm_yyyy_NormalizerFn = getNormalizedDate_NormalizerFn_for('-', 0, 1, 2, MONTHS)
export const getNormalizedDate_Mmm_dd_yyyy_NormalizerFn = getNormalizedDate_NormalizerFn_for('-', 1, 0, 2, MONTHS)

View File

@ -24,6 +24,8 @@ import {
DOT_SEPARATOR,
getNormalizedDate_dd_Mmm_yyyy_NormalizerFn,
getNormalizedDate_Mmm_dd_yyyy_NormalizerFn,
getNormalizedDate_yyyy_mm_dd_NormalizerFn,
getNormalizedDate_yyyy_dd_mm_NormalizerFn,
getNormalizedDate_yyyy_Www_mm_dd_NormalizerFn,
getNormalizedDate_yyyy_WwwISO_NormalizerFn,
getNormalizedDate_yyyy_Www_NormalizerFn,
@ -32,7 +34,7 @@ import {
NumberRegexStr,
RomanNumberRegexStr,
WordInAnyLanguageRegexStr,
WordInASCIIRegexStr
WordInASCIIRegexStr, Date_yyyy_WwwISO_RegexStr, Date_yyyy_mm_dd_RegexStr, Date_yyyy_dd_mm_RegexStr
} from "./matchers";
import {
FolderWildcardMatching,
@ -357,6 +359,8 @@ const InlineRegexSymbol_Digit1: string = '\\d'
const InlineRegexSymbol_Digit2: string = '\\[0-9]'
const InlineRegexSymbol_0_to_3: string = '\\[0-3]'
// Symbols for the two all-numeric date formats; after string unescaping they read \[yyyy-mm-dd] and \[yyyy-dd-mm]
const Date_yyyy_mm_dd_RegexSymbol: string = '\\[yyyy-mm-dd]'
const Date_yyyy_dd_mm_RegexSymbol: string = '\\[yyyy-dd-mm]'
const Date_dd_Mmm_yyyy_RegexSymbol: string = '\\[dd-Mmm-yyyy]'
const Date_Mmm_dd_yyyy_RegexSymbol: string = '\\[Mmm-dd-yyyy]'
const Date_yyyy_Www_mm_dd_RegexSymbol: string = '\\[yyyy-Www (mm-dd)]'
@ -381,6 +385,8 @@ const sortingSymbolsArr: Array<string> = [
escapeRegexUnsafeCharacters(CompoundRomanNumberDashRegexSymbol),
escapeRegexUnsafeCharacters(WordInASCIIRegexSymbol),
escapeRegexUnsafeCharacters(WordInAnyLanguageRegexSymbol),
escapeRegexUnsafeCharacters(Date_yyyy_mm_dd_RegexSymbol),
escapeRegexUnsafeCharacters(Date_yyyy_dd_mm_RegexSymbol),
escapeRegexUnsafeCharacters(Date_dd_Mmm_yyyy_RegexSymbol),
escapeRegexUnsafeCharacters(Date_Mmm_dd_yyyy_RegexSymbol),
escapeRegexUnsafeCharacters(Date_yyyy_Www_mm_dd_RegexSymbol),
@ -453,6 +459,8 @@ export const CompoundDashRomanNumberNormalizerFn: NormalizerFn = (s: string) =>
export const NumberNormalizerFn: NormalizerFn = (s: string) => getNormalizedNumber(s)
export const CompoundDotNumberNormalizerFn: NormalizerFn = (s: string) => getNormalizedNumber(s, DOT_SEPARATOR)
export const CompoundDashNumberNormalizerFn: NormalizerFn = (s: string) => getNormalizedNumber(s, DASH_SEPARATOR)
// NormalizerFn-typed wrappers that delegate to the corresponding getNormalizedDate_* normalizers
export const Date_yyyy_mm_dd_NormalizerFn: NormalizerFn = (s: string) => getNormalizedDate_yyyy_mm_dd_NormalizerFn(s)
export const Date_yyyy_dd_mm_NormalizerFn: NormalizerFn = (s: string) => getNormalizedDate_yyyy_dd_mm_NormalizerFn(s)
export const Date_dd_Mmm_yyyy_NormalizerFn: NormalizerFn = (s: string) => getNormalizedDate_dd_Mmm_yyyy_NormalizerFn(s)
export const Date_Mmm_dd_yyyy_NormalizerFn: NormalizerFn = (s: string) => getNormalizedDate_Mmm_dd_yyyy_NormalizerFn(s)
export const Date_yyyy_Www_mm_dd_NormalizerFn: NormalizerFn = (s: string) => getNormalizedDate_yyyy_Www_mm_dd_NormalizerFn(s)
@ -469,6 +477,8 @@ export enum AdvancedRegexType {
CompoundDashRomanNumber,
WordInASCII,
WordInAnyLanguage,
Date_yyyy_mm_dd,
Date_yyyy_dd_mm,
Date_dd_Mmm_yyyy,
Date_Mmm_dd_yyyy,
Date_yyyy_Www_mm_dd_yyyy,
@ -518,6 +528,16 @@ const sortingSymbolToRegexpStr: { [key: string]: RegExpSpecStr } = {
advancedRegexType: AdvancedRegexType.WordInAnyLanguage,
unicodeRegex: true
},
[Date_yyyy_mm_dd_RegexSymbol]: { // Intentionally retain character case
regexpStr: Date_yyyy_mm_dd_RegexStr,
normalizerFn: Date_yyyy_mm_dd_NormalizerFn,
advancedRegexType: AdvancedRegexType.Date_yyyy_mm_dd
},
[Date_yyyy_dd_mm_RegexSymbol]: { // Intentionally retain character case
regexpStr: Date_yyyy_dd_mm_RegexStr,
normalizerFn: Date_yyyy_dd_mm_NormalizerFn,
advancedRegexType: AdvancedRegexType.Date_yyyy_dd_mm
},
[Date_dd_Mmm_yyyy_RegexSymbol]: { // Intentionally retain character case
regexpStr: Date_dd_Mmm_yyyy_RegexStr,
normalizerFn: Date_dd_Mmm_yyyy_NormalizerFn,
@ -534,7 +554,7 @@ const sortingSymbolToRegexpStr: { [key: string]: RegExpSpecStr } = {
advancedRegexType: AdvancedRegexType.Date_yyyy_Www_mm_dd_yyyy
},
[Date_yyyy_WwwISO_RegexSymbol]: { // Intentionally retain character case
regexpStr: Date_yyyy_Www_RegexStr,
regexpStr: Date_yyyy_WwwISO_RegexStr,
normalizerFn: Date_yyyy_WwwISO_NormalizerFn,
advancedRegexType: AdvancedRegexType.Date_yyyy_WwwISO
},

View File

@ -7,7 +7,11 @@ import {
DEFAULT_FOLDER_CTIME,
determineFolderDatesIfNeeded,
determineSortingGroup,
FolderItemForSorting, OS_alphabetical, OS_byCreatedTime, ProcessingContext, sortFolderItems
FolderItemForSorting,
OS_alphabetical,
OS_byCreatedTime,
ProcessingContext,
sortFolderItems
} from "../../custom-sort/custom-sort";
import {
CustomSortGroupType,
@ -29,11 +33,12 @@ import {
} from "../../custom-sort/sorting-spec-processor";
describe('sortFolderItems', () => {
it('should correctly handle Mmm-dd-yyyy pattern in file names', () => {
it('should correctly handle Mmm-dd-yyyy pattern in file and folder names', () => {
// given
const processor: SortingSpecProcessor = new SortingSpecProcessor()
const sortSpecTxt =
` ... \\[Mmm-dd-yyyy]
`
... \\[Mmm-dd-yyyy]
> a-z
`
const PARENT_PATH = 'parent/folder/path'
@ -60,11 +65,12 @@ describe('sortFolderItems', () => {
'AAA Jan-01-2012'
])
})
it('should correctly handle yyyy-Www (mm-dd) pattern in file names', () => {
it('should correctly handle yyyy-Www (mm-dd) pattern in file and folder names', () => {
// given
const processor: SortingSpecProcessor = new SortingSpecProcessor()
const sortSpecTxt =
` ... \\[yyyy-Www (mm-dd)]
`
... \\[yyyy-Www (mm-dd)]
< a-z
------
`
@ -94,11 +100,12 @@ describe('sortFolderItems', () => {
"------.md"
])
})
it('should correctly handle yyyy-WwwISO pattern in file names', () => {
it('should correctly handle yyyy-WwwISO pattern in file and folder names', () => {
// given
const processor: SortingSpecProcessor = new SortingSpecProcessor()
const sortSpecTxt =
` /+ ... \\[yyyy-Www (mm-dd)]
`
/+ ... \\[yyyy-Www (mm-dd)]
/+ ... \\[yyyy-WwwISO]
< a-z
`
@ -132,13 +139,15 @@ describe('sortFolderItems', () => {
"------.md"
])
})
it('should correctly handle yyyy-Www pattern in file names', () => {
it('should correctly handle yyyy-Www pattern in file and folder names', () => {
// given
const processor: SortingSpecProcessor = new SortingSpecProcessor()
const sortSpecTxt =
` /+ ... \\[yyyy-Www (mm-dd)]
`
/+ ... \\[yyyy-Www (mm-dd)]
/+ ... \\[yyyy-Www]
> a-z
... \\-d+
`
const PARENT_PATH = 'parent/folder/path'
const sortSpecsCollection = processor.parseSortSpecFromText(
@ -170,6 +179,63 @@ describe('sortFolderItems', () => {
"------.md"
])
})
// Mixed-format scenario: names carrying dates in different notations (week-based, yyyy-mm-dd,
// yyyy-dd-mm, Mmm-dd-yyyy, dd-Mmm-yyyy) are expected to land in a single list ordered by date.
it('should correctly mix for sorting different date formats in file and folder names', () => {
// given
const processor: SortingSpecProcessor = new SortingSpecProcessor()
const sortSpecTxt =
`
/+ ... \\[yyyy-Www (mm-dd)]
/+ ... \\[yyyy-Www]
/+ ... mm-dd \\[yyyy-mm-dd]
/+ ... dd-mm \\[yyyy-dd-mm]
/+ ... \\[yyyy-mm-dd]
/+ ... \\[Mmm-dd-yyyy]
/+ \\[dd-Mmm-yyyy] ...
> a-z
`
const PARENT_PATH = 'parent/folder/path'
const sortSpecsCollection = processor.parseSortSpecFromText(
sortSpecTxt.split('\n'),
PARENT_PATH,
'file name with the sorting, irrelevant here'
)
// Start from the week-named fixture and add files that use the other date notations
const folder: TFolder = mockTFolderWithDateWeekNamedChildrenForISOvsUSweekNumberingTest(PARENT_PATH)
folder.children.push(...[
mockTFile('File 2021-12-14', 'md'),
mockTFile('File mm-dd 2020-12-30', 'md'), // mm-dd
mockTFile('File dd-mm 2020-31-12', 'md'), // dd-mm
mockTFile('File Mar-08-2021', 'md'),
mockTFile('18-Dec-2021 file', 'md'),
])
const sortSpec: CustomSortSpec = sortSpecsCollection?.sortSpecByPath![PARENT_PATH]!
const ctx: ProcessingContext = {}
// when
const result: Array<TAbstractFile> = sortFolderItems(folder, folder.children, sortSpec, ctx, OS_alphabetical)
// then
// U.S. standard of weeks numbering
const orderedNames = result.map(f => f.name)
// Expected: newest date first, regardless of which notation the date was written in;
// the item without any date ("------.md") comes last.
// NOTE(review): entries without the ".md" suffix are presumably folders from the fixture - confirm.
expect(orderedNames).toEqual([
'FFF1 ISO:2022-01-03 US:2021-12-27 2021-W53.md',
'FFF2 ISO:2021-12-27 US:2021-12-20 2021-W52.md',
"18-Dec-2021 file.md",
'C 2021-W51 (12-17).md',
"File 2021-12-14.md",
'D ISO:2021-12-20 US:2021-12-13 2021-W51.md',
"File Mar-08-2021.md",
'A 2021-W10 (03-05).md',
'B ISO:2021-03-08 US:2021-03-01 2021-W10',
'E 2021-W1 (01-01)',
"File dd-mm 2020-31-12.md",
"File mm-dd 2020-12-30.md",
'F ISO:2021-01-04 US:2020-12-28 2021-W1',
"------.md"
])
})
})

View File

@ -10,7 +10,12 @@ import {
CompoundRomanNumberDotRegexStr,
CompoundRomanNumberDashRegexStr,
WordInASCIIRegexStr,
WordInAnyLanguageRegexStr, getNormalizedDate_dd_Mmm_yyyy_NormalizerFn
WordInAnyLanguageRegexStr,
getNormalizedDate_dd_Mmm_yyyy_NormalizerFn,
getNormalizedDate_yyyy_Www_NormalizerFn,
getNormalizedDate_yyyy_Www_mm_dd_NormalizerFn,
getNormalizedDate_yyyy_dd_mm_NormalizerFn,
getNormalizedDate_yyyy_mm_dd_NormalizerFn
} from "../../custom-sort/matchers";
describe('Plain numbers regexp', () => {
@ -431,3 +436,25 @@ describe('getNormalizedDate_dd_Mmm_yyyy_NormalizerFn', () => {
expect(getNormalizedDate_dd_Mmm_yyyy_NormalizerFn(s)).toBe(out)
})
})
// Runs the same input through both the yyyy-dd-mm and the yyyy-mm-dd normalizer to show
// that they differ only in which of the two trailing groups is treated as the day.
describe('getNormalizedDate_yyyy_dd_mm_NormalizerFn', () => {
// Each row: [input, expected output when read as yyyy-dd-mm, expected output when read as yyyy-mm-dd]
const params = [
['2012-13-01', '2012-01-13//', '2012-13-01//'],
['0001-03-02', '0001-02-03//', '0001-03-02//'],
// A 4-digit trailing group: the expected values show it being passed through unchanged
['7777-09-1234', '7777-1234-09//', '7777-09-1234//'],
];
it.each(params)('>%s< should become %s', (s: string, outForDDMM: string, outForMMDD: string) => {
expect(getNormalizedDate_yyyy_dd_mm_NormalizerFn(s)).toBe(outForDDMM)
expect(getNormalizedDate_yyyy_mm_dd_NormalizerFn(s)).toBe(outForMMDD)
})
})
// Checks normalization of names like "2012-W0 (01-13)".
describe('getNormalizedDate_yyyy_Www_mm_dd_NormalizerFn', () => {
// Each row: [input, expected normalized output]. The expected values contain only the year
// and the parenthesized mm-dd part; the week number does not appear in the output.
const params = [
['2012-W0 (01-13)', '2012-01-13//'],
['0002-W12 (02-03)', '0002-02-03//'],
];
it.each(params)('>%s< should become %s', (s: string, out: string) => {
expect(getNormalizedDate_yyyy_Www_mm_dd_NormalizerFn(s)).toBe(out)
})
})