diff --git a/src/custom-sort/matchers.ts b/src/custom-sort/matchers.ts
index 0f0ae37..0163a6a 100644
--- a/src/custom-sort/matchers.ts
+++ b/src/custom-sort/matchers.ts
@@ -6,6 +6,8 @@ export const NumberRegexStr: string = ' *(\\d+)'; // Plain number
 export const CompoundNumberDotRegexStr: string = ' *(\\d+(?:\\.\\d+)*)'; // Compound number with dot as separator
 export const CompoundNumberDashRegexStr: string = ' *(\\d+(?:-\\d+)*)'; // Compound number with dash as separator
 
+export const Date_dd_Mmm_yyyy_RegexStr: string = ' *([0-3]?[0-9]-(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-\\d{4})'; // Date like 01-Jan-2020, captured as a single group
+
 export const DOT_SEPARATOR = '.'
 export const DASH_SEPARATOR = '-'
 
@@ -23,12 +25,16 @@ export const WordInAnyLanguageRegexStr = '(\\p{Letter}+)' // remember about th
 
 export const WordInASCIIRegexStr = '([a-zA-Z]+)'
 
-export function prependWithZeros(s: string, minLength: number) {
-    if (s.length < minLength) {
-        const delta: number = minLength - s.length;
-        return '000000000000000000000000000'.substring(0, delta) + s;
+export function prependWithZeros(s: string|undefined, minLength: number): string {
+    if ('string' === typeof s) {
+        if (s.length < minLength) {
+            const delta: number = minLength - s.length;
+            return '000000000000000000000000000'.substring(0, delta) + s;
+        } else {
+            return s;
+        }
     } else {
-        return s;
+        return prependWithZeros((s ?? '').toString(), minLength)
     }
 }
 
@@ -95,3 +101,25 @@ export function getNormalizedRomanNumber(s: string, separator?: string, places?:
         return `${prependWithZeros(romanToIntStr(s), places ?? DEFAULT_NORMALIZATION_PLACES)}//`
     }
 }
+
+const DAY_POSITIONS = '00'.length
+const MONTH_POSITIONS = '00'.length
+const YEAR_POSITIONS = '0000'.length
+
+// Three-letter month abbreviations; (index + 1) is the month number, a name not listed here maps to 0
+const MONTHS = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
+
+/**
+ * Rewrites a `dd-Mmm-yyyy` date (e.g. `13-Jan-2012`) as `yyyy-mm-dd//`, each part left-padded with zeros.
+ * The input is split on `-`; it is not validated here. An unknown month name becomes `00`
+ * and a missing component is rendered as zeros only.
+ * @param s the date text, expected to have been matched by Date_dd_Mmm_yyyy_RegexStr beforehand
+ * @returns the year-first form followed by `//`
+ */
+export function getNormalizedDate_dd_Mmm_yyyy_NormalizerFn(s: string): string | null {
+    // Assumption - the regex date matched against input s, no extensive defensive coding needed
+    const components = s.split('-')
+    const day = prependWithZeros(components[0], DAY_POSITIONS)
+    const month = prependWithZeros( `${1 + MONTHS.indexOf(components[1])}`, MONTH_POSITIONS)
+    const year = prependWithZeros(components[2], YEAR_POSITIONS)
+    return `${year}-${month}-${day}//`
+}
diff --git a/src/custom-sort/sorting-spec-processor.ts b/src/custom-sort/sorting-spec-processor.ts
index ebf6d08..d7298a5 100644
--- a/src/custom-sort/sorting-spec-processor.ts
+++ b/src/custom-sort/sorting-spec-processor.ts
@@ -16,7 +16,9 @@ import {
     CompoundRomanNumberDashRegexStr,
     CompoundRomanNumberDotRegexStr,
     DASH_SEPARATOR,
+    Date_dd_Mmm_yyyy_RegexStr,
     DOT_SEPARATOR,
+    getNormalizedDate_dd_Mmm_yyyy_NormalizerFn,
     getNormalizedNumber,
     getNormalizedRomanNumber,
     NumberRegexStr,
@@ -348,6 +350,8 @@ const InlineRegexSymbol_Digit1: string = '\\d'
 const InlineRegexSymbol_Digit2: string = '\\[0-9]'
 const InlineRegexSymbol_0_to_3: string = '\\[0-3]'
 
+const Date_dd_Mmm_yyyy_RegexSymbol: string = '\\[dd-Mmm-yyyy]'
+
 const InlineRegexSymbol_CapitalLetter: string = '\\C'
 const InlineRegexSymbol_LowercaseLetter: string = '\\l'
 
@@ -365,7 +369,8 @@ const sortingSymbolsArr: Array = [
     escapeRegexUnsafeCharacters(CompoundRomanNumberDotRegexSymbol),
     escapeRegexUnsafeCharacters(CompoundRomanNumberDashRegexSymbol),
     escapeRegexUnsafeCharacters(WordInASCIIRegexSymbol),
-    escapeRegexUnsafeCharacters(WordInAnyLanguageRegexSymbol)
+    escapeRegexUnsafeCharacters(WordInAnyLanguageRegexSymbol),
+    escapeRegexUnsafeCharacters(Date_dd_Mmm_yyyy_RegexSymbol)
 ]
 
 const sortingSymbolsRegex = new RegExp(sortingSymbolsArr.join('|'), 'gi')
@@ -433,6 +438,7 @@ export const CompoundDashRomanNumberNormalizerFn: NormalizerFn = (s: string) =>
 export const NumberNormalizerFn: NormalizerFn = (s: string) => getNormalizedNumber(s)
 export const CompoundDotNumberNormalizerFn: NormalizerFn = (s: string) => getNormalizedNumber(s, DOT_SEPARATOR)
 export const CompoundDashNumberNormalizerFn: NormalizerFn = (s: string) => getNormalizedNumber(s, DASH_SEPARATOR)
+export const Date_dd_Mmm_yyyy_NormalizerFn: NormalizerFn = (s: string) => getNormalizedDate_dd_Mmm_yyyy_NormalizerFn(s)
 
 export enum AdvancedRegexType {
     None, // to allow if (advancedRegex)
@@ -443,7 +449,8 @@ export enum AdvancedRegexType {
     CompoundDotRomanNumber,
     CompoundDashRomanNumber,
     WordInASCII,
-    WordInAnyLanguage
+    WordInAnyLanguage,
+    Date_dd_Mmm_yyyy
 }
 
 const sortingSymbolToRegexpStr: { [key: string]: RegExpSpecStr } = {
@@ -487,6 +494,11 @@ const sortingSymbolToRegexpStr: { [key: string]: RegExpSpecStr } = {
         normalizerFn: IdentityNormalizerFn,
         advancedRegexType: AdvancedRegexType.WordInAnyLanguage,
         unicodeRegex: true
+    },
+    [Date_dd_Mmm_yyyy_RegexSymbol]: { // Intentionally retain character case
+        regexpStr: Date_dd_Mmm_yyyy_RegexStr,
+        normalizerFn: Date_dd_Mmm_yyyy_NormalizerFn,
+        advancedRegexType: AdvancedRegexType.Date_dd_Mmm_yyyy
     }
 }
 
diff --git a/src/test/unit/matchers.spec.ts b/src/test/unit/matchers.spec.ts
index 12f825f..7515e37 100644
--- a/src/test/unit/matchers.spec.ts
+++ b/src/test/unit/matchers.spec.ts
@@ -10,7 +10,7 @@ import {
     CompoundRomanNumberDotRegexStr,
     CompoundRomanNumberDashRegexStr,
     WordInASCIIRegexStr,
-    WordInAnyLanguageRegexStr
+    WordInAnyLanguageRegexStr, getNormalizedDate_dd_Mmm_yyyy_NormalizerFn
 } from "../../custom-sort/matchers";
 
 describe('Plain numbers regexp', () => {
@@ -405,3 +405,29 @@ describe('getNormalizedRomanNumber', () => {
         expect(getNormalizedRomanNumber(s, separator, LEN)).toBe(out)
     })
 })
+
+describe('getNormalizedDate_dd_Mmm_yyyy_NormalizerFn', () => {
+    const params = [
+        ['13-Jan-2012', '2012-01-13//'],
+        ['3-Feb-2', '0002-02-03//'],
+        ['1-Mar-1900', '1900-03-01//'],
+        ['42-Apr-9999', '9999-04-42//'],
+        ['0-May-0', '0000-05-00//'],
+        ['21-Jun-2024', '2024-06-21//'],
+        ['7-Jul-1872', '1872-07-07//'],
+        ['15-Aug-1234', '1234-08-15//'],
+        ['1234-Sep-7777', '7777-09-1234//'],
+        ['3-Oct-2023', '2023-10-03//'],
+        ['8-Nov-2022', '2022-11-08//'],
+        ['18-Dec-2021', '2021-12-18//'],
+        ['88-Dec-2012', '2012-12-88//'], // Invalid case, Regexp on matcher in the caller should guard against this
+        ['13-JANUARY-2012', '2012-00-13//'], // Invalid case, Regexp on matcher in the caller should guard against this
+        ['1 .1', '0000-00-1 .1//'], // Invalid case, Regexp on matcher in the caller should guard against this
+        ['', '0000-00-00//'], // Invalid case, Regexp on matcher in the caller should guard against this
+        ['abc', '0000-00-abc//'], // Invalid case, Regexp on matcher in the caller should guard against this
+        ['def-abc', '0000-00-def//'], // Invalid case, Regexp on matcher in the caller should guard against this
+    ];
+    it.each(params)('>%s< should become %s', (s: string, out: string) => {
+        expect(getNormalizedDate_dd_Mmm_yyyy_NormalizerFn(s)).toBe(out)
+    })
+})