// NOTE(review): `toString` does not appear to be referenced anywhere in this file —
// possibly an accidental auto-import. Confirm before removing.
import {toString} from "builtin-modules";

// Regex fragments (as strings, to be embedded into larger expressions).
// Each fragment tolerates leading spaces and exposes exactly ONE capturing group
// holding the matched value without those leading spaces.
export const RomanNumberRegexStr: string = ' *([MDCLXVI]+)'; // Roman number
export const CompoundRomanNumberDotRegexStr: string = ' *([MDCLXVI]+(?:\\.[MDCLXVI]+)*)'; // Compound Roman number with dot as separator
export const CompoundRomanNumberDashRegexStr: string = ' *([MDCLXVI]+(?:-[MDCLXVI]+)*)'; // Compound Roman number with dash as separator

export const NumberRegexStr: string = ' *(\\d+)'; // Plain number
export const CompoundNumberDotRegexStr: string = ' *(\\d+(?:\\.\\d+)*)'; // Compound number with dot as separator
export const CompoundNumberDashRegexStr: string = ' *(\\d+(?:-\\d+)*)'; // Compound number with dash as separator

// Date like 01-Jan-2020.
// The month alternation must be a (non-capturing) group: curly braces are not a grouping
// construct in a regex, so `{Jan|Feb|...}` would only match the literal texts "{Jan", "Feb", ..., "Dec}".
// The whole date is wrapped in a single capturing group, like every other fragment above.
export const Date_dd_Mmm_yyyy_RegexStr: string = ' *([0-3]*[0-9]-(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-\\d{4})';

export const DOT_SEPARATOR = '.'
export const DASH_SEPARATOR = '-'

const SLASH_SEPARATOR = '/' // ASCII 47
const PIPE_SEPARATOR = '|'  // ASCII 124

// Every normalized value ends with a double slash - see normalizeCompound() for the reason
const NORMALIZED_TERMINATOR = SLASH_SEPARATOR + SLASH_SEPARATOR

export const DEFAULT_NORMALIZATION_PLACES = 8; // Fixed width of a normalized number (with leading zeros)

// Property escapes:
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Unicode_Property_Escapes
// https://stackoverflow.com/a/48902765
//
// Using Unicode property escapes to express 'a letter in any modern language'
export const WordInAnyLanguageRegexStr = '(\\p{Letter}+)' // requires the /u flag -> /\p{Letter}+/u

export const WordInASCIIRegexStr = '([a-zA-Z]+)'

/**
 * Left-pads a string with '0' characters up to the requested length.
 *
 * @param s - text to pad; a non-string value (e.g. `undefined`) is converted to text first,
 *            with `undefined`/`null` treated as the empty string
 * @param minLength - minimal length of the result; longer inputs are returned unchanged
 * @returns the input prefixed with as many zeros as needed to reach `minLength`
 */
export function prependWithZeros(s: string|undefined, minLength: number): string {
	const text: string = 'string' === typeof s ? s : (s ?? '').toString()
	// padStart has no upper limit on the number of zeros added
	// (a fixed-length literal of zeros would silently under-pad large widths)
	return text.padStart(minLength, '0')
}

// Shared implementation of the number normalizers.
//
// The PIPE_SEPARATOR between components and the trailing // are to allow correct sorting
// of compound numbers: 1-1 should go before 1-1-1 and 1 should go yet earlier.
// That's why the conversion to:
//    1//
//    1|1//
//    1|1|1//
// guarantees correct order (/ = ASCII 47, | = ASCII 124, digits sit in between)
function normalizeCompound(
	s: string,
	separator: string | undefined,
	places: number | undefined,
	toDecimalStr: (component: string) => string
): string {
	const width: number = places ?? DEFAULT_NORMALIZATION_PLACES
	// Without a separator the whole input is a single component.
	// With a separator, empty components (e.g. produced by a doubled separator) are dropped.
	const components: string[] = separator ? s.split(separator).filter((c) => c) : [s]
	return components
		.map((c) => prependWithZeros(toDecimalStr(c), width))
		.join(PIPE_SEPARATOR) + NORMALIZED_TERMINATOR
}

/**
 * Converts a plain or compound decimal number into a fixed-width, lexicographically sortable form.
 * Accepts trimmed number (compound or not) as parameter. No internal verification!!!
 *
 * @param s - the number text, e.g. `'12'` or `'1.2.3'`
 * @param separator - separator of a compound number; when omitted (or empty) `s` is treated as a single number
 * @param places - width of each zero-padded component, defaults to DEFAULT_NORMALIZATION_PLACES
 * @returns e.g. `'00000001|00000002//'` for `('1.2', '.')`
 */
export function getNormalizedNumber(s: string = '', separator?: string, places?: number): string | null {
	return normalizeCompound(s, separator, places, (component) => component)
}

// Values of the individual Roman digits, both letter cases are accepted
const ROMAN_DIGIT_VALUES: ReadonlyMap<string, number> = new Map<string, number>([
	['i', 1], ['I', 1],
	['v', 5], ['V', 5],
	['x', 10], ['X', 10],
	['l', 50], ['L', 50],
	['c', 100], ['C', 100],
	['d', 500], ['D', 500],
	['m', 1000], ['M', 1000]
])

// Value of the Roman digit found in the first character of c; 0 for empty input or any other character
function RomanCharToInt(c: string): number {
	if (!c) {
		return 0
	}
	return ROMAN_DIGIT_VALUES.get(c[0]) ?? 0
}

/**
 * Converts a Roman number to the text of its decimal value.
 * The input is not validated: characters which are not Roman digits count as 0.
 *
 * @param rs - Roman number, e.g. `'XIV'`
 * @returns decimal text, e.g. `'14'`; `'0'` for `null`/`undefined`/empty input
 */
export function romanToIntStr(rs: string): string {
	if (rs == null) return '0';

	let prev: number = RomanCharToInt(rs.charAt(0))
	let num: number = prev
	for (let i = 1; i < rs.length; i++) {
		const curr: number = RomanCharToInt(rs.charAt(i))
		if (curr <= prev) {
			num += curr
		} else {
			// Subtractive notation (IV, IX, XL, ...): the previous digit has already been added,
			// so take it back once and then subtract it once
			num += curr - 2 * prev
		}
		prev = curr
	}
	return `${num}`
}

/**
 * Converts a plain or compound Roman number into the same fixed-width, lexicographically
 * sortable form as getNormalizedNumber() produces for decimal numbers.
 *
 * @param s - the Roman number text, e.g. `'XIV'` or `'I-IV-X'`
 * @param separator - separator of a compound number; when omitted (or empty) `s` is treated as a single number
 * @param places - width of each zero-padded component, defaults to DEFAULT_NORMALIZATION_PLACES
 * @returns e.g. `'00000001|00000004//'` for `('I-IV', '-')`
 */
export function getNormalizedRomanNumber(s: string = '', separator?: string, places?: number): string | null {
	return normalizeCompound(s, separator, places, romanToIntStr)
}

const DAY_POSITIONS = '00'.length
const MONTH_POSITIONS = '00'.length
const YEAR_POSITIONS = '0000'.length

const MONTHS = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']

/**
 * Converts a date like `1-Feb-2021` into the sortable form `2021-02-01//`.
 *
 * @param s - date text in the dd-Mmm-yyyy format
 * @returns year, month number and day, zero-padded and dash-separated, followed by `//`.
 *          A month name not present in MONTHS yields month `00`.
 */
export function getNormalizedDate_dd_Mmm_yyyy_NormalizerFn(s: string): string | null {
	// Assumption - the regex date matched against input s, no extensive defensive coding needed
	const [dayPart, monthName, yearPart] = s.split('-')
	const day = prependWithZeros(dayPart, DAY_POSITIONS)
	// indexOf() is 0-based (and -1 for an unknown name), month numbers are 1-based
	const month = prependWithZeros(`${1 + MONTHS.indexOf(monthName)}`, MONTH_POSITIONS)
	const year = prependWithZeros(yearPart, YEAR_POSITIONS)
	return `${year}-${month}-${day}${NORMALIZED_TERMINATOR}`
}