#171 - started a PoC of the metadata value extractors idea. What at a glance looked like low-hanging fruit turned out to be far too complex to be worth it.
This commit is contained in:
parent b096e4c8b4
commit 42a5f1feb2
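For context: the PoC teaches the sorting spec to accept an optional value-extractor spec after the metadata field name, so that a date buried inside a free-form metadata value can be normalized into a sortable key. A hypothetical spec line, with the syntax inferred from the parser changes below ('created-at' is an invented field name):

    < a-z by-metadata: created-at date(dd/mm/yyyy)

The 'date(dd/mm/yyyy)' part selects an extractor which finds a dd/mm/yyyy date in the raw metadata value and normalizes it to a sortable 'yyyy-mm-dd//' form.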
@@ -104,17 +104,35 @@ export function getNormalizedRomanNumber(s: string, separator?: string, places?:
 	}
 }
 
-const DAY_POSITIONS = '00'.length
+export const DAY_POSITIONS = '00'.length
-const MONTH_POSITIONS = '00'.length
+export const MONTH_POSITIONS = '00'.length
-const YEAR_POSITIONS = '0000'.length
+export const YEAR_POSITIONS = '0000'.length
 
 const MONTHS = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
 
-export function getNormalizedDate_dd_Mmm_yyyy_NormalizerFn(s: string): string | null {
+export function getNormalizedDate_NormalizerFn_for(separator: string, dayIdx: number, monthIdx: number, yearIdx: number, months?: string[]) {
+	return (s: string): string | null => {
+		// Assumption - the regex date matched against input s, no extensive defensive coding needed
+		const components = s.split(separator)
+		const day = prependWithZeros(components[dayIdx], DAY_POSITIONS)
+		const monthValue = months ? `${1 + months.indexOf(components[monthIdx])}` : components[monthIdx]
+		const month = prependWithZeros(monthValue, MONTH_POSITIONS)
+		const year = prependWithZeros(components[yearIdx], YEAR_POSITIONS)
+		return `${year}-${month}-${day}//`
+	}
+}
+
+export const getNormalizedDate_dd_Mmm_yyyy_NormalizerFn = getNormalizedDate_NormalizerFn_for('-', 0, 1, 2, MONTHS)
+
+/*
 // Assumption - the regex date matched against input s, no extensive defensive coding needed
 const components = s.split('-')
 const day = prependWithZeros(components[0], DAY_POSITIONS)
 const month = prependWithZeros( `${1 + MONTHS.indexOf(components[1])}`, MONTH_POSITIONS)
 const year = prependWithZeros(components[2], YEAR_POSITIONS)
 return `${year}-${month}-${day}//`
-}
+*/
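A usage sketch of the new factory (not part of the commit; example values only):

    // Hypothetical usage - build a normalizer for numeric mm/dd/yyyy dates;
    // no month-names array is needed for the purely numeric format:
    const normalizeUsDate = getNormalizedDate_NormalizerFn_for('/', 1, 0, 2)
    normalizeUsDate('05/03/2019')   // === '2019-05-03//'

    // The prebuilt dd-Mmm-yyyy normalizer resolves month names via MONTHS:
    getNormalizedDate_dd_Mmm_yyyy_NormalizerFn('13-Jan-2012')   // === '2012-01-13//'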
@@ -0,0 +1,54 @@
+import {
+	getNormalizedDate_NormalizerFn_for
+} from "./matchers";
+
+const DateExtractorSpecPattern1 = 'date(dd/mm/yyyy)'
+const DateExtractorRegex1 = new RegExp('\\d{2}/\\d{2}/\\d{4}')
+const DateExtractorNormalizer1 = getNormalizedDate_NormalizerFn_for('/', 0, 1, 2)
+
+const DateExtractorSpecPattern2 = 'date(mm/dd/yyyy)'
+const DateExtractorRegex2 = new RegExp('\\d{2}/\\d{2}/\\d{4}')
+const DateExtractorNormalizer2 = getNormalizedDate_NormalizerFn_for('/', 1, 0, 2)
+
+export interface MDataExtractor {
+	(mdataValue: string): string|undefined
+}
+
+export interface MDataExtractorParseResult {
+	m: MDataExtractor
+	remainder: string
+}
+
+export const tryParseAsMDataExtractorSpec = (s: string): MDataExtractorParseResult|undefined => {
+	// Simplistic initial implementation of the idea with two hardcoded extractors
+	if (s.trim().startsWith(DateExtractorSpecPattern1)) {
+		return {
+			m: extractorForPattern1,
+			remainder: s.substring(DateExtractorSpecPattern1.length).trim()
+		}
+	}
+	if (s.trim().startsWith(DateExtractorSpecPattern2)) {
+		return {
+			m: extractorForPattern2,
+			remainder: s.substring(DateExtractorSpecPattern2.length).trim()
+		}
+	}
+	return undefined
+}
+
+export function extractorForPattern1(mdataValue: string): string|undefined {
+	const hasDate = mdataValue?.match(DateExtractorRegex1)
+	if (hasDate && hasDate[0]) {
+		return DateExtractorNormalizer1(hasDate[0]) ?? undefined
+	} else {
+		return undefined
+	}
+}
+
+export function extractorForPattern2(mdataValue: string): string|undefined {
+	const hasDate = mdataValue?.match(DateExtractorRegex2)
+	if (hasDate && hasDate[0]) {
+		return DateExtractorNormalizer2(hasDate[0]) ?? undefined
+	} else {
+		return undefined
+	}
+}
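A sketch of how the new module is meant to be consumed (not part of the commit; inputs invented for illustration):

    // Hypothetical usage of the spec parser and the extractor it returns:
    const parsed = tryParseAsMDataExtractorSpec('date(dd/mm/yyyy) some-trailing-text')
    // parsed.m === extractorForPattern1, parsed.remainder === 'some-trailing-text'
    parsed?.m('Created at: 03/05/2019 | 22:00')   // === '2019-05-03//'
    tryParseAsMDataExtractorSpec('no-such-extractor')   // === undefined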
@@ -33,6 +33,10 @@ import {
 	MATCH_CHILDREN_2_SUFFIX,
 	NO_PRIORITY
 } from "./folder-matching-rules"
+import {
+	MDataExtractor,
+	tryParseAsMDataExtractorSpec
+} from "./mdata-extractors";
 
 interface ProcessingContext {
 	folderPath: string
@@ -1497,10 +1501,30 @@ export class SortingSpecProcessor {
 			orderSpec = hasDirectionPostfix ? orderSpec.substring(hasDirectionPostfix.lexeme.length).trim() : orderSpec
 
 			let metadataName: string|undefined
+			let metadataExtractor: MDataExtractor|undefined
 			if (orderSpec.startsWith(OrderByMetadataLexeme)) {
 				applyToMetadata = true
-				metadataName = orderSpec.substring(OrderByMetadataLexeme.length).trim() || undefined
-				orderSpec = '' // metadataName is unparsed, consumes the remainder string, even if malformed, e.g. with infix spaces
+				const metadataNameAndOptionalExtractorSpec = orderSpec.substring(OrderByMetadataLexeme.length).trim() || undefined
+				if (metadataNameAndOptionalExtractorSpec) {
+					if (metadataNameAndOptionalExtractorSpec.indexOf(' ') > -1) {
+						const metadataSpec = metadataNameAndOptionalExtractorSpec.split(' ')
+						metadataName = metadataSpec.shift()
+						const metadataExtractorSpec = metadataSpec?.shift()
+						const hasMetadataExtractor = metadataExtractorSpec ? tryParseAsMDataExtractorSpec(metadataExtractorSpec) : undefined
+						if (hasMetadataExtractor) {
+							metadataExtractor = hasMetadataExtractor.m
+						} else {
+							// TODO: raise a syntax error - metadata name followed by unrecognized text.
+							//  Take into account all of the segments resulting from the split(' ') - there could be more of them.
+						}
+						orderSpec = '' // Intentionally ignore anything beyond the metadata name and extractor
+					} else {
+						metadataName = metadataNameAndOptionalExtractorSpec
+						orderSpec = '' // Intentionally ignore anything beyond the metadata name (and no known extractor)
+					}
+				} else {
+					orderSpec = ''
+				}
 			}
 
 			// check for any superfluous text
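A hypothetical trace of the parsing branch above (assuming OrderByMetadataLexeme is 'by-metadata:' and an invented field name):

    // For '... by-metadata: created-at date(dd/mm/yyyy)':
    //   metadataName      === 'created-at'
    //   metadataExtractor === extractorForPattern1 (via tryParseAsMDataExtractorSpec)
    // For '... by-metadata: created-at' alone:
    //   metadataName === 'created-at', metadataExtractor stays undefined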
@@ -1553,7 +1577,14 @@ export class SortingSpecProcessor {
 			}
 			sortOrderSpec[level] = {
 				order: order!,
-				byMetadataField: metadataName
+				byMetadataField: metadataName,
+				metadataFieldExtractor: metadataExtractor
+
+				// TODO: ... and then carry the metadataFieldExtractor attribute down the parser,
+				//  handle it correctly in the 4-levels mdata sorting options and execute it at runtime.
+				//  Seems to be far too complex to be worth it.
 			}
 		}
 		return sortOrderSpec
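The abandoned remainder of the idea - executing the extractor at runtime - would presumably reduce to normalizing the raw metadata value before comparison. A minimal sketch under that assumption (the helper name is hypothetical, not part of the commit):

    // Hypothetical helper: derive the sort key from the raw metadata value,
    // falling back to the raw value when extraction yields nothing.
    function metadataSortKey(rawValue: string, extractor?: MDataExtractor): string {
        return extractor ? (extractor(rawValue) ?? rawValue) : rawValue
    }
    // metadataSortKey('Created at: 03/05/2019', extractorForPattern1) === '2019-05-03//'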
@@ -0,0 +1,38 @@
+import {
+	extractorForPattern1
+} from '../../custom-sort/mdata-extractors'
+import {
+	getNormalizedDate_dd_Mmm_yyyy_NormalizerFn
+} from '../../custom-sort/matchers'
+
+describe('extractorForPattern1', () => {
+	const params = [
+		// Positive
+		['03/05/2019', '2019-05-03//'],
+		['Created at: 03/05/2019', '2019-05-03//'],
+		['03/05/2019 | 22:00', '2019-05-03//'],
+		['Created at: 03/05/2019 | 22:00', '2019-05-03//'],
+
+		// TODO: more positive, then negative examples
+	];
+	it.each(params)('>%s< should become %s', (s: string, out: string) => {
+		expect(extractorForPattern1(s)).toBe(out)
+	})
+})
+
+// The dd-Mmm-yyyy samples don't match the dd/mm/yyyy regex of extractorForPattern1,
+// so they exercise the underlying normalizer directly
+describe('getNormalizedDate_dd_Mmm_yyyy_NormalizerFn', () => {
+	const params = [
+		// Positive
+		['13-Jan-2012', '2012-01-13//'],
+		['3-Feb-2', '0002-02-03//'],
+		['1-Mar-1900', '1900-03-01//'],
+		['42-Apr-9999', '9999-04-42//'],
+		['0-May-0', '0000-05-00//'],
+		['21-Jun-2024', '2024-06-21//'],
+		['7-Jul-1872', '1872-07-07//'],
+		['15-Aug-1234', '1234-08-15//'],
+		['1234-Sep-7777', '7777-09-1234//'],
+		['3-Oct-2023', '2023-10-03//'],
+		['8-Nov-2022', '2022-11-08//'],
+		['18-Dec-2021', '2021-12-18//'],
+		// Negative
+		['88-Dec-2012', '2012-12-88//'], // Invalid case - the regexp on the matcher in the caller should guard against this
+		['13-JANUARY-2012', '2012-00-13//'], // Invalid case - the regexp on the matcher in the caller should guard against this
+		['1 .1', '0000-00-1 .1//'], // Invalid case - the regexp on the matcher in the caller should guard against this
+		['', '0000-00-00//'], // Invalid case - the regexp on the matcher in the caller should guard against this
+		['abc', '0000-00-abc//'], // Invalid case - the regexp on the matcher in the caller should guard against this
+		['def-abc', '0000-00-def//'], // Invalid case - the regexp on the matcher in the caller should guard against this
+	];
+	it.each(params)('>%s< should become %s', (s: string, out: string) => {
+		expect(getNormalizedDate_dd_Mmm_yyyy_NormalizerFn(s)).toBe(out)
+	})
+})