#171 - started creating a PoC of the idea of metadata value extractors. What at first glance looked like low-hanging fruit turned out to be far too complex to be worth it.
This commit is contained in:
parent
b096e4c8b4
commit
42a5f1feb2
|
@ -104,17 +104,35 @@ export function getNormalizedRomanNumber(s: string, separator?: string, places?:
|
|||
}
|
||||
}
|
||||
|
||||
const DAY_POSITIONS = '00'.length
|
||||
const MONTH_POSITIONS = '00'.length
|
||||
const YEAR_POSITIONS = '0000'.length
|
||||
export const DAY_POSITIONS = '00'.length
|
||||
export const MONTH_POSITIONS = '00'.length
|
||||
export const YEAR_POSITIONS = '0000'.length
|
||||
|
||||
const MONTHS = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
|
||||
|
||||
export function getNormalizedDate_dd_Mmm_yyyy_NormalizerFn(s: string): string | null {
|
||||
/**
 * Builds a normalizer turning a date string into the form `yyyy-mm-dd//`.
 *
 * The input is split on `separator` and the day, month and year are picked from the
 * resulting components by index. Each component is passed through `prependWithZeros`
 * with the corresponding `*_POSITIONS` width.
 *
 * @param separator - text separating the date components in the input
 * @param dayIdx - index of the day component after splitting
 * @param monthIdx - index of the month component after splitting
 * @param yearIdx - index of the year component after splitting
 * @param months - optional list of month names; when given, the month component is looked up
 *   in this list and replaced with its 1-based position (a name missing from the list gives 0)
 * @returns a function normalizing a single date string
 */
export function getNormalizedDate_NormalizerFn_for(separator: string, dayIdx: number, monthIdx: number, yearIdx: number, months?: string[]) {
    return (s: string): string | null => {
        // Assumption - the regex date matched against input s, no extensive defensive coding needed
        const components = s.split(separator)
        const day = prependWithZeros(components[dayIdx], DAY_POSITIONS)
        // Look the name up in the list supplied by the caller, not in the module-level MONTHS,
        // otherwise a custom list of month names would be silently ignored
        const monthValue = months ? `${1 + months.indexOf(components[monthIdx])}` : components[monthIdx]
        const month = prependWithZeros(monthValue, MONTH_POSITIONS)
        const year = prependWithZeros(components[yearIdx], YEAR_POSITIONS)
        return `${year}-${month}-${day}//`
    }
}
|
||||
|
||||
export const getNormalizedDate_dd_Mmm_yyyy_NormalizerFn = getNormalizedDate_NormalizerFn_for('-', 0, 1, 2, MONTHS)
|
||||
|
||||
/*
|
||||
// Assumption - the regex date matched against input s, no extensive defensive coding needed
|
||||
const components = s.split('-')
|
||||
const day = prependWithZeros(components[0], DAY_POSITIONS)
|
||||
const month = prependWithZeros( `${1 + MONTHS.indexOf(components[1])}`, MONTH_POSITIONS)
|
||||
const year = prependWithZeros(components[2], YEAR_POSITIONS)
|
||||
return `${year}-${month}-${day}//`
|
||||
}
|
||||
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,54 @@
|
|||
import {
|
||||
getNormalizedDate_NormalizerFn_for
|
||||
} from "./matchers";
|
||||
|
||||
// Extractor spec 'date(dd/mm/yyyy)': the day is the first component, the month the second
const DateExtractorSpecPattern1 = 'date(dd/mm/yyyy)'
const DateExtractorRegex1 = /\d{2}\/\d{2}\/\d{4}/
const DateExtractorNormalizer1 = getNormalizedDate_NormalizerFn_for('/', 0, 1, 2)

// Extractor spec 'date(mm/dd/yyyy)': the month is the first component, the day the second
const DateExtractorSpecPattern2 = 'date(mm/dd/yyyy)'
const DateExtractorRegex2 = /\d{2}\/\d{2}\/\d{4}/
const DateExtractorNormalizer2 = getNormalizedDate_NormalizerFn_for('/', 1, 0, 2)
|
||||
|
||||
/**
 * Callable extracting a value out of a raw metadata value.
 *
 * @param mdataValue - the metadata value to extract from
 * @returns the extracted value, or `undefined` when nothing was extracted
 */
export interface MDataExtractor {
    (mdataValue: string): string | undefined
}
|
||||
|
||||
/**
 * Outcome of successfully recognizing an extractor specification.
 */
export interface MDataExtractorParseResult {
    /** The extractor selected by the recognized specification */
    m: MDataExtractor
    /** Text that followed the recognized specification */
    remainder: string
}
|
||||
|
||||
/**
 * Attempts to recognize a metadata value extractor specification at the beginning of `s`.
 *
 * Surrounding whitespace of `s` is ignored.
 *
 * @param s - text expected to start with an extractor specification
 * @returns the matching extractor together with the trimmed text following the specification,
 *   or `undefined` when `s` does not start with a known specification
 */
export const tryParseAsMDataExtractorSpec = (s: string): MDataExtractorParseResult|undefined => {
    // Simplistic initial implementation of the idea with hardcoded two extractors

    // Trim once and slice the trimmed text. Slicing the raw input would cut at the wrong
    // offset whenever `s` has leading whitespace, leaking part of the spec into the remainder
    const spec = s.trim()
    if (spec.startsWith(DateExtractorSpecPattern1)) {
        return {
            m: extractorForPattern1,
            remainder: spec.substring(DateExtractorSpecPattern1.length).trim()
        }
    }
    if (spec.startsWith(DateExtractorSpecPattern2)) {
        return {
            m: extractorForPattern2,
            remainder: spec.substring(DateExtractorSpecPattern2.length).trim()
        }
    }
    return undefined
}
|
||||
|
||||
/**
 * Finds the first text matching `DateExtractorRegex1` in the metadata value and
 * normalizes it with `DateExtractorNormalizer1`.
 *
 * @param mdataValue - the metadata value to search
 * @returns the normalized date, or `undefined` when there is no match
 *   or the normalizer yields `null`
 */
export function extractorForPattern1(mdataValue: string): string|undefined {
    const matchedDate = mdataValue?.match(DateExtractorRegex1)?.[0]
    if (!matchedDate) {
        return undefined
    }
    return DateExtractorNormalizer1(matchedDate) ?? undefined
}
|
||||
|
||||
/**
 * Finds the first text matching `DateExtractorRegex2` in the metadata value and
 * normalizes it with `DateExtractorNormalizer2`.
 *
 * @param mdataValue - the metadata value to search
 * @returns the normalized date, or `undefined` when there is no match
 *   or the normalizer yields `null`
 */
export function extractorForPattern2(mdataValue: string): string|undefined {
    const matchedDate = mdataValue?.match(DateExtractorRegex2)?.[0]
    if (!matchedDate) {
        return undefined
    }
    return DateExtractorNormalizer2(matchedDate) ?? undefined
}
|
|
@ -33,6 +33,10 @@ import {
|
|||
MATCH_CHILDREN_2_SUFFIX,
|
||||
NO_PRIORITY
|
||||
} from "./folder-matching-rules"
|
||||
import {
|
||||
MDataExtractor,
|
||||
tryParseAsMDataExtractorSpec
|
||||
} from "./mdata-extractors";
|
||||
|
||||
interface ProcessingContext {
|
||||
folderPath: string
|
||||
|
@ -1497,10 +1501,30 @@ export class SortingSpecProcessor {
|
|||
orderSpec = hasDirectionPostfix ? orderSpec.substring(hasDirectionPostfix.lexeme.length).trim() : orderSpec
|
||||
|
||||
let metadataName: string|undefined
|
||||
let metadataExtractor: MDataExtractor|undefined
|
||||
if (orderSpec.startsWith(OrderByMetadataLexeme)) {
|
||||
applyToMetadata = true
|
||||
metadataName = orderSpec.substring(OrderByMetadataLexeme.length).trim() || undefined
|
||||
orderSpec = '' // metadataName is unparsed, consumes the remainder string, even if malformed, e.g. with infix spaces
|
||||
const metadataNameAndOptionalExtractorSpec = orderSpec.substring(OrderByMetadataLexeme.length).trim() || undefined
|
||||
if (metadataNameAndOptionalExtractorSpec) {
|
||||
if (metadataNameAndOptionalExtractorSpec.indexOf(' ') > -1) {
|
||||
const metadataSpec = metadataNameAndOptionalExtractorSpec.split(' ')
|
||||
metadataName = metadataSpec.shift()
|
||||
const metadataExtractorSpec = metadataSpec?.shift()
|
||||
const hasMetadataExtractor = metadataExtractorSpec ? tryParseAsMDataExtractorSpec(metadataExtractorSpec) : undefined
|
||||
if (hasMetadataExtractor) {
|
||||
metadataExtractor = hasMetadataExtractor.m
|
||||
} else {
|
||||
// TODO: raise error of syntax error - metadata name followed by unrecognized text
|
||||
// take into account all of the texts resulting from the split(' ') - there could be more segments
|
||||
}
|
||||
orderSpec = '' // Intentionally ignore anything beyond the metadata name and extractor
|
||||
} else {
|
||||
metadataName = metadataNameAndOptionalExtractorSpec
|
||||
orderSpec = '' // Intentionally ignore anything beyond the metadata name (and no known extractor)
|
||||
}
|
||||
} else {
|
||||
orderSpec = ''
|
||||
}
|
||||
}
|
||||
|
||||
// check for any superfluous text
|
||||
|
@ -1553,7 +1577,14 @@ export class SortingSpecProcessor {
|
|||
}
|
||||
sortOrderSpec[level] = {
|
||||
order: order!,
|
||||
byMetadataField: metadataName
|
||||
byMetadataField: metadataName,
|
||||
|
||||
metadataFieldExtractor: metadataExtractor
|
||||
|
||||
... and then carry the metadataFieldExtractor attribute down the parser, handle it correctly in the 4-level mdata sorting options
|
||||
and execute at runtime
|
||||
|
||||
Seems to be far too complex to be worth it.
|
||||
}
|
||||
}
|
||||
return sortOrderSpec
|
||||
|
|
|
@ -0,0 +1,38 @@
|
|||
import {
|
||||
extractorForPattern1
|
||||
} from '../../custom-sort/mdata-extractors'
|
||||
|
||||
describe('extractorForPattern1', () => {
    // Each row: [metadata value, expected result of the extractor]
    const params: Array<[string, string | undefined]> = [
        // Positive - a dd/mm/yyyy date is present somewhere in the value
        ['03/05/2019', '2019-05-03//'],
        ['Created at: 03/05/2019', '2019-05-03//'],
        ['03/05/2019 | 22:00', '2019-05-03//'],
        ['Created at: 03/05/2019 | 22:00', '2019-05-03//'],

        // TODO: more positive than negative examples

        // Negative - close to, but not matching dd/mm/yyyy
        ['3/5/2019', undefined],
        ['03/05/19', undefined],

        // Negative - dates in the dd-Mmm-yyyy format are not recognized by this extractor,
        // it only looks for dd/mm/yyyy, hence nothing is extracted
        ['13-Jan-2012', undefined],
        ['3-Feb-2', undefined],
        ['1-Mar-1900', undefined],
        ['42-Apr-9999', undefined],
        ['0-May-0', undefined],
        ['21-Jun-2024', undefined],
        ['7-Jul-1872', undefined],
        ['15-Aug-1234', undefined],
        ['1234-Sep-7777', undefined],
        ['3-Oct-2023', undefined],
        ['8-Nov-2022', undefined],
        ['18-Dec-2021', undefined],
        ['88-Dec-2012', undefined],
        ['13-JANUARY-2012', undefined],

        // Negative - no date at all
        ['1 .1', undefined],
        ['', undefined],
        ['abc', undefined],
        ['def-abc', undefined],
    ];
    it.each(params)('>%s< should become %s', (s: string, out: string | undefined) => {
        expect(extractorForPattern1(s)).toBe(out)
    })
})
|
Loading…
Reference in New Issue