#171 - started a PoC of the metadata value extractors idea. What at a glance looked like low-hanging fruit turned out to be far too complex to be worth it.
This commit is contained in:
parent b096e4c8b4
commit 42a5f1feb2
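For context: the PoC teaches the sorting spec to accept an optional value-extractor spec after the metadata field name, so that a date buried inside a free-form metadata value can be normalized into a sortable key. A hypothetical spec line, with the syntax inferred from the parser changes below ('created-at' is an invented field name):

    < a-z by-metadata: created-at date(dd/mm/yyyy)

The 'date(dd/mm/yyyy)' part selects an extractor which finds a dd/mm/yyyy date in the raw metadata value and normalizes it to a sortable 'yyyy-mm-dd//' form.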
@@ -104,17 +104,35 @@ export function getNormalizedRomanNumber(s: string, separator?: string, places?:
 	}
 }
 
-const DAY_POSITIONS = '00'.length
+export const DAY_POSITIONS = '00'.length
-const MONTH_POSITIONS = '00'.length
+export const MONTH_POSITIONS = '00'.length
-const YEAR_POSITIONS = '0000'.length
+export const YEAR_POSITIONS = '0000'.length
 
 const MONTHS = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
 
-export function getNormalizedDate_dd_Mmm_yyyy_NormalizerFn(s: string): string | null {
+export function getNormalizedDate_NormalizerFn_for(separator: string, dayIdx: number, monthIdx: number, yearIdx: number, months?: string[]) {
+	return (s: string): string | null => {
+		// Assumption - the regex date matched against input s, no extensive defensive coding needed
+		const components = s.split(separator)
+		const day = prependWithZeros(components[dayIdx], DAY_POSITIONS)
+		const monthValue = months ? `${1 + months.indexOf(components[monthIdx])}` : components[monthIdx]
+		const month = prependWithZeros(monthValue, MONTH_POSITIONS)
+		const year = prependWithZeros(components[yearIdx], YEAR_POSITIONS)
+		return `${year}-${month}-${day}//`
+	}
+}
+
+export const getNormalizedDate_dd_Mmm_yyyy_NormalizerFn = getNormalizedDate_NormalizerFn_for('-', 0, 1, 2, MONTHS)
+
+/*
 // Assumption - the regex date matched against input s, no extensive defensive coding needed
 const components = s.split('-')
 const day = prependWithZeros(components[0], DAY_POSITIONS)
 const month = prependWithZeros( `${1 + MONTHS.indexOf(components[1])}`, MONTH_POSITIONS)
 const year = prependWithZeros(components[2], YEAR_POSITIONS)
 return `${year}-${month}-${day}//`
-}
+*/
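A usage sketch of the new factory (not part of the commit; example values only):

    // Hypothetical usage - build a normalizer for numeric mm/dd/yyyy dates;
    // no month-names array is needed for the purely numeric format:
    const normalizeUsDate = getNormalizedDate_NormalizerFn_for('/', 1, 0, 2)
    normalizeUsDate('05/03/2019')   // === '2019-05-03//'

    // The prebuilt dd-Mmm-yyyy normalizer resolves month names via MONTHS:
    getNormalizedDate_dd_Mmm_yyyy_NormalizerFn('13-Jan-2012')   // === '2012-01-13//'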
@@ -0,0 +1,54 @@
+import {
+	getNormalizedDate_NormalizerFn_for
+} from "./matchers";
+
+const DateExtractorSpecPattern1 = 'date(dd/mm/yyyy)'
+const DateExtractorRegex1 = new RegExp('\\d{2}/\\d{2}/\\d{4}')
+const DateExtractorNormalizer1 = getNormalizedDate_NormalizerFn_for('/', 0, 1, 2)
+
+const DateExtractorSpecPattern2 = 'date(mm/dd/yyyy)'
+const DateExtractorRegex2 = new RegExp('\\d{2}/\\d{2}/\\d{4}')
+const DateExtractorNormalizer2 = getNormalizedDate_NormalizerFn_for('/', 1, 0, 2)
+
+export interface MDataExtractor {
+	(mdataValue: string): string|undefined
+}
+
+export interface MDataExtractorParseResult {
+	m: MDataExtractor
+	remainder: string
+}
+
+export const tryParseAsMDataExtractorSpec = (s: string): MDataExtractorParseResult|undefined => {
+	// Simplistic initial implementation of the idea with two hardcoded extractors
+	if (s.trim().startsWith(DateExtractorSpecPattern1)) {
+		return {
+			m: extractorForPattern1,
+			remainder: s.substring(DateExtractorSpecPattern1.length).trim()
+		}
+	}
+	if (s.trim().startsWith(DateExtractorSpecPattern2)) {
+		return {
+			m: extractorForPattern2,
+			remainder: s.substring(DateExtractorSpecPattern2.length).trim()
+		}
+	}
+	return undefined
+}
+
+export function extractorForPattern1(mdataValue: string): string|undefined {
+	const hasDate = mdataValue?.match(DateExtractorRegex1)
+	if (hasDate && hasDate[0]) {
+		return DateExtractorNormalizer1(hasDate[0]) ?? undefined
+	} else {
+		return undefined
+	}
+}
+
+export function extractorForPattern2(mdataValue: string): string|undefined {
+	const hasDate = mdataValue?.match(DateExtractorRegex2)
+	if (hasDate && hasDate[0]) {
+		return DateExtractorNormalizer2(hasDate[0]) ?? undefined
+	} else {
+		return undefined
+	}
+}
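A sketch of how the new module is meant to be consumed (not part of the commit; inputs invented for illustration):

    // Hypothetical usage of the spec parser and the extractor it returns:
    const parsed = tryParseAsMDataExtractorSpec('date(dd/mm/yyyy) some-trailing-text')
    // parsed.m === extractorForPattern1, parsed.remainder === 'some-trailing-text'
    parsed?.m('Created at: 03/05/2019 | 22:00')   // === '2019-05-03//'
    tryParseAsMDataExtractorSpec('no-such-extractor')   // === undefined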
@@ -33,6 +33,10 @@ import {
 	MATCH_CHILDREN_2_SUFFIX,
 	NO_PRIORITY
 } from "./folder-matching-rules"
+import {
+	MDataExtractor,
+	tryParseAsMDataExtractorSpec
+} from "./mdata-extractors";
 
 interface ProcessingContext {
 	folderPath: string
@@ -1497,10 +1501,30 @@ export class SortingSpecProcessor {
 			orderSpec = hasDirectionPostfix ? orderSpec.substring(hasDirectionPostfix.lexeme.length).trim() : orderSpec
 
 			let metadataName: string|undefined
+			let metadataExtractor: MDataExtractor|undefined
 			if (orderSpec.startsWith(OrderByMetadataLexeme)) {
 				applyToMetadata = true
-				metadataName = orderSpec.substring(OrderByMetadataLexeme.length).trim() || undefined
-				orderSpec = '' // metadataName is unparsed, consumes the remainder string, even if malformed, e.g. with infix spaces
+				const metadataNameAndOptionalExtractorSpec = orderSpec.substring(OrderByMetadataLexeme.length).trim() || undefined
+				if (metadataNameAndOptionalExtractorSpec) {
+					if (metadataNameAndOptionalExtractorSpec.indexOf(' ') > -1) {
+						const metadataSpec = metadataNameAndOptionalExtractorSpec.split(' ')
+						metadataName = metadataSpec.shift()
+						const metadataExtractorSpec = metadataSpec?.shift()
+						const hasMetadataExtractor = metadataExtractorSpec ? tryParseAsMDataExtractorSpec(metadataExtractorSpec) : undefined
+						if (hasMetadataExtractor) {
+							metadataExtractor = hasMetadataExtractor.m
+						} else {
+							// TODO: raise a syntax error - metadata name followed by unrecognized text.
+							//  Take into account all of the segments resulting from the split(' ') - there could be more of them.
+						}
+						orderSpec = '' // Intentionally ignore anything beyond the metadata name and extractor
+					} else {
+						metadataName = metadataNameAndOptionalExtractorSpec
+						orderSpec = '' // Intentionally ignore anything beyond the metadata name (and no known extractor)
+					}
+				} else {
+					orderSpec = ''
+				}
 			}
 
 			// check for any superfluous text
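A hypothetical trace of the parsing branch above (assuming OrderByMetadataLexeme is 'by-metadata:' and an invented field name):

    // For '... by-metadata: created-at date(dd/mm/yyyy)':
    //   metadataName      === 'created-at'
    //   metadataExtractor === extractorForPattern1 (via tryParseAsMDataExtractorSpec)
    // For '... by-metadata: created-at' alone:
    //   metadataName === 'created-at', metadataExtractor stays undefined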
@@ -1553,7 +1577,14 @@ export class SortingSpecProcessor {
 			}
 			sortOrderSpec[level] = {
 				order: order!,
-				byMetadataField: metadataName
+				byMetadataField: metadataName,
+				metadataFieldExtractor: metadataExtractor
+
+				// TODO: ... and then carry the metadataFieldExtractor attribute down the parser,
+				//  handle it correctly in the 4-levels mdata sorting options and execute it at runtime.
+				//  Seems to be far too complex to be worth it.
 			}
 		}
 		return sortOrderSpec
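The abandoned remainder of the idea - executing the extractor at runtime - would presumably reduce to normalizing the raw metadata value before comparison. A minimal sketch under that assumption (the helper name is hypothetical, not part of the commit):

    // Hypothetical helper: derive the sort key from the raw metadata value,
    // falling back to the raw value when extraction yields nothing.
    function metadataSortKey(rawValue: string, extractor?: MDataExtractor): string {
        return extractor ? (extractor(rawValue) ?? rawValue) : rawValue
    }
    // metadataSortKey('Created at: 03/05/2019', extractorForPattern1) === '2019-05-03//'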
@@ -0,0 +1,38 @@
+import {
+	extractorForPattern1
+} from '../../custom-sort/mdata-extractors'
+import {
+	getNormalizedDate_dd_Mmm_yyyy_NormalizerFn
+} from '../../custom-sort/matchers'
+
+describe('extractorForPattern1', () => {
+	const params = [
+		// Positive
+		['03/05/2019', '2019-05-03//'],
+		['Created at: 03/05/2019', '2019-05-03//'],
+		['03/05/2019 | 22:00', '2019-05-03//'],
+		['Created at: 03/05/2019 | 22:00', '2019-05-03//'],
+
+		// TODO: more positive, then negative examples
+	];
+	it.each(params)('>%s< should become %s', (s: string, out: string) => {
+		expect(extractorForPattern1(s)).toBe(out)
+	})
+})
+
+// The dd-Mmm-yyyy samples don't match the dd/mm/yyyy regex of extractorForPattern1,
+// so they exercise the underlying normalizer directly
+describe('getNormalizedDate_dd_Mmm_yyyy_NormalizerFn', () => {
+	const params = [
+		// Positive
+		['13-Jan-2012', '2012-01-13//'],
+		['3-Feb-2', '0002-02-03//'],
+		['1-Mar-1900', '1900-03-01//'],
+		['42-Apr-9999', '9999-04-42//'],
+		['0-May-0', '0000-05-00//'],
+		['21-Jun-2024', '2024-06-21//'],
+		['7-Jul-1872', '1872-07-07//'],
+		['15-Aug-1234', '1234-08-15//'],
+		['1234-Sep-7777', '7777-09-1234//'],
+		['3-Oct-2023', '2023-10-03//'],
+		['8-Nov-2022', '2022-11-08//'],
+		['18-Dec-2021', '2021-12-18//'],
+		// Negative
+		['88-Dec-2012', '2012-12-88//'], // Invalid case - the regexp on the matcher in the caller should guard against this
+		['13-JANUARY-2012', '2012-00-13//'], // Invalid case - the regexp on the matcher in the caller should guard against this
+		['1 .1', '0000-00-1 .1//'], // Invalid case - the regexp on the matcher in the caller should guard against this
+		['', '0000-00-00//'], // Invalid case - the regexp on the matcher in the caller should guard against this
+		['abc', '0000-00-abc//'], // Invalid case - the regexp on the matcher in the caller should guard against this
+		['def-abc', '0000-00-def//'], // Invalid case - the regexp on the matcher in the caller should guard against this
+	];
+	it.each(params)('>%s< should become %s', (s: string, out: string) => {
+		expect(getNormalizedDate_dd_Mmm_yyyy_NormalizerFn(s)).toBe(out)
+	})
+})