#171 - a PoC of the idea of metadata value extractors. Extended syntax, unit tests, error handling

2024-11-05 20:12:47 +01:00 · 2024-11-05 20:12:47 +01:00 · 9e2e12046d
parent f210a412d3
commit 9e2e12046d
4 changed files with 166 additions and 75 deletions
--- a/src/custom-sort/mdata-extractors.ts
+++ b/src/custom-sort/mdata-extractors.ts
@@ -1,13 +1,14 @@
 import {
    getNormalizedDate_NormalizerFn_for
 } from "./matchers";
+import {NormalizerFn} from "./custom-sort-types";

-const DateExtractorSpecPattern1 = 'date(dd/mm/yyyy)'
-const DateExtractorRegex1 = new RegExp('\\d{2}/\\d{2}/\\d{4}')
-const DateExtractorNormalizer1 = getNormalizedDate_NormalizerFn_for('/', 0, 1, 2)
-const DateExtractorSpecPattern2 = 'date(mm/dd/yyyy)'
-const DateExtractorRegex2 = new RegExp('\\d{2}/\\d{2}/\\d{4}')
-const DateExtractorNormalizer2 = getNormalizedDate_NormalizerFn_for('/', 1, 0, 2)
+type ExtractorFn = (mdataValue: string) => string|undefined
+
+interface DateExtractorSpec {
+    specPattern: string|RegExp,
+    extractorFn: ExtractorFn
+}

 export interface MDataExtractor {
    (mdataValue: string): string|undefined
@@ -18,37 +19,46 @@ export interface MDataExtractorParseResult {
    remainder: string
 }

+function getGenericPlainRegexpExtractorFn(extractorRegexp: RegExp, extractedValueNormalizer: NormalizerFn) {
+    return (mdataValue: string): string | undefined => {
+        const hasMatch = mdataValue?.match(extractorRegexp)
+        if (hasMatch && hasMatch[0]) {
+            return extractedValueNormalizer(hasMatch[0]) ?? undefined
+        } else {
+            return undefined
+        }
+    }
+}
+
+const Extractors: DateExtractorSpec[] = [
+    {   specPattern: 'date(dd/mm/yyyy)',
+        extractorFn: getGenericPlainRegexpExtractorFn(
+            new RegExp('\\d{2}/\\d{2}/\\d{4}'),
+            getNormalizedDate_NormalizerFn_for('/', 0, 1, 2)
+        )
+    }, {
+        specPattern: 'date(mm/dd/yyyy)',
+        extractorFn: getGenericPlainRegexpExtractorFn(
+            new RegExp('\\d{2}/\\d{2}/\\d{4}'),
+            getNormalizedDate_NormalizerFn_for('/', 1, 0, 2)
+        )
+    }
+]
+
 export const tryParseAsMDataExtractorSpec = (s: string): MDataExtractorParseResult|undefined => {
    // Simplistic initial implementation of the idea with hardcoded two extractors
-    if (s.trim().startsWith(DateExtractorSpecPattern1)) {
+    for (const extrSpec of Extractors) {
+        if ('string' === typeof extrSpec.specPattern && s.trim().startsWith(extrSpec.specPattern)) {
            return {
-            m: extractorForPattern1,
-            remainder: s.substring(DateExtractorSpecPattern1.length).trim()
+                m: extrSpec.extractorFn,
+                remainder: s.substring(extrSpec.specPattern.length).trim()
            }
        }
-    if (s.trim().startsWith(DateExtractorSpecPattern2)) {
-        return {
-            m: extractorForPattern2,
-            remainder: s.substring(DateExtractorSpecPattern2.length).trim()
-        }
    }
    return undefined
 }

-export function extractorForPattern1(mdataValue: string): string|undefined {
-    const hasDate = mdataValue?.match(DateExtractorRegex1)
-    if (hasDate && hasDate[0]) {
-        return DateExtractorNormalizer1(hasDate[0]) ?? undefined
-    } else {
-        return undefined
-    }
-}
-
-export function extractorForPattern2(mdataValue: string): string|undefined {
-    const hasDate = mdataValue?.match(DateExtractorRegex2)
-    if (hasDate && hasDate[0]) {
-        return DateExtractorNormalizer2(hasDate[0]) ?? undefined
-    } else {
-        return undefined
-    }
+export const _unitTests = {
+    extractorFnForDate_ddmmyyyy: Extractors.find((it) => it.specPattern === 'date(dd/mm/yyyy)')?.extractorFn!,
+    extractorFnForDate_mmddyyyy: Extractors.find((it) => it.specPattern === 'date(mm/dd/yyyy)')?.extractorFn!,
 }
--- a/src/custom-sort/sorting-spec-processor.ts
+++ b/src/custom-sort/sorting-spec-processor.ts
@@ -144,6 +144,8 @@ const OrderLiterals: { [key: string]: CustomSortOrderAscDescPair } = {

 const OrderByMetadataLexeme: string = 'by-metadata:'

+const ValueExtractorLexeme: string = 'using-extractor:'
+
 const OrderLevelsSeparator: string = ','

 enum Attribute {
@@ -1511,24 +1513,23 @@ export class SortingSpecProcessor {
 				applyToMetadata = true
 				const metadataNameAndOptionalExtractorSpec = orderSpec.substring(OrderByMetadataLexeme.length).trim() || undefined
 				if (metadataNameAndOptionalExtractorSpec) {
-					if (metadataNameAndOptionalExtractorSpec.indexOf(' ') > -1) {
-						const metadataSpec = metadataNameAndOptionalExtractorSpec.split(' ')
-						metadataName = metadataSpec.shift()
-						const metadataExtractorSpec = metadataSpec?.shift()
+					if (metadataNameAndOptionalExtractorSpec.indexOf(ValueExtractorLexeme) > -1) {
+						const metadataSpec = metadataNameAndOptionalExtractorSpec.split(ValueExtractorLexeme)
+						metadataName = metadataSpec.shift()?.trim()
+						const metadataExtractorSpec = metadataSpec?.shift()?.trim()
 						const hasMetadataExtractor = metadataExtractorSpec ? tryParseAsMDataExtractorSpec(metadataExtractorSpec) : undefined
 						if (hasMetadataExtractor) {
 							metadataExtractor = hasMetadataExtractor.m
 						} else {
-							// TODO: raise error of syntax error - metadata name followed by unrecognized text
-							//       take into account all of the texts resulting from the split(' ') - there could be more segments
+							return new AttrError(`${orderNameForErrorMsg} sorting order contains unrecognized value extractor: >>> ${metadataExtractorSpec} <<<`)
 						}
-						orderSpec = '' // Intentionally ignore anything beyond the metadata name and extractor
+						orderSpec = '' // all consumed as metadata and extractor
 					} else {
 						metadataName = metadataNameAndOptionalExtractorSpec
-						orderSpec = '' // Intentionally ignore anything beyond the metadata name (and no known extractor)
+						orderSpec = '' // all consumed as metadata name
 					}
 				} else {
-					orderSpec = ''
+					orderSpec = '' // no metadata name found
 				}
 			}

--- a/src/test/unit/mdata-extractors.spec.ts
+++ b/src/test/unit/mdata-extractors.spec.ts
@@ -1,38 +1,29 @@
 import {
-    extractorForPattern1
+    _unitTests
 } from '../../custom-sort/mdata-extractors'

-describe('extractorForPattern1', () => {
+describe('extractor for date(dd/mm/yyyy)', () => {
    const params = [
        // Positive
        ['03/05/2019', '2019-05-03//'],
+        ['103/05/2019', '2019-05-03//'],
+        ['103/05/20193232', '2019-05-03//'],
+        ['99/99/9999', '9999-99-99//'],
+        ['00/00/0000', '0000-00-00//'],
        ['Created at: 03/05/2019', '2019-05-03//'],
        ['03/05/2019 | 22:00', '2019-05-03//'],
        ['Created at: 03/05/2019 | 22:00', '2019-05-03//'],

-        // TODO: more positive then negative examples
-
-        ['13-Jan-2012', '2012-01-13//'],
-        ['3-Feb-2', '0002-02-03//'],
-        ['1-Mar-1900', '1900-03-01//'],
-        ['42-Apr-9999', '9999-04-42//'],
-        ['0-May-0', '0000-05-00//'],
-        ['21-Jun-2024', '2024-06-21//'],
-        ['7-Jul-1872', '1872-07-07//'],
-        ['15-Aug-1234', '1234-08-15//'],
-        ['1234-Sep-7777', '7777-09-1234//'],
-        ['3-Oct-2023', '2023-10-03//'],
-        ['8-Nov-2022', '2022-11-08//'],
-        ['18-Dec-2021', '2021-12-18//'],
        // Negative
-        ['88-Dec-2012', '2012-12-88//'], // Invalid case, Regexp on matcher in the caller should guard against this
-        ['13-JANUARY-2012', '2012-00-13//'], // Invalid case, Regexp on matcher in the caller should guard against this
-        ['1 .1', '0000-00-1 .1//'],  // Invalid case, Regexp on matcher in the caller should guard against this
-        ['', '0000-00-00//'],  // Invalid case, Regexp on matcher in the caller should guard against this
-        ['abc', '0000-00-abc//'],  // Invalid case, Regexp on matcher in the caller should guard against this
-        ['def-abc', '0000-00-def//'],  // Invalid case, Regexp on matcher in the caller should guard against this
+        ['88-Dec-2012', undefined],
+        ['13-JANUARY-2012', undefined],
+        ['1 .1', undefined],
+        ['', undefined],
+        ['abc', undefined],
+        ['def-abc', undefined],
+        ['3/5/2019', undefined],
    ];
    it.each(params)('>%s< should become %s', (s: string, out: string) => {
-        expect(extractorForPattern1(s)).toBe(out)
+        expect(_unitTests.extractorFnForDate_ddmmyyyy(s)).toBe(out)
    })
 })
--- a/src/test/unit/sorting-spec-processor.spec.ts
+++ b/src/test/unit/sorting-spec-processor.spec.ts
@@ -4,7 +4,8 @@ import {
 	CompoundDotNumberNormalizerFn,
 	ConsumedFolderMatchingRegexp,
 	consumeFolderByRegexpExpression,
-	convertPlainStringToRegex, Date_dd_Mmm_yyyy_NormalizerFn,
+	convertPlainStringToRegex,
+	Date_dd_Mmm_yyyy_NormalizerFn,
 	detectSortingSymbols,
 	escapeRegexUnsafeCharacters,
 	extractSortingSymbol,
@@ -14,8 +15,14 @@ import {
 	RomanNumberNormalizerFn,
 	SortingSpecProcessor
 } from "../../custom-sort/sorting-spec-processor"
-import {CustomSortGroupType, CustomSortOrder, CustomSortSpec, IdentityNormalizerFn} from "../../custom-sort/custom-sort-types";
+import {
+	CustomSortGroupType,
+	CustomSortOrder,
+	CustomSortSpec,
+	IdentityNormalizerFn
+} from "../../custom-sort/custom-sort-types";
 import {FolderMatchingRegexp, FolderMatchingTreeNode} from "../../custom-sort/folder-matching-rules";
+import {_unitTests} from "../../custom-sort/mdata-extractors";

 const txtInputExampleA: string = `
 order-asc: a-z
@@ -356,6 +363,17 @@ const expectedSortSpecsExampleA: { [key: string]: CustomSortSpec } = {
 	}
 }

+const txtInputExampleSortingSymbols: string = `
+/folders Chapter \\.d+ ...  
+/:files ...section \\-r+.
+% Appendix \\-d+ (attachments)
+Plain syntax\\R+ ... works?
+And this kind of... \\D+plain syntax???
+Here goes ASCII word \\a+
+\\A+. is for any modern language word
+\\[dd-Mmm-yyyy] for the specific date format of 12-Apr-2024
+`
+
 const expectedSortSpecsExampleSortingSymbols: { [key: string]: CustomSortSpec } = {
 	"mock-folder": {
 		groups: [{
@@ -418,17 +436,67 @@ const expectedSortSpecsExampleSortingSymbols: { [key: string]: CustomSortSpec }
 	}
 }

-const txtInputExampleSortingSymbols: string = `
-/folders Chapter \\.d+ ...  
-/:files ...section \\-r+.
-% Appendix \\-d+ (attachments)
-Plain syntax\\R+ ... works?
-And this kind of... \\D+plain syntax???
-Here goes ASCII word \\a+
-\\A+. is for any modern language word
-\\[dd-Mmm-yyyy] for the specific date format of 12-Apr-2024
+const txtInputExampleMDataExtractors1: string = `
+< a-z by-metadata: created-by using-extractor: date(dd/mm/yyyy)
+/folders Chapter...
+  > a-z by-metadata: updated-on using-extractor: date(mm/dd/yyyy)
 `

+// Tricky elements captured:
+// - Order a-z. for by metadata is transformed to a-z (there is no notion of 'file extension' in metadata values)
+
+const txtInputExampleMDataExtractors2: string = `
+< a-z. by-metadata: created by using-extractor: date(mm/dd/yyyy), < true a-z. by-metadata: using-extractor: date(dd/mm/yyyy)
+/folders ...Chapter
+  > a-z. by-metadata: updated-on using-extractor: date(dd/mm/yyyy), > true a-z by-metadata: md2 using-extractor: date(mm/dd/yyyy) 
+`
+
+const expectedSortSpecsExampleMDataExtractors1: { [key: string]: CustomSortSpec } = {
+	"mock-folder": {
+		defaultOrder: CustomSortOrder.byMetadataFieldAlphabetical,
+		byMetadataField: 'created-by',
+		metadataFieldValueExtractor: _unitTests.extractorFnForDate_ddmmyyyy,
+		groups: [{
+			foldersOnly: true,
+			type: CustomSortGroupType.ExactPrefix,
+			exactPrefix: 'Chapter',
+			order: CustomSortOrder.byMetadataFieldAlphabeticalReverse,
+			byMetadataField: 'updated-on',
+			metadataFieldValueExtractor: _unitTests.extractorFnForDate_mmddyyyy
+		}, {
+			type: CustomSortGroupType.Outsiders
+		}],
+		targetFoldersPaths: ['mock-folder'],
+		outsidersGroupIdx: 1
+	}
+}
+
+const expectedSortSpecsExampleMDataExtractors2: { [key: string]: CustomSortSpec } = {
+	"mock-folder": {
+		defaultOrder: CustomSortOrder.byMetadataFieldAlphabetical,
+		byMetadataField: 'created by',
+		metadataFieldValueExtractor: _unitTests.extractorFnForDate_mmddyyyy,
+		defaultSecondaryOrder: CustomSortOrder.byMetadataFieldTrueAlphabetical,
+		byMetadataFieldSecondary: '',
+		metadataFieldSecondaryValueExtractor: _unitTests.extractorFnForDate_ddmmyyyy,
+		groups: [{
+			foldersOnly: true,
+			type: CustomSortGroupType.ExactSuffix,
+			exactSuffix: 'Chapter',
+			order: CustomSortOrder.byMetadataFieldAlphabeticalReverse,
+			byMetadataField: 'updated-on',
+			metadataFieldValueExtractor: _unitTests.extractorFnForDate_ddmmyyyy,
+			secondaryOrder: CustomSortOrder.byMetadataFieldTrueAlphabeticalReverse,
+			byMetadataFieldSecondary: 'md2',
+			metadataFieldSecondaryValueExtractor: _unitTests.extractorFnForDate_mmddyyyy
+		}, {
+			type: CustomSortGroupType.Outsiders
+		}],
+		targetFoldersPaths: ['mock-folder'],
+		outsidersGroupIdx: 1
+	}
+}
+
 describe('SortingSpecProcessor', () => {
 	let processor: SortingSpecProcessor;
 	beforeEach(() => {
@@ -449,6 +517,16 @@ describe('SortingSpecProcessor', () => {
 		const result = processor.parseSortSpecFromText(inputTxtArr, 'mock-folder', 'custom-name-note.md')
 		expect(result?.sortSpecByPath).toEqual(expectedSortSpecsExampleSortingSymbols)
 	})
+	it('should generate correct SortSpecs (example with mdata extractors)', () => {
+		const inputTxtArr: Array<string> = txtInputExampleMDataExtractors1.split('\n')
+		const result = processor.parseSortSpecFromText(inputTxtArr, 'mock-folder', 'custom-name-note.md')
+		expect(result?.sortSpecByPath).toEqual(expectedSortSpecsExampleMDataExtractors1)
+	})
+	it('should generate correct SortSpecs (example with mdata extractors, advanced)', () => {
+		const inputTxtArr: Array<string> = txtInputExampleMDataExtractors2.split('\n')
+		const result = processor.parseSortSpecFromText(inputTxtArr, 'mock-folder', 'custom-name-note.md')
+		expect(result?.sortSpecByPath).toEqual(expectedSortSpecsExampleMDataExtractors2)
+	})
 })

 const txtInputNotDuplicatedSortSpec: string = `
@@ -2922,6 +3000,17 @@ describe('SortingSpecProcessor error detection and reporting', () => {
 			`${ERR_PREFIX} 7:InvalidAttributeValue Secondary sorting direction order-asc: and desc are contradicting ${ERR_SUFFIX_IN_LINE(2)}`)
 		expect(errorsLogger).toHaveBeenNthCalledWith(2, ERR_LINE_TXT('sorting: standard, order-asc: modified desc by-metadata: xyz // <-- and it is checked earlier than the by-metadata incompatible order'))
 	})
+	it('should reject unknown value extractor', () => {
+		const inputTxtArr: Array<string> = `
+		< a-z. by-metadata: created by using-extractor: date(mm/dd/YYYY)
+		`.replace(/\t/gi, '').split('\n')
+		const result = processor.parseSortSpecFromText(inputTxtArr, 'mock-folder', 'custom-name-note.md')
+		expect(result).toBeNull()
+		expect(errorsLogger).toHaveBeenCalledTimes(2)
+		expect(errorsLogger).toHaveBeenNthCalledWith(1,
+			`${ERR_PREFIX} 7:InvalidAttributeValue Primary sorting order contains unrecognized value extractor: >>> date(mm/dd/YYYY) <<< ${ERR_SUFFIX_IN_LINE(2)}`)
+		expect(errorsLogger).toHaveBeenNthCalledWith(2, ERR_LINE_TXT('< a-z. by-metadata: created by using-extractor: date(mm/dd/YYYY)'))
+	})
 })

 const txtInputTargetFolderCCC: string = `