From 9e2e12046d957b1256867bfce67c77b83041a617 Mon Sep 17 00:00:00 2001 From: SebastianMC <23032356+SebastianMC@users.noreply.github.com> Date: Tue, 5 Nov 2024 20:12:47 +0100 Subject: [PATCH] #171 - a PoC of the idea of metadata value extractors. Extended syntax, unit tests, error handling --- src/custom-sort/mdata-extractors.ts | 74 +++++++------ src/custom-sort/sorting-spec-processor.ts | 19 ++-- src/test/unit/mdata-extractors.spec.ts | 37 +++---- src/test/unit/sorting-spec-processor.spec.ts | 111 +++++++++++++++++-- 4 files changed, 166 insertions(+), 75 deletions(-) diff --git a/src/custom-sort/mdata-extractors.ts b/src/custom-sort/mdata-extractors.ts index 6e761c0..8d37ad4 100644 --- a/src/custom-sort/mdata-extractors.ts +++ b/src/custom-sort/mdata-extractors.ts @@ -1,13 +1,14 @@ import { getNormalizedDate_NormalizerFn_for } from "./matchers"; +import {NormalizerFn} from "./custom-sort-types"; -const DateExtractorSpecPattern1 = 'date(dd/mm/yyyy)' -const DateExtractorRegex1 = new RegExp('\\d{2}/\\d{2}/\\d{4}') -const DateExtractorNormalizer1 = getNormalizedDate_NormalizerFn_for('/', 0, 1, 2) -const DateExtractorSpecPattern2 = 'date(mm/dd/yyyy)' -const DateExtractorRegex2 = new RegExp('\\d{2}/\\d{2}/\\d{4}') -const DateExtractorNormalizer2 = getNormalizedDate_NormalizerFn_for('/', 1, 0, 2) +type ExtractorFn = (mdataValue: string) => string|undefined + +interface DateExtractorSpec { + specPattern: string|RegExp, + extractorFn: ExtractorFn +} export interface MDataExtractor { (mdataValue: string): string|undefined @@ -18,37 +19,46 @@ export interface MDataExtractorParseResult { remainder: string } -export const tryParseAsMDataExtractorSpec = (s: string): MDataExtractorParseResult|undefined => { - // Simplistic initial implementation of the idea with hardcoded two extractors - if (s.trim().startsWith(DateExtractorSpecPattern1)) { - return { - m: extractorForPattern1, - remainder: s.substring(DateExtractorSpecPattern1.length).trim() +function getGenericPlainRegexpExtractorFn(extractorRegexp: RegExp, extractedValueNormalizer: NormalizerFn) { + return (mdataValue: string): string | undefined => { + const hasMatch = mdataValue?.match(extractorRegexp) + if (hasMatch && hasMatch[0]) { + return extractedValueNormalizer(hasMatch[0]) ?? undefined + } else { + return undefined } } - if (s.trim().startsWith(DateExtractorSpecPattern2)) { - return { - m: extractorForPattern2, - remainder: s.substring(DateExtractorSpecPattern2.length).trim() +} + +const Extractors: DateExtractorSpec[] = [ + { specPattern: 'date(dd/mm/yyyy)', + extractorFn: getGenericPlainRegexpExtractorFn( + new RegExp('\\d{2}/\\d{2}/\\d{4}'), + getNormalizedDate_NormalizerFn_for('/', 0, 1, 2) + ) + }, { + specPattern: 'date(mm/dd/yyyy)', + extractorFn: getGenericPlainRegexpExtractorFn( + new RegExp('\\d{2}/\\d{2}/\\d{4}'), + getNormalizedDate_NormalizerFn_for('/', 1, 0, 2) + ) + } +] + +export const tryParseAsMDataExtractorSpec = (s: string): MDataExtractorParseResult|undefined => { + // Simplistic initial implementation of the idea with hardcoded two extractors + for (const extrSpec of Extractors) { + if ('string' === typeof extrSpec.specPattern && s.trim().startsWith(extrSpec.specPattern)) { + return { + m: extrSpec.extractorFn, + remainder: s.substring(extrSpec.specPattern.length).trim() + } } } return undefined } -export function extractorForPattern1(mdataValue: string): string|undefined { - const hasDate = mdataValue?.match(DateExtractorRegex1) - if (hasDate && hasDate[0]) { - return DateExtractorNormalizer1(hasDate[0]) ?? undefined - } else { - return undefined - } -} - -export function extractorForPattern2(mdataValue: string): string|undefined { - const hasDate = mdataValue?.match(DateExtractorRegex2) - if (hasDate && hasDate[0]) { - return DateExtractorNormalizer2(hasDate[0]) ?? undefined - } else { - return undefined - } +export const _unitTests = { + extractorFnForDate_ddmmyyyy: Extractors.find((it) => it.specPattern === 'date(dd/mm/yyyy)')?.extractorFn!, + extractorFnForDate_mmddyyyy: Extractors.find((it) => it.specPattern === 'date(mm/dd/yyyy)')?.extractorFn!, } diff --git a/src/custom-sort/sorting-spec-processor.ts b/src/custom-sort/sorting-spec-processor.ts index 2aabef5..591fea5 100644 --- a/src/custom-sort/sorting-spec-processor.ts +++ b/src/custom-sort/sorting-spec-processor.ts @@ -144,6 +144,8 @@ const OrderLiterals: { [key: string]: CustomSortOrderAscDescPair } = { const OrderByMetadataLexeme: string = 'by-metadata:' +const ValueExtractorLexeme: string = 'using-extractor:' + const OrderLevelsSeparator: string = ',' enum Attribute { @@ -1511,24 +1513,23 @@ export class SortingSpecProcessor { applyToMetadata = true const metadataNameAndOptionalExtractorSpec = orderSpec.substring(OrderByMetadataLexeme.length).trim() || undefined if (metadataNameAndOptionalExtractorSpec) { - if (metadataNameAndOptionalExtractorSpec.indexOf(' ') > -1) { - const metadataSpec = metadataNameAndOptionalExtractorSpec.split(' ') - metadataName = metadataSpec.shift() - const metadataExtractorSpec = metadataSpec?.shift() + if (metadataNameAndOptionalExtractorSpec.indexOf(ValueExtractorLexeme) > -1) { + const metadataSpec = metadataNameAndOptionalExtractorSpec.split(ValueExtractorLexeme) + metadataName = metadataSpec.shift()?.trim() + const metadataExtractorSpec = metadataSpec?.shift()?.trim() const hasMetadataExtractor = metadataExtractorSpec ? tryParseAsMDataExtractorSpec(metadataExtractorSpec) : undefined if (hasMetadataExtractor) { metadataExtractor = hasMetadataExtractor.m } else { - // TODO: raise error of syntax error - metadata name followed by unrecognized text - // take into account all of the texts resulting from the split(' ') - there could be more segments + return new AttrError(`${orderNameForErrorMsg} sorting order contains unrecognized value extractor: >>> ${metadataExtractorSpec} <<<`) } - orderSpec = '' // Intentionally ignore anything beyond the metadata name and extractor + orderSpec = '' // all consumed as metadata and extractor } else { metadataName = metadataNameAndOptionalExtractorSpec - orderSpec = '' // Intentionally ignore anything beyond the metadata name (and no known extractor) + orderSpec = '' // all consumed as metadata name } } else { - orderSpec = '' + orderSpec = '' // no metadata name found } } diff --git a/src/test/unit/mdata-extractors.spec.ts b/src/test/unit/mdata-extractors.spec.ts index 1a114d2..58973fd 100644 --- a/src/test/unit/mdata-extractors.spec.ts +++ b/src/test/unit/mdata-extractors.spec.ts @@ -1,38 +1,29 @@ import { - extractorForPattern1 + _unitTests } from '../../custom-sort/mdata-extractors' -describe('extractorForPattern1', () => { +describe('extractor for date(dd/mm/yyyy)', () => { const params = [ // Positive ['03/05/2019', '2019-05-03//'], + ['103/05/2019', '2019-05-03//'], + ['103/05/20193232', '2019-05-03//'], + ['99/99/9999', '9999-99-99//'], + ['00/00/0000', '0000-00-00//'], ['Created at: 03/05/2019', '2019-05-03//'], ['03/05/2019 | 22:00', '2019-05-03//'], ['Created at: 03/05/2019 | 22:00', '2019-05-03//'], - // TODO: more positive then negative examples - - ['13-Jan-2012', '2012-01-13//'], - ['3-Feb-2', '0002-02-03//'], - ['1-Mar-1900', '1900-03-01//'], - ['42-Apr-9999', '9999-04-42//'], - ['0-May-0', '0000-05-00//'], - ['21-Jun-2024', '2024-06-21//'], - ['7-Jul-1872', '1872-07-07//'], - ['15-Aug-1234', '1234-08-15//'], - ['1234-Sep-7777', '7777-09-1234//'], - ['3-Oct-2023', '2023-10-03//'], - ['8-Nov-2022', '2022-11-08//'], - ['18-Dec-2021', '2021-12-18//'], // Negative - ['88-Dec-2012', '2012-12-88//'], // Invalid case, Regexp on matcher in the caller should guard against this - ['13-JANUARY-2012', '2012-00-13//'], // Invalid case, Regexp on matcher in the caller should guard against this - ['1 .1', '0000-00-1 .1//'], // Invalid case, Regexp on matcher in the caller should guard against this - ['', '0000-00-00//'], // Invalid case, Regexp on matcher in the caller should guard against this - ['abc', '0000-00-abc//'], // Invalid case, Regexp on matcher in the caller should guard against this - ['def-abc', '0000-00-def//'], // Invalid case, Regexp on matcher in the caller should guard against this + ['88-Dec-2012', undefined], + ['13-JANUARY-2012', undefined], + ['1 .1', undefined], + ['', undefined], + ['abc', undefined], + ['def-abc', undefined], + ['3/5/2019', undefined], ]; it.each(params)('>%s< should become %s', (s: string, out: string) => { - expect(extractorForPattern1(s)).toBe(out) + expect(_unitTests.extractorFnForDate_ddmmyyyy(s)).toBe(out) }) }) diff --git a/src/test/unit/sorting-spec-processor.spec.ts b/src/test/unit/sorting-spec-processor.spec.ts index 3a7fbd1..922affb 100644 --- a/src/test/unit/sorting-spec-processor.spec.ts +++ b/src/test/unit/sorting-spec-processor.spec.ts @@ -4,7 +4,8 @@ import { CompoundDotNumberNormalizerFn, ConsumedFolderMatchingRegexp, consumeFolderByRegexpExpression, - convertPlainStringToRegex, Date_dd_Mmm_yyyy_NormalizerFn, + convertPlainStringToRegex, + Date_dd_Mmm_yyyy_NormalizerFn, detectSortingSymbols, escapeRegexUnsafeCharacters, extractSortingSymbol, @@ -14,8 +15,14 @@ import { RomanNumberNormalizerFn, SortingSpecProcessor } from "../../custom-sort/sorting-spec-processor" -import {CustomSortGroupType, CustomSortOrder, CustomSortSpec, IdentityNormalizerFn} from "../../custom-sort/custom-sort-types"; +import { + CustomSortGroupType, + CustomSortOrder, + CustomSortSpec, + IdentityNormalizerFn +} from "../../custom-sort/custom-sort-types"; import {FolderMatchingRegexp, FolderMatchingTreeNode} from "../../custom-sort/folder-matching-rules"; +import {_unitTests} from "../../custom-sort/mdata-extractors"; const txtInputExampleA: string = ` order-asc: a-z @@ -356,6 +363,17 @@ const expectedSortSpecsExampleA: { [key: string]: CustomSortSpec } = { } } +const txtInputExampleSortingSymbols: string = ` +/folders Chapter \\.d+ ... +/:files ...section \\-r+. +% Appendix \\-d+ (attachments) +Plain syntax\\R+ ... works? +And this kind of... \\D+plain syntax??? +Here goes ASCII word \\a+ +\\A+. is for any modern language word +\\[dd-Mmm-yyyy] for the specific date format of 12-Apr-2024 +` + const expectedSortSpecsExampleSortingSymbols: { [key: string]: CustomSortSpec } = { "mock-folder": { groups: [{ @@ -418,17 +436,67 @@ const expectedSortSpecsExampleSortingSymbols: { [key: string]: CustomSortSpec } } } -const txtInputExampleSortingSymbols: string = ` -/folders Chapter \\.d+ ... -/:files ...section \\-r+. -% Appendix \\-d+ (attachments) -Plain syntax\\R+ ... works? -And this kind of... \\D+plain syntax??? -Here goes ASCII word \\a+ -\\A+. is for any modern language word -\\[dd-Mmm-yyyy] for the specific date format of 12-Apr-2024 +const txtInputExampleMDataExtractors1: string = ` +< a-z by-metadata: created-by using-extractor: date(dd/mm/yyyy) +/folders Chapter... + > a-z by-metadata: updated-on using-extractor: date(mm/dd/yyyy) ` +// Tricky elements captured: +// - Order a-z. for by metadata is transformed to a-z (there is no notion of 'file extension' in metadata values) + +const txtInputExampleMDataExtractors2: string = ` +< a-z. by-metadata: created by using-extractor: date(mm/dd/yyyy), < true a-z. by-metadata: using-extractor: date(dd/mm/yyyy) +/folders ...Chapter + > a-z. by-metadata: updated-on using-extractor: date(dd/mm/yyyy), > true a-z by-metadata: md2 using-extractor: date(mm/dd/yyyy) +` + +const expectedSortSpecsExampleMDataExtractors1: { [key: string]: CustomSortSpec } = { + "mock-folder": { + defaultOrder: CustomSortOrder.byMetadataFieldAlphabetical, + byMetadataField: 'created-by', + metadataFieldValueExtractor: _unitTests.extractorFnForDate_ddmmyyyy, + groups: [{ + foldersOnly: true, + type: CustomSortGroupType.ExactPrefix, + exactPrefix: 'Chapter', + order: CustomSortOrder.byMetadataFieldAlphabeticalReverse, + byMetadataField: 'updated-on', + metadataFieldValueExtractor: _unitTests.extractorFnForDate_mmddyyyy + }, { + type: CustomSortGroupType.Outsiders + }], + targetFoldersPaths: ['mock-folder'], + outsidersGroupIdx: 1 + } +} + +const expectedSortSpecsExampleMDataExtractors2: { [key: string]: CustomSortSpec } = { + "mock-folder": { + defaultOrder: CustomSortOrder.byMetadataFieldAlphabetical, + byMetadataField: 'created by', + metadataFieldValueExtractor: _unitTests.extractorFnForDate_mmddyyyy, + defaultSecondaryOrder: CustomSortOrder.byMetadataFieldTrueAlphabetical, + byMetadataFieldSecondary: '', + metadataFieldSecondaryValueExtractor: _unitTests.extractorFnForDate_ddmmyyyy, + groups: [{ + foldersOnly: true, + type: CustomSortGroupType.ExactSuffix, + exactSuffix: 'Chapter', + order: CustomSortOrder.byMetadataFieldAlphabeticalReverse, + byMetadataField: 'updated-on', + metadataFieldValueExtractor: _unitTests.extractorFnForDate_ddmmyyyy, + secondaryOrder: CustomSortOrder.byMetadataFieldTrueAlphabeticalReverse, + byMetadataFieldSecondary: 'md2', + metadataFieldSecondaryValueExtractor: _unitTests.extractorFnForDate_mmddyyyy + }, { + type: CustomSortGroupType.Outsiders + }], + targetFoldersPaths: ['mock-folder'], + outsidersGroupIdx: 1 + } +} + describe('SortingSpecProcessor', () => { let processor: SortingSpecProcessor; beforeEach(() => { @@ -449,6 +517,16 @@ describe('SortingSpecProcessor', () => { const result = processor.parseSortSpecFromText(inputTxtArr, 'mock-folder', 'custom-name-note.md') expect(result?.sortSpecByPath).toEqual(expectedSortSpecsExampleSortingSymbols) }) + it('should generate correct SortSpecs (example with mdata extractors)', () => { + const inputTxtArr: Array = txtInputExampleMDataExtractors1.split('\n') + const result = processor.parseSortSpecFromText(inputTxtArr, 'mock-folder', 'custom-name-note.md') + expect(result?.sortSpecByPath).toEqual(expectedSortSpecsExampleMDataExtractors1) + }) + it('should generate correct SortSpecs (example with mdata extractors, advanced)', () => { + const inputTxtArr: Array = txtInputExampleMDataExtractors2.split('\n') + const result = processor.parseSortSpecFromText(inputTxtArr, 'mock-folder', 'custom-name-note.md') + expect(result?.sortSpecByPath).toEqual(expectedSortSpecsExampleMDataExtractors2) + }) }) const txtInputNotDuplicatedSortSpec: string = ` @@ -2922,6 +3000,17 @@ describe('SortingSpecProcessor error detection and reporting', () => { `${ERR_PREFIX} 7:InvalidAttributeValue Secondary sorting direction order-asc: and desc are contradicting ${ERR_SUFFIX_IN_LINE(2)}`) expect(errorsLogger).toHaveBeenNthCalledWith(2, ERR_LINE_TXT('sorting: standard, order-asc: modified desc by-metadata: xyz // <-- and it is checked earlier than the by-metadata incompatible order')) }) + it('should reject unknown value extractor', () => { + const inputTxtArr: Array = ` + < a-z. by-metadata: created by using-extractor: date(mm/dd/YYYY) + `.replace(/\t/gi, '').split('\n') + const result = processor.parseSortSpecFromText(inputTxtArr, 'mock-folder', 'custom-name-note.md') + expect(result).toBeNull() + expect(errorsLogger).toHaveBeenCalledTimes(2) + expect(errorsLogger).toHaveBeenNthCalledWith(1, + `${ERR_PREFIX} 7:InvalidAttributeValue Primary sorting order contains unrecognized value extractor: >>> date(mm/dd/YYYY) <<< ${ERR_SUFFIX_IN_LINE(2)}`) + expect(errorsLogger).toHaveBeenNthCalledWith(2, ERR_LINE_TXT('< a-z. by-metadata: created by using-extractor: date(mm/dd/YYYY)')) + }) }) const txtInputTargetFolderCCC: string = `