From 42a5f1feb27a8bbb7767bc0b8f11f89eda17215c Mon Sep 17 00:00:00 2001 From: SebastianMC <23032356+SebastianMC@users.noreply.github.com> Date: Sun, 3 Nov 2024 22:56:55 +0100 Subject: [PATCH 1/5] #171 - started creation of a PoC of the idea of metadata value extractors. At a glance a low hanging fruit turned out to be far too complex to be worth it. --- src/custom-sort/matchers.ts | 38 +++++++++++----- src/custom-sort/mdata-extractors.ts | 54 +++++++++++++++++++++++ src/custom-sort/sorting-spec-processor.ts | 37 ++++++++++++++-- src/test/unit/mdata-extractors.spec.ts | 38 ++++++++++++++++ 4 files changed, 154 insertions(+), 13 deletions(-) create mode 100644 src/custom-sort/mdata-extractors.ts create mode 100644 src/test/unit/mdata-extractors.spec.ts diff --git a/src/custom-sort/matchers.ts b/src/custom-sort/matchers.ts index b92c65c..6fda71e 100644 --- a/src/custom-sort/matchers.ts +++ b/src/custom-sort/matchers.ts @@ -104,17 +104,35 @@ export function getNormalizedRomanNumber(s: string, separator?: string, places?: } } -const DAY_POSITIONS = '00'.length -const MONTH_POSITIONS = '00'.length -const YEAR_POSITIONS = '0000'.length +export const DAY_POSITIONS = '00'.length +export const MONTH_POSITIONS = '00'.length +export const YEAR_POSITIONS = '0000'.length const MONTHS = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'] -export function getNormalizedDate_dd_Mmm_yyyy_NormalizerFn(s: string): string | null { - // Assumption - the regex date matched against input s, no extensive defensive coding needed - const components = s.split('-') - const day = prependWithZeros(components[0], DAY_POSITIONS) - const month = prependWithZeros( `${1 + MONTHS.indexOf(components[1])}`, MONTH_POSITIONS) - const year = prependWithZeros(components[2], YEAR_POSITIONS) - return `${year}-${month}-${day}//` +export function getNormalizedDate_NormalizerFn_for(separator: string, dayIdx: number, monthIdx: number, yearIdx: number, months?: string[]) { + return (s: string): string | null => { + // Assumption - the regex date matched against input s, no extensive defensive coding needed + const components = s.split(separator) + const day = prependWithZeros(components[dayIdx], DAY_POSITIONS) + const monthValue = months ? `${1 + MONTHS.indexOf(components[monthIdx])}` : components[monthIdx] + const month = prependWithZeros(monthValue, MONTH_POSITIONS) + const year = prependWithZeros(components[yearIdx], YEAR_POSITIONS) + return `${year}-${month}-${day}//` + } } + +export const getNormalizedDate_dd_Mmm_yyyy_NormalizerFn = getNormalizedDate_NormalizerFn_for('-', 0, 1, 2, MONTHS) + +/* +// Assumption - the regex date matched against input s, no extensive defensive coding needed +const components = s.split('-') +const day = prependWithZeros(components[0], DAY_POSITIONS) +const month = prependWithZeros( `${1 + MONTHS.indexOf(components[1])}`, MONTH_POSITIONS) +const year = prependWithZeros(components[2], YEAR_POSITIONS) +return `${year}-${month}-${day}//` + + */ + + + diff --git a/src/custom-sort/mdata-extractors.ts b/src/custom-sort/mdata-extractors.ts new file mode 100644 index 0000000..6e761c0 --- /dev/null +++ b/src/custom-sort/mdata-extractors.ts @@ -0,0 +1,54 @@ +import { + getNormalizedDate_NormalizerFn_for +} from "./matchers"; + +const DateExtractorSpecPattern1 = 'date(dd/mm/yyyy)' +const DateExtractorRegex1 = new RegExp('\\d{2}/\\d{2}/\\d{4}') +const DateExtractorNormalizer1 = getNormalizedDate_NormalizerFn_for('/', 0, 1, 2) +const DateExtractorSpecPattern2 = 'date(mm/dd/yyyy)' +const DateExtractorRegex2 = new RegExp('\\d{2}/\\d{2}/\\d{4}') +const DateExtractorNormalizer2 = getNormalizedDate_NormalizerFn_for('/', 1, 0, 2) + +export interface MDataExtractor { + (mdataValue: string): string|undefined +} + +export interface MDataExtractorParseResult { + m: MDataExtractor + remainder: string +} + +export const tryParseAsMDataExtractorSpec = (s: string): MDataExtractorParseResult|undefined => { + // Simplistic initial implementation of the idea with hardcoded two extractors + if (s.trim().startsWith(DateExtractorSpecPattern1)) { + return { + m: extractorForPattern1, + remainder: s.substring(DateExtractorSpecPattern1.length).trim() + } + } + if (s.trim().startsWith(DateExtractorSpecPattern2)) { + return { + m: extractorForPattern2, + remainder: s.substring(DateExtractorSpecPattern2.length).trim() + } + } + return undefined +} + +export function extractorForPattern1(mdataValue: string): string|undefined { + const hasDate = mdataValue?.match(DateExtractorRegex1) + if (hasDate && hasDate[0]) { + return DateExtractorNormalizer1(hasDate[0]) ?? undefined + } else { + return undefined + } +} + +export function extractorForPattern2(mdataValue: string): string|undefined { + const hasDate = mdataValue?.match(DateExtractorRegex2) + if (hasDate && hasDate[0]) { + return DateExtractorNormalizer2(hasDate[0]) ?? undefined + } else { + return undefined + } +} diff --git a/src/custom-sort/sorting-spec-processor.ts b/src/custom-sort/sorting-spec-processor.ts index 537dcc1..4850b74 100644 --- a/src/custom-sort/sorting-spec-processor.ts +++ b/src/custom-sort/sorting-spec-processor.ts @@ -33,6 +33,10 @@ import { MATCH_CHILDREN_2_SUFFIX, NO_PRIORITY } from "./folder-matching-rules" +import { + MDataExtractor, + tryParseAsMDataExtractorSpec +} from "./mdata-extractors"; interface ProcessingContext { folderPath: string @@ -1497,10 +1501,30 @@ export class SortingSpecProcessor { orderSpec = hasDirectionPostfix ? orderSpec.substring(hasDirectionPostfix.lexeme.length).trim() : orderSpec let metadataName: string|undefined + let metadataExtractor: MDataExtractor|undefined if (orderSpec.startsWith(OrderByMetadataLexeme)) { applyToMetadata = true - metadataName = orderSpec.substring(OrderByMetadataLexeme.length).trim() || undefined - orderSpec = '' // metadataName is unparsed, consumes the remainder string, even if malformed, e.g. with infix spaces + const metadataNameAndOptionalExtractorSpec = orderSpec.substring(OrderByMetadataLexeme.length).trim() || undefined + if (metadataNameAndOptionalExtractorSpec) { + if (metadataNameAndOptionalExtractorSpec.indexOf(' ') > -1) { + const metadataSpec = metadataNameAndOptionalExtractorSpec.split(' ') + metadataName = metadataSpec.shift() + const metadataExtractorSpec = metadataSpec?.shift() + const hasMetadataExtractor = metadataExtractorSpec ? tryParseAsMDataExtractorSpec(metadataExtractorSpec) : undefined + if (hasMetadataExtractor) { + metadataExtractor = hasMetadataExtractor.m + } else { + // TODO: raise error of syntax error - metadata name followed by unrecognized text + // take into account all of the texts resulting from the split(' ') - there could be more segments + } + orderSpec = '' // Intentionally ignore anything beyond the metadata name and extractor + } else { + metadataName = metadataNameAndOptionalExtractorSpec + orderSpec = '' // Intentionally ignore anything beyond the metadata name (and no known extractor) + } + } else { + orderSpec = '' + } } // check for any superfluous text @@ -1553,7 +1577,14 @@ export class SortingSpecProcessor { } sortOrderSpec[level] = { order: order!, - byMetadataField: metadataName + byMetadataField: metadataName, + + metadataFieldExtractor: metadataExtractor + + ... and the carry the metadataFieldExtractor attribute down the parser, handle correctly in the 4-levels mdata sorting options + and execute at runtime + + Seems to be far too complex to be worth it. } } return sortOrderSpec diff --git a/src/test/unit/mdata-extractors.spec.ts b/src/test/unit/mdata-extractors.spec.ts new file mode 100644 index 0000000..1a114d2 --- /dev/null +++ b/src/test/unit/mdata-extractors.spec.ts @@ -0,0 +1,38 @@ +import { + extractorForPattern1 +} from '../../custom-sort/mdata-extractors' + +describe('extractorForPattern1', () => { + const params = [ + // Positive + ['03/05/2019', '2019-05-03//'], + ['Created at: 03/05/2019', '2019-05-03//'], + ['03/05/2019 | 22:00', '2019-05-03//'], + ['Created at: 03/05/2019 | 22:00', '2019-05-03//'], + + // TODO: more positive then negative examples + + ['13-Jan-2012', '2012-01-13//'], + ['3-Feb-2', '0002-02-03//'], + ['1-Mar-1900', '1900-03-01//'], + ['42-Apr-9999', '9999-04-42//'], + ['0-May-0', '0000-05-00//'], + ['21-Jun-2024', '2024-06-21//'], + ['7-Jul-1872', '1872-07-07//'], + ['15-Aug-1234', '1234-08-15//'], + ['1234-Sep-7777', '7777-09-1234//'], + ['3-Oct-2023', '2023-10-03//'], + ['8-Nov-2022', '2022-11-08//'], + ['18-Dec-2021', '2021-12-18//'], + // Negative + ['88-Dec-2012', '2012-12-88//'], // Invalid case, Regexp on matcher in the caller should guard against this + ['13-JANUARY-2012', '2012-00-13//'], // Invalid case, Regexp on matcher in the caller should guard against this + ['1 .1', '0000-00-1 .1//'], // Invalid case, Regexp on matcher in the caller should guard against this + ['', '0000-00-00//'], // Invalid case, Regexp on matcher in the caller should guard against this + ['abc', '0000-00-abc//'], // Invalid case, Regexp on matcher in the caller should guard against this + ['def-abc', '0000-00-def//'], // Invalid case, Regexp on matcher in the caller should guard against this + ]; + it.each(params)('>%s< should become %s', (s: string, out: string) => { + expect(extractorForPattern1(s)).toBe(out) + }) +}) From d82a80c0633f36be48df920664d3ee69627750a5 Mon Sep 17 00:00:00 2001 From: SebastianMC <23032356+SebastianMC@users.noreply.github.com> Date: Sun, 3 Nov 2024 23:16:57 +0100 Subject: [PATCH 2/5] #171 - a PoC of the idea of metadata value extractors. Completed the sorting spec parser part. --- src/custom-sort/custom-sort-types.ts | 4 ++++ src/custom-sort/sorting-spec-processor.ts | 11 ++++------- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/custom-sort/custom-sort-types.ts b/src/custom-sort/custom-sort-types.ts index 678b432..33d7d26 100644 --- a/src/custom-sort/custom-sort-types.ts +++ b/src/custom-sort/custom-sort-types.ts @@ -1,3 +1,5 @@ +import {MDataExtractor} from "./mdata-extractors"; + export enum CustomSortGroupType { Outsiders, // Not belonging to any of other groups MatchAll, // like a wildard *, used in connection with foldersOnly or filesOnly. The difference between the MatchAll and Outsiders is @@ -51,8 +53,10 @@ export enum CustomSortOrder { export interface RecognizedOrderValue { order: CustomSortOrder applyToMetadataField?: string + metadataValueExtractor?: MDataExtractor secondaryOrder?: CustomSortOrder secondaryApplyToMetadataField?: string + secondaryMetadataValueExtractor?: MDataExtractor } export type NormalizerFn = (s: string) => string | null diff --git a/src/custom-sort/sorting-spec-processor.ts b/src/custom-sort/sorting-spec-processor.ts index 4850b74..8929d82 100644 --- a/src/custom-sort/sorting-spec-processor.ts +++ b/src/custom-sort/sorting-spec-processor.ts @@ -116,6 +116,7 @@ interface CustomSortOrderAscDescPair { interface CustomSortOrderSpec { order: CustomSortOrder byMetadataField?: string + metadataFieldExtractor?: MDataExtractor } const MAX_SORT_LEVEL: number = 1 @@ -1578,13 +1579,7 @@ export class SortingSpecProcessor { sortOrderSpec[level] = { order: order!, byMetadataField: metadataName, - metadataFieldExtractor: metadataExtractor - - ... and the carry the metadataFieldExtractor attribute down the parser, handle correctly in the 4-levels mdata sorting options - and execute at runtime - - Seems to be far too complex to be worth it. } } return sortOrderSpec @@ -1595,8 +1590,10 @@ export class SortingSpecProcessor { return recognized ? (recognized instanceof AttrError ? recognized : { order: recognized[0].order, applyToMetadataField: recognized[0].byMetadataField, + metadataValueExtractor: recognized[0].metadataFieldExtractor, secondaryOrder: recognized[1]?.order, - secondaryApplyToMetadataField: recognized[1]?.byMetadataField + secondaryApplyToMetadataField: recognized[1]?.byMetadataField, + secondaryMetadataValueExtractor: recognized[1]?.metadataFieldExtractor }) : null; } From f210a412d378b02bf8655bdc2ce419a23eb0726f Mon Sep 17 00:00:00 2001 From: SebastianMC <23032356+SebastianMC@users.noreply.github.com> Date: Mon, 4 Nov 2024 14:30:49 +0100 Subject: [PATCH 3/5] #171 - a PoC of the idea of metadata value extractors. Working version, missing errors detection for new scenarios --- manifest.json | 2 +- src/custom-sort/custom-sort-types.ts | 8 ++++-- src/custom-sort/custom-sort.ts | 30 ++++++++++++++++++----- src/custom-sort/sorting-spec-processor.ts | 4 +++ 4 files changed, 35 insertions(+), 9 deletions(-) diff --git a/manifest.json b/manifest.json index 2bd3af2..a90d7fa 100644 --- a/manifest.json +++ b/manifest.json @@ -1,7 +1,7 @@ { "id": "custom-sort", "name": "Custom File Explorer sorting", - "version": "2.1.14", + "version": "2.1.14-beta-171", "minAppVersion": "0.16.2", "description": "Allows for manual and automatic, config-driven reordering and sorting of files and folders in File Explorer", "author": "SebastianMC", diff --git a/src/custom-sort/custom-sort-types.ts b/src/custom-sort/custom-sort-types.ts index 33d7d26..e2afd46 100644 --- a/src/custom-sort/custom-sort-types.ts +++ b/src/custom-sort/custom-sort-types.ts @@ -75,9 +75,11 @@ export interface CustomSortGroup { exactSuffix?: string regexSuffix?: RegExpSpec order?: CustomSortOrder - byMetadataField?: string // for 'by-metadata:' sorting if the order is by metadata alphabetical or reverse + byMetadataField?: string // for 'by-metadata:' sorting if the order is by metadata alphabetical or reverse + metadataFieldValueExtractor?: MDataExtractor // and its sorting value extractor secondaryOrder?: CustomSortOrder byMetadataFieldSecondary?: string // for 'by-metadata:' sorting if the order is by metadata alphabetical or reverse + metadataFieldSecondaryValueExtractor?: MDataExtractor filesOnly?: boolean matchFilenameWithExt?: boolean foldersOnly?: boolean @@ -92,8 +94,10 @@ export interface CustomSortSpec { targetFoldersPaths: Array // For root use '/' defaultOrder?: CustomSortOrder defaultSecondaryOrder?: CustomSortOrder - byMetadataField?: string // for 'by-metadata:' if the defaultOrder is by metadata + byMetadataField?: string // for 'by-metadata:' if the defaultOrder is by metadata + metadataFieldValueExtractor?: MDataExtractor // and its sorting value extractor byMetadataFieldSecondary?: string + metadataFieldSecondaryValueExtractor?: MDataExtractor groups: Array groupsShadow?: Array // A shallow copy of groups, used at applying sorting for items in a folder. // Stores folder-specific values (e.g. macros expanded with folder-specific values) diff --git a/src/custom-sort/custom-sort.ts b/src/custom-sort/custom-sort.ts index d320f56..6da64a2 100644 --- a/src/custom-sort/custom-sort.ts +++ b/src/custom-sort/custom-sort.ts @@ -35,6 +35,7 @@ import { BookmarksPluginInterface } from "../utils/BookmarksCorePluginSignature"; import {CustomSortPluginAPI} from "../custom-sort-plugin"; +import {MDataExtractor} from "./mdata-extractors"; export interface ProcessingContext { // For internal transient use @@ -372,13 +373,14 @@ export const matchGroupRegex = (theRegex: RegExpSpec, nameForMatching: string): return [false, undefined, undefined] } -const mdataValueFromFMCaches = (mdataFieldName: string, fc?: FrontMatterCache, fcPrio?: FrontMatterCache): any => { +const mdataValueFromFMCaches = (mdataFieldName: string, mdataExtractor?: MDataExtractor, fc?: FrontMatterCache, fcPrio?: FrontMatterCache): any => { let prioValue = undefined if (fcPrio) { prioValue = fcPrio?.[mdataFieldName] } - return prioValue ?? fc?.[mdataFieldName] + const rawMDataValue = prioValue ?? fc?.[mdataFieldName] + return mdataExtractor ? mdataExtractor(rawMDataValue) : rawMDataValue } export const determineSortingGroup = function (entry: TFile | TFolder, spec: CustomSortSpec, ctx?: ProcessingContext): FolderItemForSorting { @@ -583,13 +585,29 @@ export const determineSortingGroup = function (entry: TFile | TFolder, spec: Cus } } if (isPrimaryOrderByMetadata) metadataValueToSortBy = - mdataValueFromFMCaches (group?.byMetadataField || group?.withMetadataFieldName || DEFAULT_METADATA_FIELD_FOR_SORTING, frontMatterCache, prioFrontMatterCache) + mdataValueFromFMCaches ( + group?.byMetadataField || group?.withMetadataFieldName || DEFAULT_METADATA_FIELD_FOR_SORTING, + group?.metadataFieldValueExtractor, + frontMatterCache, + prioFrontMatterCache) if (isSecondaryOrderByMetadata) metadataValueSecondaryToSortBy = - mdataValueFromFMCaches (group?.byMetadataFieldSecondary || group?.withMetadataFieldName || DEFAULT_METADATA_FIELD_FOR_SORTING, frontMatterCache, prioFrontMatterCache) + mdataValueFromFMCaches ( + group?.byMetadataFieldSecondary || group?.withMetadataFieldName || DEFAULT_METADATA_FIELD_FOR_SORTING, + group?.metadataFieldSecondaryValueExtractor, + frontMatterCache, + prioFrontMatterCache) if (isDerivedPrimaryByMetadata) metadataValueDerivedPrimaryToSortBy = - mdataValueFromFMCaches (spec.byMetadataField || DEFAULT_METADATA_FIELD_FOR_SORTING, frontMatterCache, prioFrontMatterCache) + mdataValueFromFMCaches ( + spec.byMetadataField || DEFAULT_METADATA_FIELD_FOR_SORTING, + spec.metadataFieldValueExtractor, + frontMatterCache, + prioFrontMatterCache) if (isDerivedSecondaryByMetadata) metadataValueDerivedSecondaryToSortBy = - mdataValueFromFMCaches (spec.byMetadataFieldSecondary || DEFAULT_METADATA_FIELD_FOR_SORTING, frontMatterCache, prioFrontMatterCache) + mdataValueFromFMCaches ( + spec.byMetadataFieldSecondary || DEFAULT_METADATA_FIELD_FOR_SORTING, + spec.metadataFieldSecondaryValueExtractor, + frontMatterCache, + prioFrontMatterCache) } } } diff --git a/src/custom-sort/sorting-spec-processor.ts b/src/custom-sort/sorting-spec-processor.ts index 8929d82..2aabef5 100644 --- a/src/custom-sort/sorting-spec-processor.ts +++ b/src/custom-sort/sorting-spec-processor.ts @@ -1086,8 +1086,10 @@ export class SortingSpecProcessor { } this.ctx.currentSpec.defaultOrder = (attr.value as RecognizedOrderValue).order this.ctx.currentSpec.byMetadataField = (attr.value as RecognizedOrderValue).applyToMetadataField + this.ctx.currentSpec.metadataFieldValueExtractor = (attr.value as RecognizedOrderValue).metadataValueExtractor this.ctx.currentSpec.defaultSecondaryOrder = (attr.value as RecognizedOrderValue).secondaryOrder this.ctx.currentSpec.byMetadataFieldSecondary = (attr.value as RecognizedOrderValue).secondaryApplyToMetadataField + this.ctx.currentSpec.metadataFieldSecondaryValueExtractor = (attr.value as RecognizedOrderValue).secondaryMetadataValueExtractor return true; } else if (attr.nesting > 0) { // For now only distinguishing nested (indented) and not-nested (not-indented), the depth doesn't matter if (!this.ctx.currentSpec || !this.ctx.currentSpecGroup) { @@ -1101,8 +1103,10 @@ export class SortingSpecProcessor { } this.ctx.currentSpecGroup.order = (attr.value as RecognizedOrderValue).order this.ctx.currentSpecGroup.byMetadataField = (attr.value as RecognizedOrderValue).applyToMetadataField + this.ctx.currentSpecGroup.metadataFieldValueExtractor = (attr.value as RecognizedOrderValue).metadataValueExtractor this.ctx.currentSpecGroup.secondaryOrder = (attr.value as RecognizedOrderValue).secondaryOrder this.ctx.currentSpecGroup.byMetadataFieldSecondary = (attr.value as RecognizedOrderValue).secondaryApplyToMetadataField + this.ctx.currentSpecGroup.metadataFieldSecondaryValueExtractor = (attr.value as RecognizedOrderValue).secondaryMetadataValueExtractor return true; } } From 9e2e12046d957b1256867bfce67c77b83041a617 Mon Sep 17 00:00:00 2001 From: SebastianMC <23032356+SebastianMC@users.noreply.github.com> Date: Tue, 5 Nov 2024 20:12:47 +0100 Subject: [PATCH 4/5] #171 - a PoC of the idea of metadata value extractors. Extended syntax, unit tests, error handling --- src/custom-sort/mdata-extractors.ts | 74 +++++++------ src/custom-sort/sorting-spec-processor.ts | 19 ++-- src/test/unit/mdata-extractors.spec.ts | 37 +++---- src/test/unit/sorting-spec-processor.spec.ts | 111 +++++++++++++++++-- 4 files changed, 166 insertions(+), 75 deletions(-) diff --git a/src/custom-sort/mdata-extractors.ts b/src/custom-sort/mdata-extractors.ts index 6e761c0..8d37ad4 100644 --- a/src/custom-sort/mdata-extractors.ts +++ b/src/custom-sort/mdata-extractors.ts @@ -1,13 +1,14 @@ import { getNormalizedDate_NormalizerFn_for } from "./matchers"; +import {NormalizerFn} from "./custom-sort-types"; -const DateExtractorSpecPattern1 = 'date(dd/mm/yyyy)' -const DateExtractorRegex1 = new RegExp('\\d{2}/\\d{2}/\\d{4}') -const DateExtractorNormalizer1 = getNormalizedDate_NormalizerFn_for('/', 0, 1, 2) -const DateExtractorSpecPattern2 = 'date(mm/dd/yyyy)' -const DateExtractorRegex2 = new RegExp('\\d{2}/\\d{2}/\\d{4}') -const DateExtractorNormalizer2 = getNormalizedDate_NormalizerFn_for('/', 1, 0, 2) +type ExtractorFn = (mdataValue: string) => string|undefined + +interface DateExtractorSpec { + specPattern: string|RegExp, + extractorFn: ExtractorFn +} export interface MDataExtractor { (mdataValue: string): string|undefined @@ -18,37 +19,46 @@ export interface MDataExtractorParseResult { remainder: string } -export const tryParseAsMDataExtractorSpec = (s: string): MDataExtractorParseResult|undefined => { - // Simplistic initial implementation of the idea with hardcoded two extractors - if (s.trim().startsWith(DateExtractorSpecPattern1)) { - return { - m: extractorForPattern1, - remainder: s.substring(DateExtractorSpecPattern1.length).trim() +function getGenericPlainRegexpExtractorFn(extractorRegexp: RegExp, extractedValueNormalizer: NormalizerFn) { + return (mdataValue: string): string | undefined => { + const hasMatch = mdataValue?.match(extractorRegexp) + if (hasMatch && hasMatch[0]) { + return extractedValueNormalizer(hasMatch[0]) ?? undefined + } else { + return undefined } } - if (s.trim().startsWith(DateExtractorSpecPattern2)) { - return { - m: extractorForPattern2, - remainder: s.substring(DateExtractorSpecPattern2.length).trim() +} + +const Extractors: DateExtractorSpec[] = [ + { specPattern: 'date(dd/mm/yyyy)', + extractorFn: getGenericPlainRegexpExtractorFn( + new RegExp('\\d{2}/\\d{2}/\\d{4}'), + getNormalizedDate_NormalizerFn_for('/', 0, 1, 2) + ) + }, { + specPattern: 'date(mm/dd/yyyy)', + extractorFn: getGenericPlainRegexpExtractorFn( + new RegExp('\\d{2}/\\d{2}/\\d{4}'), + getNormalizedDate_NormalizerFn_for('/', 1, 0, 2) + ) + } +] + +export const tryParseAsMDataExtractorSpec = (s: string): MDataExtractorParseResult|undefined => { + // Simplistic initial implementation of the idea with hardcoded two extractors + for (const extrSpec of Extractors) { + if ('string' === typeof extrSpec.specPattern && s.trim().startsWith(extrSpec.specPattern)) { + return { + m: extrSpec.extractorFn, + remainder: s.substring(extrSpec.specPattern.length).trim() + } } } return undefined } -export function extractorForPattern1(mdataValue: string): string|undefined { - const hasDate = mdataValue?.match(DateExtractorRegex1) - if (hasDate && hasDate[0]) { - return DateExtractorNormalizer1(hasDate[0]) ?? undefined - } else { - return undefined - } -} - -export function extractorForPattern2(mdataValue: string): string|undefined { - const hasDate = mdataValue?.match(DateExtractorRegex2) - if (hasDate && hasDate[0]) { - return DateExtractorNormalizer2(hasDate[0]) ?? undefined - } else { - return undefined - } +export const _unitTests = { + extractorFnForDate_ddmmyyyy: Extractors.find((it) => it.specPattern === 'date(dd/mm/yyyy)')?.extractorFn!, + extractorFnForDate_mmddyyyy: Extractors.find((it) => it.specPattern === 'date(mm/dd/yyyy)')?.extractorFn!, } diff --git a/src/custom-sort/sorting-spec-processor.ts b/src/custom-sort/sorting-spec-processor.ts index 2aabef5..591fea5 100644 --- a/src/custom-sort/sorting-spec-processor.ts +++ b/src/custom-sort/sorting-spec-processor.ts @@ -144,6 +144,8 @@ const OrderLiterals: { [key: string]: CustomSortOrderAscDescPair } = { const OrderByMetadataLexeme: string = 'by-metadata:' +const ValueExtractorLexeme: string = 'using-extractor:' + const OrderLevelsSeparator: string = ',' enum Attribute { @@ -1511,24 +1513,23 @@ export class SortingSpecProcessor { applyToMetadata = true const metadataNameAndOptionalExtractorSpec = orderSpec.substring(OrderByMetadataLexeme.length).trim() || undefined if (metadataNameAndOptionalExtractorSpec) { - if (metadataNameAndOptionalExtractorSpec.indexOf(' ') > -1) { - const metadataSpec = metadataNameAndOptionalExtractorSpec.split(' ') - metadataName = metadataSpec.shift() - const metadataExtractorSpec = metadataSpec?.shift() + if (metadataNameAndOptionalExtractorSpec.indexOf(ValueExtractorLexeme) > -1) { + const metadataSpec = metadataNameAndOptionalExtractorSpec.split(ValueExtractorLexeme) + metadataName = metadataSpec.shift()?.trim() + const metadataExtractorSpec = metadataSpec?.shift()?.trim() const hasMetadataExtractor = metadataExtractorSpec ? tryParseAsMDataExtractorSpec(metadataExtractorSpec) : undefined if (hasMetadataExtractor) { metadataExtractor = hasMetadataExtractor.m } else { - // TODO: raise error of syntax error - metadata name followed by unrecognized text - // take into account all of the texts resulting from the split(' ') - there could be more segments + return new AttrError(`${orderNameForErrorMsg} sorting order contains unrecognized value extractor: >>> ${metadataExtractorSpec} <<<`) } - orderSpec = '' // Intentionally ignore anything beyond the metadata name and extractor + orderSpec = '' // all consumed as metadata and extractor } else { metadataName = metadataNameAndOptionalExtractorSpec - orderSpec = '' // Intentionally ignore anything beyond the metadata name (and no known extractor) + orderSpec = '' // all consumed as metadata name } } else { - orderSpec = '' + orderSpec = '' // no metadata name found } } diff --git a/src/test/unit/mdata-extractors.spec.ts b/src/test/unit/mdata-extractors.spec.ts index 1a114d2..58973fd 100644 --- a/src/test/unit/mdata-extractors.spec.ts +++ b/src/test/unit/mdata-extractors.spec.ts @@ -1,38 +1,29 @@ import { - extractorForPattern1 + _unitTests } from '../../custom-sort/mdata-extractors' -describe('extractorForPattern1', () => { +describe('extractor for date(dd/mm/yyyy)', () => { const params = [ // Positive ['03/05/2019', '2019-05-03//'], + ['103/05/2019', '2019-05-03//'], + ['103/05/20193232', '2019-05-03//'], + ['99/99/9999', '9999-99-99//'], + ['00/00/0000', '0000-00-00//'], ['Created at: 03/05/2019', '2019-05-03//'], ['03/05/2019 | 22:00', '2019-05-03//'], ['Created at: 03/05/2019 | 22:00', '2019-05-03//'], - // TODO: more positive then negative examples - - ['13-Jan-2012', '2012-01-13//'], - ['3-Feb-2', '0002-02-03//'], - ['1-Mar-1900', '1900-03-01//'], - ['42-Apr-9999', '9999-04-42//'], - ['0-May-0', '0000-05-00//'], - ['21-Jun-2024', '2024-06-21//'], - ['7-Jul-1872', '1872-07-07//'], - ['15-Aug-1234', '1234-08-15//'], - ['1234-Sep-7777', '7777-09-1234//'], - ['3-Oct-2023', '2023-10-03//'], - ['8-Nov-2022', '2022-11-08//'], - ['18-Dec-2021', '2021-12-18//'], // Negative - ['88-Dec-2012', '2012-12-88//'], // Invalid case, Regexp on matcher in the caller should guard against this - ['13-JANUARY-2012', '2012-00-13//'], // Invalid case, Regexp on matcher in the caller should guard against this - ['1 .1', '0000-00-1 .1//'], // Invalid case, Regexp on matcher in the caller should guard against this - ['', '0000-00-00//'], // Invalid case, Regexp on matcher in the caller should guard against this - ['abc', '0000-00-abc//'], // Invalid case, Regexp on matcher in the caller should guard against this - ['def-abc', '0000-00-def//'], // Invalid case, Regexp on matcher in the caller should guard against this + ['88-Dec-2012', undefined], + ['13-JANUARY-2012', undefined], + ['1 .1', undefined], + ['', undefined], + ['abc', undefined], + ['def-abc', undefined], + ['3/5/2019', undefined], ]; it.each(params)('>%s< should become %s', (s: string, out: string) => { - expect(extractorForPattern1(s)).toBe(out) + expect(_unitTests.extractorFnForDate_ddmmyyyy(s)).toBe(out) }) }) diff --git a/src/test/unit/sorting-spec-processor.spec.ts b/src/test/unit/sorting-spec-processor.spec.ts index 3a7fbd1..922affb 100644 --- a/src/test/unit/sorting-spec-processor.spec.ts +++ b/src/test/unit/sorting-spec-processor.spec.ts @@ -4,7 +4,8 @@ import { CompoundDotNumberNormalizerFn, ConsumedFolderMatchingRegexp, consumeFolderByRegexpExpression, - convertPlainStringToRegex, Date_dd_Mmm_yyyy_NormalizerFn, + convertPlainStringToRegex, + Date_dd_Mmm_yyyy_NormalizerFn, detectSortingSymbols, escapeRegexUnsafeCharacters, extractSortingSymbol, @@ -14,8 +15,14 @@ import { RomanNumberNormalizerFn, SortingSpecProcessor } from "../../custom-sort/sorting-spec-processor" -import {CustomSortGroupType, CustomSortOrder, CustomSortSpec, IdentityNormalizerFn} from "../../custom-sort/custom-sort-types"; +import { + CustomSortGroupType, + CustomSortOrder, + CustomSortSpec, + IdentityNormalizerFn +} from "../../custom-sort/custom-sort-types"; import {FolderMatchingRegexp, FolderMatchingTreeNode} from "../../custom-sort/folder-matching-rules"; +import {_unitTests} from "../../custom-sort/mdata-extractors"; const txtInputExampleA: string = ` order-asc: a-z @@ -356,6 +363,17 @@ const expectedSortSpecsExampleA: { [key: string]: CustomSortSpec } = { } } +const txtInputExampleSortingSymbols: string = ` +/folders Chapter \\.d+ ... +/:files ...section \\-r+. +% Appendix \\-d+ (attachments) +Plain syntax\\R+ ... works? +And this kind of... \\D+plain syntax??? +Here goes ASCII word \\a+ +\\A+. is for any modern language word +\\[dd-Mmm-yyyy] for the specific date format of 12-Apr-2024 +` + const expectedSortSpecsExampleSortingSymbols: { [key: string]: CustomSortSpec } = { "mock-folder": { groups: [{ @@ -418,17 +436,67 @@ const expectedSortSpecsExampleSortingSymbols: { [key: string]: CustomSortSpec } } } -const txtInputExampleSortingSymbols: string = ` -/folders Chapter \\.d+ ... -/:files ...section \\-r+. -% Appendix \\-d+ (attachments) -Plain syntax\\R+ ... works? -And this kind of... \\D+plain syntax??? -Here goes ASCII word \\a+ -\\A+. is for any modern language word -\\[dd-Mmm-yyyy] for the specific date format of 12-Apr-2024 +const txtInputExampleMDataExtractors1: string = ` +< a-z by-metadata: created-by using-extractor: date(dd/mm/yyyy) +/folders Chapter... + > a-z by-metadata: updated-on using-extractor: date(mm/dd/yyyy) ` +// Tricky elements captured: +// - Order a-z. for by metadata is transformed to a-z (there is no notion of 'file extension' in metadata values) + +const txtInputExampleMDataExtractors2: string = ` +< a-z. by-metadata: created by using-extractor: date(mm/dd/yyyy), < true a-z. by-metadata: using-extractor: date(dd/mm/yyyy) +/folders ...Chapter + > a-z. by-metadata: updated-on using-extractor: date(dd/mm/yyyy), > true a-z by-metadata: md2 using-extractor: date(mm/dd/yyyy) +` + +const expectedSortSpecsExampleMDataExtractors1: { [key: string]: CustomSortSpec } = { + "mock-folder": { + defaultOrder: CustomSortOrder.byMetadataFieldAlphabetical, + byMetadataField: 'created-by', + metadataFieldValueExtractor: _unitTests.extractorFnForDate_ddmmyyyy, + groups: [{ + foldersOnly: true, + type: CustomSortGroupType.ExactPrefix, + exactPrefix: 'Chapter', + order: CustomSortOrder.byMetadataFieldAlphabeticalReverse, + byMetadataField: 'updated-on', + metadataFieldValueExtractor: _unitTests.extractorFnForDate_mmddyyyy + }, { + type: CustomSortGroupType.Outsiders + }], + targetFoldersPaths: ['mock-folder'], + outsidersGroupIdx: 1 + } +} + +const expectedSortSpecsExampleMDataExtractors2: { [key: string]: CustomSortSpec } = { + "mock-folder": { + defaultOrder: CustomSortOrder.byMetadataFieldAlphabetical, + byMetadataField: 'created by', + metadataFieldValueExtractor: _unitTests.extractorFnForDate_mmddyyyy, + defaultSecondaryOrder: CustomSortOrder.byMetadataFieldTrueAlphabetical, + byMetadataFieldSecondary: '', + metadataFieldSecondaryValueExtractor: _unitTests.extractorFnForDate_ddmmyyyy, + groups: [{ + foldersOnly: true, + type: CustomSortGroupType.ExactSuffix, + exactSuffix: 'Chapter', + order: CustomSortOrder.byMetadataFieldAlphabeticalReverse, + byMetadataField: 'updated-on', + metadataFieldValueExtractor: _unitTests.extractorFnForDate_ddmmyyyy, + secondaryOrder: CustomSortOrder.byMetadataFieldTrueAlphabeticalReverse, + byMetadataFieldSecondary: 'md2', + metadataFieldSecondaryValueExtractor: _unitTests.extractorFnForDate_mmddyyyy + }, { + type: CustomSortGroupType.Outsiders + }], + targetFoldersPaths: ['mock-folder'], + outsidersGroupIdx: 1 + } +} + describe('SortingSpecProcessor', () => { let processor: SortingSpecProcessor; beforeEach(() => { @@ -449,6 +517,16 @@ describe('SortingSpecProcessor', () => { const result = processor.parseSortSpecFromText(inputTxtArr, 'mock-folder', 'custom-name-note.md') expect(result?.sortSpecByPath).toEqual(expectedSortSpecsExampleSortingSymbols) }) + it('should generate correct SortSpecs (example with mdata extractors)', () => { + const inputTxtArr: Array = txtInputExampleMDataExtractors1.split('\n') + const result = processor.parseSortSpecFromText(inputTxtArr, 'mock-folder', 'custom-name-note.md') + expect(result?.sortSpecByPath).toEqual(expectedSortSpecsExampleMDataExtractors1) + }) + it('should generate correct SortSpecs (example with mdata extractors, advanced)', () => { + const inputTxtArr: Array = txtInputExampleMDataExtractors2.split('\n') + const result = processor.parseSortSpecFromText(inputTxtArr, 'mock-folder', 'custom-name-note.md') + expect(result?.sortSpecByPath).toEqual(expectedSortSpecsExampleMDataExtractors2) + }) }) const txtInputNotDuplicatedSortSpec: string = ` @@ -2922,6 +3000,17 @@ describe('SortingSpecProcessor error detection and reporting', () => { `${ERR_PREFIX} 7:InvalidAttributeValue Secondary sorting direction order-asc: and desc are contradicting ${ERR_SUFFIX_IN_LINE(2)}`) expect(errorsLogger).toHaveBeenNthCalledWith(2, ERR_LINE_TXT('sorting: standard, order-asc: modified desc by-metadata: xyz // <-- and it is checked earlier than the by-metadata incompatible order')) }) + it('should reject unknown value extractor', () => { + const inputTxtArr: Array = ` + < a-z. by-metadata: created by using-extractor: date(mm/dd/YYYY) + `.replace(/\t/gi, '').split('\n') + const result = processor.parseSortSpecFromText(inputTxtArr, 'mock-folder', 'custom-name-note.md') + expect(result).toBeNull() + expect(errorsLogger).toHaveBeenCalledTimes(2) + expect(errorsLogger).toHaveBeenNthCalledWith(1, + `${ERR_PREFIX} 7:InvalidAttributeValue Primary sorting order contains unrecognized value extractor: >>> date(mm/dd/YYYY) <<< ${ERR_SUFFIX_IN_LINE(2)}`) + expect(errorsLogger).toHaveBeenNthCalledWith(2, ERR_LINE_TXT('< a-z. by-metadata: created by using-extractor: date(mm/dd/YYYY)')) + }) }) const txtInputTargetFolderCCC: string = ` From 99cea923221814be5c127efffe67fa58ca2d9350 Mon Sep 17 00:00:00 2001 From: SebastianMC <23032356+SebastianMC@users.noreply.github.com> Date: Wed, 6 Nov 2024 10:00:23 +0100 Subject: [PATCH 5/5] #171 - a PoC of the idea of metadata value extractors. Unit tests. --- src/test/unit/mdata-extractors.spec.ts | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/test/unit/mdata-extractors.spec.ts b/src/test/unit/mdata-extractors.spec.ts index 58973fd..721e623 100644 --- a/src/test/unit/mdata-extractors.spec.ts +++ b/src/test/unit/mdata-extractors.spec.ts @@ -27,3 +27,29 @@ describe('extractor for date(dd/mm/yyyy)', () => { expect(_unitTests.extractorFnForDate_ddmmyyyy(s)).toBe(out) }) }) + +describe('extractor for date(mm/dd/yyyy)', () => { + const params = [ + // Positive + ['03/05/2019', '2019-03-05//'], + ['103/05/2019', '2019-03-05//'], + ['103/05/20193232', '2019-03-05//'], + ['99/99/9999', '9999-99-99//'], + ['00/00/0000', '0000-00-00//'], + ['Created at: 03/05/2019', '2019-03-05//'], + ['03/05/2019 | 22:00', '2019-03-05//'], + ['Created at: 03/05/2019 | 22:00', '2019-03-05//'], + + // Negative + ['88-Dec-2012', undefined], + ['13-JANUARY-2012', undefined], + ['1 .1', undefined], + ['', undefined], + ['abc', undefined], + ['def-abc', undefined], + ['3/5/2019', undefined], + ]; + it.each(params)('>%s< should become %s', (s: string, out: string) => { + expect(_unitTests.extractorFnForDate_mmddyyyy(s)).toBe(out) + }) +})