Merge pull request #172 from SebastianMC/171-poc-metadata-value-extractors-idea

#171 metadata value extractors
This commit is contained in:
SebastianMC 2025-01-03 20:10:38 +01:00 committed by GitHub
commit de15f48add
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 279 additions and 13 deletions

View File

@ -1,3 +1,5 @@
import {MDataExtractor} from "./mdata-extractors";
export enum CustomSortGroupType {
Outsiders, // Not belonging to any of other groups
MatchAll, // like a wildcard *, used in connection with foldersOnly or filesOnly. The difference between the MatchAll and Outsiders is
@ -50,8 +52,10 @@ export enum CustomSortOrder {
/**
 * Value produced when a sorting-order attribute is recognized: the primary order and,
 * optionally, a secondary order. Each level can carry the name of the metadata field it
 * applies to and an extractor which is applied to that field's value before sorting.
 */
export interface RecognizedOrderValue {
// Primary sorting order
order: CustomSortOrder
// Metadata field name for the primary order (when sorting by metadata)
applyToMetadataField?: string
// Optional extractor applied to the primary metadata value
metadataValueExtractor?: MDataExtractor
// Secondary sorting order, if specified
secondaryOrder?: CustomSortOrder
// Metadata field name for the secondary order (when sorting by metadata)
secondaryApplyToMetadataField?: string
// Optional extractor applied to the secondary metadata value
secondaryMetadataValueExtractor?: MDataExtractor
}
export type NormalizerFn = (s: string) => string | null
@ -71,8 +75,10 @@ export interface CustomSortGroup {
regexSuffix?: RegExpSpec
order?: CustomSortOrder
byMetadataField?: string // for 'by-metadata:' sorting if the order is by metadata alphabetical or reverse
metadataFieldValueExtractor?: MDataExtractor // and its sorting value extractor
secondaryOrder?: CustomSortOrder
byMetadataFieldSecondary?: string // for 'by-metadata:' sorting if the order is by metadata alphabetical or reverse
metadataFieldSecondaryValueExtractor?: MDataExtractor
filesOnly?: boolean
matchFilenameWithExt?: boolean
foldersOnly?: boolean
@ -88,7 +94,9 @@ export interface CustomSortSpec {
defaultOrder?: CustomSortOrder
defaultSecondaryOrder?: CustomSortOrder
byMetadataField?: string // for 'by-metadata:' if the defaultOrder is by metadata
metadataFieldValueExtractor?: MDataExtractor // and its sorting value extractor
byMetadataFieldSecondary?: string
metadataFieldSecondaryValueExtractor?: MDataExtractor
groups: Array<CustomSortGroup>
groupsShadow?: Array<CustomSortGroup> // A shallow copy of groups, used at applying sorting for items in a folder.
// Stores folder-specific values (e.g. macros expanded with folder-specific values)

View File

@ -29,6 +29,7 @@ import {
BookmarksPluginInterface
} from "../utils/BookmarksCorePluginSignature";
import {CustomSortPluginAPI} from "../custom-sort-plugin";
import {MDataExtractor} from "./mdata-extractors";
export interface ProcessingContext {
// For internal transient use
@ -365,13 +366,14 @@ export const matchGroupRegex = (theRegex: RegExpSpec, nameForMatching: string):
return [false, undefined, undefined]
}
const mdataValueFromFMCaches = (mdataFieldName: string, fc?: FrontMatterCache, fcPrio?: FrontMatterCache): any => {
const mdataValueFromFMCaches = (mdataFieldName: string, mdataExtractor?: MDataExtractor, fc?: FrontMatterCache, fcPrio?: FrontMatterCache): any => {
let prioValue = undefined
if (fcPrio) {
prioValue = fcPrio?.[mdataFieldName]
}
return prioValue ?? fc?.[mdataFieldName]
const rawMDataValue = prioValue ?? fc?.[mdataFieldName]
return mdataExtractor ? mdataExtractor(rawMDataValue) : rawMDataValue
}
export const determineSortingGroup = function (entry: TFile | TFolder, spec: CustomSortSpec, ctx?: ProcessingContext): FolderItemForSorting {
@ -568,13 +570,29 @@ export const determineSortingGroup = function (entry: TFile | TFolder, spec: Cus
}
}
if (isPrimaryOrderByMetadata) metadataValueToSortBy =
mdataValueFromFMCaches (group?.byMetadataField || group?.withMetadataFieldName || DEFAULT_METADATA_FIELD_FOR_SORTING, frontMatterCache, prioFrontMatterCache)
mdataValueFromFMCaches (
group?.byMetadataField || group?.withMetadataFieldName || DEFAULT_METADATA_FIELD_FOR_SORTING,
group?.metadataFieldValueExtractor,
frontMatterCache,
prioFrontMatterCache)
if (isSecondaryOrderByMetadata) metadataValueSecondaryToSortBy =
mdataValueFromFMCaches (group?.byMetadataFieldSecondary || group?.withMetadataFieldName || DEFAULT_METADATA_FIELD_FOR_SORTING, frontMatterCache, prioFrontMatterCache)
mdataValueFromFMCaches (
group?.byMetadataFieldSecondary || group?.withMetadataFieldName || DEFAULT_METADATA_FIELD_FOR_SORTING,
group?.metadataFieldSecondaryValueExtractor,
frontMatterCache,
prioFrontMatterCache)
if (isDerivedPrimaryByMetadata) metadataValueDerivedPrimaryToSortBy =
mdataValueFromFMCaches (spec.byMetadataField || DEFAULT_METADATA_FIELD_FOR_SORTING, frontMatterCache, prioFrontMatterCache)
mdataValueFromFMCaches (
spec.byMetadataField || DEFAULT_METADATA_FIELD_FOR_SORTING,
spec.metadataFieldValueExtractor,
frontMatterCache,
prioFrontMatterCache)
if (isDerivedSecondaryByMetadata) metadataValueDerivedSecondaryToSortBy =
mdataValueFromFMCaches (spec.byMetadataFieldSecondary || DEFAULT_METADATA_FIELD_FOR_SORTING, frontMatterCache, prioFrontMatterCache)
mdataValueFromFMCaches (
spec.byMetadataFieldSecondary || DEFAULT_METADATA_FIELD_FOR_SORTING,
spec.metadataFieldSecondaryValueExtractor,
frontMatterCache,
prioFrontMatterCache)
}
}
}

View File

@ -0,0 +1,64 @@
import {
getNormalizedDate_NormalizerFn_for
} from "./matchers";
import {NormalizerFn} from "./custom-sort-types";
// Function turning a metadata value into the text to sort by; undefined when nothing could be extracted
type ExtractorFn = (mdataValue: string) => string|undefined
// Pairs the textual name of an extractor (as written in a sorting specification) with its implementation
interface DateExtractorSpec {
// Only string patterns are matched by tryParseAsMDataExtractorSpec; RegExp patterns are skipped there
specPattern: string|RegExp,
extractorFn: ExtractorFn
}
// Public, callable shape of a metadata value extractor (same call signature as ExtractorFn)
export interface MDataExtractor {
(mdataValue: string): string|undefined
}
// Outcome of successfully parsing an extractor specification
export interface MDataExtractorParseResult {
// The recognized extractor
m: MDataExtractor
// Trimmed text which followed the extractor name in the parsed string
remainder: string
}
/**
 * Builds an extractor which locates the first match of `extractorRegexp` inside a metadata
 * value and passes the matched text through `extractedValueNormalizer`.
 *
 * @param extractorRegexp regexp locating the fragment of interest inside the metadata value
 * @param extractedValueNormalizer converts the matched fragment into the value to sort by;
 *        a `null` result is reported as `undefined`
 * @returns an extractor returning the normalized fragment, or `undefined` when the value is
 *          not a string, nothing (or only an empty string) matched, or the normalizer
 *          rejected the match
 */
function getGenericPlainRegexpExtractorFn(extractorRegexp: RegExp, extractedValueNormalizer: NormalizerFn) {
    return (mdataValue: string): string | undefined => {
        // The value comes straight from the front matter cache untyped, so a number, boolean,
        // array etc. can arrive here despite the declared type. Optional chaining only guards
        // null/undefined, hence the explicit type check to avoid a TypeError on `.match`.
        if (typeof mdataValue !== 'string') {
            return undefined
        }
        const matchedText = mdataValue.match(extractorRegexp)?.[0]
        if (!matchedText) {
            return undefined
        }
        return extractedValueNormalizer(matchedText) ?? undefined
    }
}
// Registry of the supported extractors. Both entries look for the same NN/NN/NNNN fragment
// and differ only in which slash-separated part is handed to the date normalizer first
// (presumably the positions of day and month; the last argument, 2, stays fixed).
const Extractors: DateExtractorSpec[] = [
    {name: 'date(dd/mm/yyyy)', firstIdx: 0, secondIdx: 1},
    {name: 'date(mm/dd/yyyy)', firstIdx: 1, secondIdx: 0}
].map(({name, firstIdx, secondIdx}): DateExtractorSpec => ({
    specPattern: name,
    extractorFn: getGenericPlainRegexpExtractorFn(
        new RegExp('\\d{2}/\\d{2}/\\d{4}'),
        getNormalizedDate_NormalizerFn_for('/', firstIdx, secondIdx, 2)
    )
}))
/**
 * Tries to recognize a known extractor name at the beginning of `s`.
 *
 * Simplistic initial implementation of the idea, with the two extractors hardcoded in `Extractors`.
 *
 * @param s text expected to start with an extractor name; surrounding whitespace is ignored
 * @returns the recognized extractor together with the trimmed text following its name,
 *          or `undefined` when `s` does not start with any known extractor name
 */
export const tryParseAsMDataExtractorSpec = (s: string): MDataExtractorParseResult|undefined => {
    // Trim once, so that the prefix check and the remainder are computed from the same text.
    // (Cutting the remainder from the untrimmed input would be off by the leading whitespace.)
    const spec = s.trim()
    for (const extrSpec of Extractors) {
        const pattern = extrSpec.specPattern
        // Only literal (string) patterns are supported here; RegExp patterns are skipped
        if (typeof pattern === 'string' && spec.startsWith(pattern)) {
            return {
                m: extrSpec.extractorFn,
                remainder: spec.substring(pattern.length).trim()
            }
        }
    }
    return undefined
}
// Looks up the extractor function registered under the given name
const findExtractorFn = (specPattern: string): ExtractorFn =>
    Extractors.find((spec) => spec.specPattern === specPattern)?.extractorFn!

// Internals exposed solely for unit tests
export const _unitTests = {
    extractorFnForDate_ddmmyyyy: findExtractorFn('date(dd/mm/yyyy)'),
    extractorFnForDate_mmddyyyy: findExtractorFn('date(mm/dd/yyyy)'),
}

View File

@ -35,6 +35,10 @@ import {
MATCH_CHILDREN_2_SUFFIX,
NO_PRIORITY
} from "./folder-matching-rules"
import {
MDataExtractor,
tryParseAsMDataExtractorSpec
} from "./mdata-extractors";
interface ProcessingContext {
folderPath: string
@ -114,6 +118,7 @@ interface CustomSortOrderAscDescPair {
// Parsed specification of a single sorting level
interface CustomSortOrderSpec {
order: CustomSortOrder
// Name of the metadata field given after 'by-metadata:' (if any)
byMetadataField?: string
// Extractor recognized after 'using-extractor:' (if any)
metadataFieldExtractor?: MDataExtractor
}
const MAX_SORT_LEVEL: number = 1
@ -141,6 +146,8 @@ const OrderLiterals: { [key: string]: CustomSortOrderAscDescPair } = {
const OrderByMetadataLexeme: string = 'by-metadata:'
const ValueExtractorLexeme: string = 'using-extractor:'
const OrderLevelsSeparator: string = ','
enum Attribute {
@ -1090,8 +1097,10 @@ export class SortingSpecProcessor {
}
this.ctx.currentSpec.defaultOrder = (attr.value as RecognizedOrderValue).order
this.ctx.currentSpec.byMetadataField = (attr.value as RecognizedOrderValue).applyToMetadataField
this.ctx.currentSpec.metadataFieldValueExtractor = (attr.value as RecognizedOrderValue).metadataValueExtractor
this.ctx.currentSpec.defaultSecondaryOrder = (attr.value as RecognizedOrderValue).secondaryOrder
this.ctx.currentSpec.byMetadataFieldSecondary = (attr.value as RecognizedOrderValue).secondaryApplyToMetadataField
this.ctx.currentSpec.metadataFieldSecondaryValueExtractor = (attr.value as RecognizedOrderValue).secondaryMetadataValueExtractor
return true;
} else if (attr.nesting > 0) { // For now only distinguishing nested (indented) and not-nested (not-indented), the depth doesn't matter
if (!this.ctx.currentSpec || !this.ctx.currentSpecGroup) {
@ -1105,8 +1114,10 @@ export class SortingSpecProcessor {
}
this.ctx.currentSpecGroup.order = (attr.value as RecognizedOrderValue).order
this.ctx.currentSpecGroup.byMetadataField = (attr.value as RecognizedOrderValue).applyToMetadataField
this.ctx.currentSpecGroup.metadataFieldValueExtractor = (attr.value as RecognizedOrderValue).metadataValueExtractor
this.ctx.currentSpecGroup.secondaryOrder = (attr.value as RecognizedOrderValue).secondaryOrder
this.ctx.currentSpecGroup.byMetadataFieldSecondary = (attr.value as RecognizedOrderValue).secondaryApplyToMetadataField
this.ctx.currentSpecGroup.metadataFieldSecondaryValueExtractor = (attr.value as RecognizedOrderValue).secondaryMetadataValueExtractor
return true;
}
}
@ -1506,10 +1517,29 @@ export class SortingSpecProcessor {
orderSpec = hasDirectionPostfix ? orderSpec.substring(hasDirectionPostfix.lexeme.length).trim() : orderSpec
let metadataName: string|undefined
let metadataExtractor: MDataExtractor|undefined
if (orderSpec.startsWith(OrderByMetadataLexeme)) {
applyToMetadata = true
metadataName = orderSpec.substring(OrderByMetadataLexeme.length).trim() || undefined
orderSpec = '' // metadataName is unparsed, consumes the remainder string, even if malformed, e.g. with infix spaces
const metadataNameAndOptionalExtractorSpec = orderSpec.substring(OrderByMetadataLexeme.length).trim() || undefined
if (metadataNameAndOptionalExtractorSpec) {
if (metadataNameAndOptionalExtractorSpec.indexOf(ValueExtractorLexeme) > -1) {
const metadataSpec = metadataNameAndOptionalExtractorSpec.split(ValueExtractorLexeme)
metadataName = metadataSpec.shift()?.trim()
const metadataExtractorSpec = metadataSpec?.shift()?.trim()
const hasMetadataExtractor = metadataExtractorSpec ? tryParseAsMDataExtractorSpec(metadataExtractorSpec) : undefined
if (hasMetadataExtractor) {
metadataExtractor = hasMetadataExtractor.m
} else {
return new AttrError(`${orderNameForErrorMsg} sorting order contains unrecognized value extractor: >>> ${metadataExtractorSpec} <<<`)
}
orderSpec = '' // all consumed as metadata and extractor
} else {
metadataName = metadataNameAndOptionalExtractorSpec
orderSpec = '' // all consumed as metadata name
}
} else {
orderSpec = '' // no metadata name found
}
}
// check for any superfluous text
@ -1562,7 +1592,8 @@ export class SortingSpecProcessor {
}
sortOrderSpec[level] = {
order: order!,
byMetadataField: metadataName
byMetadataField: metadataName,
metadataFieldExtractor: metadataExtractor
}
}
return sortOrderSpec
@ -1573,8 +1604,10 @@ export class SortingSpecProcessor {
return recognized ? (recognized instanceof AttrError ? recognized : {
order: recognized[0].order,
applyToMetadataField: recognized[0].byMetadataField,
metadataValueExtractor: recognized[0].metadataFieldExtractor,
secondaryOrder: recognized[1]?.order,
secondaryApplyToMetadataField: recognized[1]?.byMetadataField
secondaryApplyToMetadataField: recognized[1]?.byMetadataField,
secondaryMetadataValueExtractor: recognized[1]?.metadataFieldExtractor
}) : null;
}

View File

@ -0,0 +1,55 @@
import {
_unitTests
} from '../../custom-sort/mdata-extractors'
describe('extractor for date(dd/mm/yyyy)', () => {
    // Each case: [metadata value, expected extractor result]
    const cases: Array<[string, string | undefined]> = [
        // Values containing a NN/NN/NNNN fragment
        ['03/05/2019', '2019-05-03//'],
        ['103/05/2019', '2019-05-03//'],
        ['103/05/20193232', '2019-05-03//'],
        ['99/99/9999', '9999-99-99//'],
        ['00/00/0000', '0000-00-00//'],
        ['Created at: 03/05/2019', '2019-05-03//'],
        ['03/05/2019 | 22:00', '2019-05-03//'],
        ['Created at: 03/05/2019 | 22:00', '2019-05-03//'],
        // Values without such a fragment
        ['88-Dec-2012', undefined],
        ['13-JANUARY-2012', undefined],
        ['1 .1', undefined],
        ['', undefined],
        ['abc', undefined],
        ['def-abc', undefined],
        ['3/5/2019', undefined],
    ]
    const extract = _unitTests.extractorFnForDate_ddmmyyyy
    it.each(cases)('>%s< should become %s', (input: string, expected: string | undefined) => {
        const actual = extract(input)
        expect(actual).toBe(expected)
    })
})
describe('extractor for date(mm/dd/yyyy)', () => {
    // Each case: [metadata value, expected extractor result]
    const cases: Array<[string, string | undefined]> = [
        // Values containing a NN/NN/NNNN fragment
        ['03/05/2019', '2019-03-05//'],
        ['103/05/2019', '2019-03-05//'],
        ['103/05/20193232', '2019-03-05//'],
        ['99/99/9999', '9999-99-99//'],
        ['00/00/0000', '0000-00-00//'],
        ['Created at: 03/05/2019', '2019-03-05//'],
        ['03/05/2019 | 22:00', '2019-03-05//'],
        ['Created at: 03/05/2019 | 22:00', '2019-03-05//'],
        // Values without such a fragment
        ['88-Dec-2012', undefined],
        ['13-JANUARY-2012', undefined],
        ['1 .1', undefined],
        ['', undefined],
        ['abc', undefined],
        ['def-abc', undefined],
        ['3/5/2019', undefined],
    ]
    const extract = _unitTests.extractorFnForDate_mmddyyyy
    it.each(cases)('>%s< should become %s', (input: string, expected: string | undefined) => {
        const actual = extract(input)
        expect(actual).toBe(expected)
    })
})

View File

@ -16,8 +16,14 @@ import {
RomanNumberNormalizerFn,
SortingSpecProcessor
} from "../../custom-sort/sorting-spec-processor"
import {CustomSortGroupType, CustomSortOrder, CustomSortSpec, IdentityNormalizerFn} from "../../custom-sort/custom-sort-types";
import {
CustomSortGroupType,
CustomSortOrder,
CustomSortSpec,
IdentityNormalizerFn
} from "../../custom-sort/custom-sort-types";
import {FolderMatchingRegexp, FolderMatchingTreeNode} from "../../custom-sort/folder-matching-rules";
import {_unitTests} from "../../custom-sort/mdata-extractors";
const txtInputExampleA: string = `
order-asc: a-z
@ -438,6 +444,67 @@ Here goes ASCII word \\a+
\\[Mmm-dd-yyyy] for the specific date format of Apr-01-2024
`
// Sorting specification fixture: 'using-extractor:' applied to the default order
// and to the order of a folders-only group
const txtInputExampleMDataExtractors1: string = `
< a-z by-metadata: created-by using-extractor: date(dd/mm/yyyy)
/folders Chapter...
> a-z by-metadata: updated-on using-extractor: date(mm/dd/yyyy)
`
// Sorting specification fixture with primary and secondary orders, each using an extractor.
// Tricky elements captured:
// - Order a-z. for by metadata is transformed to a-z (there is no notion of 'file extension' in metadata values)
// - A metadata name containing a space ('created by')
// - An empty metadata name directly followed by 'using-extractor:'
const txtInputExampleMDataExtractors2: string = `
< a-z. by-metadata: created by using-extractor: date(mm/dd/yyyy), < true a-z. by-metadata: using-extractor: date(dd/mm/yyyy)
/folders ...Chapter
> a-z. by-metadata: updated-on using-extractor: date(dd/mm/yyyy), > true a-z by-metadata: md2 using-extractor: date(mm/dd/yyyy)
`
// Expected result of parsing txtInputExampleMDataExtractors1 for 'mock-folder'.
// The extractor fields are compared against the very same function instances exposed via _unitTests.
const expectedSortSpecsExampleMDataExtractors1: { [key: string]: CustomSortSpec } = {
"mock-folder": {
defaultOrder: CustomSortOrder.byMetadataFieldAlphabetical,
byMetadataField: 'created-by',
metadataFieldValueExtractor: _unitTests.extractorFnForDate_ddmmyyyy,
groups: [{
foldersOnly: true,
type: CustomSortGroupType.ExactPrefix,
exactPrefix: 'Chapter',
order: CustomSortOrder.byMetadataFieldAlphabeticalReverse,
byMetadataField: 'updated-on',
metadataFieldValueExtractor: _unitTests.extractorFnForDate_mmddyyyy
}, {
type: CustomSortGroupType.Outsiders
}],
targetFoldersPaths: ['mock-folder'],
outsidersGroupIdx: 1
}
}
// Expected result of parsing txtInputExampleMDataExtractors2 for 'mock-folder'
const expectedSortSpecsExampleMDataExtractors2: { [key: string]: CustomSortSpec } = {
"mock-folder": {
defaultOrder: CustomSortOrder.byMetadataFieldAlphabetical,
byMetadataField: 'created by',
metadataFieldValueExtractor: _unitTests.extractorFnForDate_mmddyyyy,
defaultSecondaryOrder: CustomSortOrder.byMetadataFieldTrueAlphabetical,
// The secondary order in the input has no metadata name before 'using-extractor:', hence the empty string
byMetadataFieldSecondary: '',
metadataFieldSecondaryValueExtractor: _unitTests.extractorFnForDate_ddmmyyyy,
groups: [{
foldersOnly: true,
type: CustomSortGroupType.ExactSuffix,
exactSuffix: 'Chapter',
order: CustomSortOrder.byMetadataFieldAlphabeticalReverse,
byMetadataField: 'updated-on',
metadataFieldValueExtractor: _unitTests.extractorFnForDate_ddmmyyyy,
secondaryOrder: CustomSortOrder.byMetadataFieldTrueAlphabeticalReverse,
byMetadataFieldSecondary: 'md2',
metadataFieldSecondaryValueExtractor: _unitTests.extractorFnForDate_mmddyyyy
}, {
type: CustomSortGroupType.Outsiders
}],
targetFoldersPaths: ['mock-folder'],
outsidersGroupIdx: 1
}
}
describe('SortingSpecProcessor', () => {
let processor: SortingSpecProcessor;
beforeEach(() => {
@ -458,6 +525,16 @@ describe('SortingSpecProcessor', () => {
const result = processor.parseSortSpecFromText(inputTxtArr, 'mock-folder', 'custom-name-note.md')
expect(result?.sortSpecByPath).toEqual(expectedSortSpecsExampleSortingSymbols)
})
it('should generate correct SortSpecs (example with mdata extractors)', () => {
    // The fixture is handed over to the processor as an array of lines
    const specLines: Array<string> = txtInputExampleMDataExtractors1.split('\n')
    const parsed = processor.parseSortSpecFromText(specLines, 'mock-folder', 'custom-name-note.md')
    const specsByPath = parsed?.sortSpecByPath
    expect(specsByPath).toEqual(expectedSortSpecsExampleMDataExtractors1)
})
it('should generate correct SortSpecs (example with mdata extractors, advanced)', () => {
    // The fixture is handed over to the processor as an array of lines
    const specLines: Array<string> = txtInputExampleMDataExtractors2.split('\n')
    const parsed = processor.parseSortSpecFromText(specLines, 'mock-folder', 'custom-name-note.md')
    const specsByPath = parsed?.sortSpecByPath
    expect(specsByPath).toEqual(expectedSortSpecsExampleMDataExtractors2)
})
})
const txtInputNotDuplicatedSortSpec: string = `
@ -2931,6 +3008,17 @@ describe('SortingSpecProcessor error detection and reporting', () => {
`${ERR_PREFIX} 7:InvalidAttributeValue Secondary sorting direction order-asc: and desc are contradicting ${ERR_SUFFIX_IN_LINE(2)}`)
expect(errorsLogger).toHaveBeenNthCalledWith(2, ERR_LINE_TXT('sorting: standard, order-asc: modified desc by-metadata: xyz // <-- and it is checked earlier than the by-metadata incompatible order'))
})
it('should reject unknown value extractor', () => {
    // The extractor name differs from the supported one only by letter case (YYYY vs. yyyy)
    const specText = `
< a-z. by-metadata: created by using-extractor: date(mm/dd/YYYY)
`
    const specLines: Array<string> = specText.replace(/\t/gi, '').split('\n')
    const parsed = processor.parseSortSpecFromText(specLines, 'mock-folder', 'custom-name-note.md')
    expect(parsed).toBeNull()
    // Two log entries are expected: the error message and the offending line
    expect(errorsLogger).toHaveBeenCalledTimes(2)
    const expectedErrorMsg = `${ERR_PREFIX} 7:InvalidAttributeValue Primary sorting order contains unrecognized value extractor: >>> date(mm/dd/YYYY) <<< ${ERR_SUFFIX_IN_LINE(2)}`
    expect(errorsLogger).toHaveBeenNthCalledWith(1, expectedErrorMsg)
    const expectedErrorLine = ERR_LINE_TXT('< a-z. by-metadata: created by using-extractor: date(mm/dd/YYYY)')
    expect(errorsLogger).toHaveBeenNthCalledWith(2, expectedErrorLine)
})
})
const txtInputTargetFolderCCC: string = `