#171 - a PoC of the idea of metadata value extractors. Extended syntax, unit tests, error handling
This commit is contained in:
parent
f210a412d3
commit
9e2e12046d
|
@ -1,13 +1,14 @@
|
|||
import {
|
||||
getNormalizedDate_NormalizerFn_for
|
||||
} from "./matchers";
|
||||
import {NormalizerFn} from "./custom-sort-types";
|
||||
|
||||
// Hardcoded extractor #1: spec text, locating regexp and normalizer for dd/mm/yyyy dates.
// NOTE(review): the numeric arguments of getNormalizedDate_NormalizerFn_for() look like the
// positions of day, month and year among the '/'-separated parts - confirm in ./matchers
const DateExtractorSpecPattern1 = 'date(dd/mm/yyyy)'
const DateExtractorRegex1 = new RegExp('\\d{2}/\\d{2}/\\d{4}')
const DateExtractorNormalizer1 = getNormalizedDate_NormalizerFn_for('/', 0, 1, 2)

// Hardcoded extractor #2: same regexp shape, the first two positional arguments are swapped
const DateExtractorSpecPattern2 = 'date(mm/dd/yyyy)'
const DateExtractorRegex2 = new RegExp('\\d{2}/\\d{2}/\\d{4}')
const DateExtractorNormalizer2 = getNormalizedDate_NormalizerFn_for('/', 1, 0, 2)
|
||||
/** Function deriving a string from a raw metadata value; yields undefined when there is nothing to return. */
type ExtractorFn = (mdataValue: string) => string | undefined

/** Pairs the pattern identifying an extractor in a spec with the function implementing that extractor. */
interface DateExtractorSpec {
    /** Text (or regexp) by which the extractor is recognized in a sorting spec */
    specPattern: string | RegExp
    /** Extraction function applied to metadata values */
    extractorFn: ExtractorFn
}
|
||||
|
||||
export interface MDataExtractor {
|
||||
(mdataValue: string): string|undefined
|
||||
|
@ -18,37 +19,46 @@ export interface MDataExtractorParseResult {
|
|||
remainder: string
|
||||
}
|
||||
|
||||
/**
 * Builds an extractor which finds the first fragment of a metadata value matching
 * `extractorRegexp` and returns that fragment passed through `extractedValueNormalizer`.
 *
 * @param extractorRegexp - regexp locating the fragment of interest inside the metadata value
 * @param extractedValueNormalizer - converts the matched fragment to its normalized form
 * @returns a function returning the normalized fragment, or `undefined` when the value is not
 *          a string, the regexp does not match, the match is empty, or the normalizer
 *          returns null/undefined
 */
function getGenericPlainRegexpExtractorFn(extractorRegexp: RegExp, extractedValueNormalizer: NormalizerFn) {
    return (mdataValue: string): string | undefined => {
        // The parameter is declared as string, yet optional chaining alone protects only against
        // null/undefined - any other non-string value would throw on .match(), hence the guard
        if (typeof mdataValue !== 'string') {
            return undefined
        }
        const matchedText = mdataValue.match(extractorRegexp)?.[0]
        if (!matchedText) {
            return undefined
        }
        return extractedValueNormalizer(matchedText) ?? undefined
    }
}
|
||||
|
||||
// Registry of the known value extractors. Each entry binds the spec text to an extractor built
// from a regexp (two digits / two digits / four digits) and a date normalizer.
// NOTE(review): the numeric arguments of getNormalizedDate_NormalizerFn_for() look like the
// positions of day, month and year among the '/'-separated parts - confirm in ./matchers
const Extractors: DateExtractorSpec[] = [
    {
        specPattern: 'date(dd/mm/yyyy)',
        extractorFn: getGenericPlainRegexpExtractorFn(
            new RegExp('\\d{2}/\\d{2}/\\d{4}'),
            getNormalizedDate_NormalizerFn_for('/', 0, 1, 2)
        )
    },
    {
        specPattern: 'date(mm/dd/yyyy)',
        extractorFn: getGenericPlainRegexpExtractorFn(
            new RegExp('\\d{2}/\\d{2}/\\d{4}'),
            getNormalizedDate_NormalizerFn_for('/', 1, 0, 2)
        )
    }
]
|
||||
|
||||
/**
 * Tries to recognize a metadata value extractor spec at the beginning of `s`
 * (leading and trailing whitespace is ignored).
 *
 * Only entries of `Extractors` having a string `specPattern` are considered; the first entry
 * whose pattern is a prefix of the trimmed text wins.
 *
 * @param s - text expected to start with an extractor spec, e.g. 'date(dd/mm/yyyy)'
 * @returns the extractor function (`m`) and the trimmed text following the spec (`remainder`),
 *          or `undefined` when no known extractor spec is recognized
 */
export const tryParseAsMDataExtractorSpec = (s: string): MDataExtractorParseResult|undefined => {
    // Match and slice the very same (trimmed) text, otherwise leading whitespace of `s`
    // would shift the offset and corrupt the remainder
    const trimmedSpec = s.trim()
    for (const extrSpec of Extractors) {
        if ('string' === typeof extrSpec.specPattern && trimmedSpec.startsWith(extrSpec.specPattern)) {
            return {
                m: extrSpec.extractorFn,
                remainder: trimmedSpec.substring(extrSpec.specPattern.length).trim()
            }
        }
    }
    return undefined
}
|
||||
|
||||
/**
 * Looks for the first fragment of `mdataValue` matching DateExtractorRegex1 and returns it
 * normalized by DateExtractorNormalizer1.
 *
 * @param mdataValue - raw metadata value
 * @returns the normalized fragment, or undefined when the value is null/undefined, nothing
 *          matches, or the normalizer yields null/undefined
 */
export function extractorForPattern1(mdataValue: string): string|undefined {
    const matchedDate = mdataValue?.match(DateExtractorRegex1)?.[0]
    return matchedDate ? (DateExtractorNormalizer1(matchedDate) ?? undefined) : undefined
}
|
||||
|
||||
/**
 * Looks for the first fragment of `mdataValue` matching DateExtractorRegex2 and returns it
 * normalized by DateExtractorNormalizer2.
 *
 * @param mdataValue - raw metadata value
 * @returns the normalized fragment, or undefined when the value is null/undefined, nothing
 *          matches, or the normalizer yields null/undefined
 */
export function extractorForPattern2(mdataValue: string): string|undefined {
    const hasDate = mdataValue?.match(DateExtractorRegex2)
    if (hasDate && hasDate[0]) {
        return DateExtractorNormalizer2(hasDate[0]) ?? undefined
    } else {
        return undefined
    }
}
|
||||
// Exposes the individual extractor functions from the registry for unit tests.
// The non-null assertion assumes both spec patterns are present in Extractors.
export const _unitTests = (() => {
    const extractorFnOf = (specPattern: string) =>
        Extractors.find((spec) => spec.specPattern === specPattern)?.extractorFn!
    return {
        extractorFnForDate_ddmmyyyy: extractorFnOf('date(dd/mm/yyyy)'),
        extractorFnForDate_mmddyyyy: extractorFnOf('date(mm/dd/yyyy)'),
    }
})()
|
||||
|
|
|
@ -144,6 +144,8 @@ const OrderLiterals: { [key: string]: CustomSortOrderAscDescPair } = {
|
|||
|
||||
const OrderByMetadataLexeme: string = 'by-metadata:'
|
||||
|
||||
const ValueExtractorLexeme: string = 'using-extractor:'
|
||||
|
||||
const OrderLevelsSeparator: string = ','
|
||||
|
||||
enum Attribute {
|
||||
|
@ -1511,24 +1513,23 @@ export class SortingSpecProcessor {
|
|||
applyToMetadata = true
|
||||
const metadataNameAndOptionalExtractorSpec = orderSpec.substring(OrderByMetadataLexeme.length).trim() || undefined
|
||||
if (metadataNameAndOptionalExtractorSpec) {
|
||||
if (metadataNameAndOptionalExtractorSpec.indexOf(' ') > -1) {
|
||||
const metadataSpec = metadataNameAndOptionalExtractorSpec.split(' ')
|
||||
metadataName = metadataSpec.shift()
|
||||
const metadataExtractorSpec = metadataSpec?.shift()
|
||||
if (metadataNameAndOptionalExtractorSpec.indexOf(ValueExtractorLexeme) > -1) {
|
||||
const metadataSpec = metadataNameAndOptionalExtractorSpec.split(ValueExtractorLexeme)
|
||||
metadataName = metadataSpec.shift()?.trim()
|
||||
const metadataExtractorSpec = metadataSpec?.shift()?.trim()
|
||||
const hasMetadataExtractor = metadataExtractorSpec ? tryParseAsMDataExtractorSpec(metadataExtractorSpec) : undefined
|
||||
if (hasMetadataExtractor) {
|
||||
metadataExtractor = hasMetadataExtractor.m
|
||||
} else {
|
||||
// TODO: raise error of syntax error - metadata name followed by unrecognized text
|
||||
// take into account all of the texts resulting from the split(' ') - there could be more segments
|
||||
return new AttrError(`${orderNameForErrorMsg} sorting order contains unrecognized value extractor: >>> ${metadataExtractorSpec} <<<`)
|
||||
}
|
||||
orderSpec = '' // Intentionally ignore anything beyond the metadata name and extractor
|
||||
orderSpec = '' // all consumed as metadata and extractor
|
||||
} else {
|
||||
metadataName = metadataNameAndOptionalExtractorSpec
|
||||
orderSpec = '' // Intentionally ignore anything beyond the metadata name (and no known extractor)
|
||||
orderSpec = '' // all consumed as metadata name
|
||||
}
|
||||
} else {
|
||||
orderSpec = ''
|
||||
orderSpec = '' // no metadata name found
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,38 +1,29 @@
|
|||
import {
|
||||
extractorForPattern1
|
||||
_unitTests
|
||||
} from '../../custom-sort/mdata-extractors'
|
||||
|
||||
// Table-driven check of the date(dd/mm/yyyy) extractor: each row is [metadata value, expected
// extraction result]; undefined is expected when the value contains no dd/dd/dddd fragment
describe('extractor for date(dd/mm/yyyy)', () => {
    const params: Array<[string, string | undefined]> = [
        // Positive
        ['03/05/2019', '2019-05-03//'],
        ['103/05/2019', '2019-05-03//'],
        ['103/05/20193232', '2019-05-03//'],
        ['99/99/9999', '9999-99-99//'],
        ['00/00/0000', '0000-00-00//'],
        ['Created at: 03/05/2019', '2019-05-03//'],
        ['03/05/2019 | 22:00', '2019-05-03//'],
        ['Created at: 03/05/2019 | 22:00', '2019-05-03//'],

        // TODO: more positive then negative examples

        // Negative
        ['88-Dec-2012', undefined],
        ['13-JANUARY-2012', undefined],
        ['1 .1', undefined],
        ['', undefined],
        ['abc', undefined],
        ['def-abc', undefined],
        ['3/5/2019', undefined],
    ];
    it.each(params)('>%s< should become %s', (s, out) => {
        expect(_unitTests.extractorFnForDate_ddmmyyyy(s)).toBe(out)
    })
})
|
||||
|
|
|
@ -4,7 +4,8 @@ import {
|
|||
CompoundDotNumberNormalizerFn,
|
||||
ConsumedFolderMatchingRegexp,
|
||||
consumeFolderByRegexpExpression,
|
||||
convertPlainStringToRegex, Date_dd_Mmm_yyyy_NormalizerFn,
|
||||
convertPlainStringToRegex,
|
||||
Date_dd_Mmm_yyyy_NormalizerFn,
|
||||
detectSortingSymbols,
|
||||
escapeRegexUnsafeCharacters,
|
||||
extractSortingSymbol,
|
||||
|
@ -14,8 +15,14 @@ import {
|
|||
RomanNumberNormalizerFn,
|
||||
SortingSpecProcessor
|
||||
} from "../../custom-sort/sorting-spec-processor"
|
||||
import {CustomSortGroupType, CustomSortOrder, CustomSortSpec, IdentityNormalizerFn} from "../../custom-sort/custom-sort-types";
|
||||
import {
|
||||
CustomSortGroupType,
|
||||
CustomSortOrder,
|
||||
CustomSortSpec,
|
||||
IdentityNormalizerFn
|
||||
} from "../../custom-sort/custom-sort-types";
|
||||
import {FolderMatchingRegexp, FolderMatchingTreeNode} from "../../custom-sort/folder-matching-rules";
|
||||
import {_unitTests} from "../../custom-sort/mdata-extractors";
|
||||
|
||||
const txtInputExampleA: string = `
|
||||
order-asc: a-z
|
||||
|
@ -356,6 +363,17 @@ const expectedSortSpecsExampleA: { [key: string]: CustomSortSpec } = {
|
|||
}
|
||||
}
|
||||
|
||||
const txtInputExampleSortingSymbols: string = `
|
||||
/folders Chapter \\.d+ ...
|
||||
/:files ...section \\-r+.
|
||||
% Appendix \\-d+ (attachments)
|
||||
Plain syntax\\R+ ... works?
|
||||
And this kind of... \\D+plain syntax???
|
||||
Here goes ASCII word \\a+
|
||||
\\A+. is for any modern language word
|
||||
\\[dd-Mmm-yyyy] for the specific date format of 12-Apr-2024
|
||||
`
|
||||
|
||||
const expectedSortSpecsExampleSortingSymbols: { [key: string]: CustomSortSpec } = {
|
||||
"mock-folder": {
|
||||
groups: [{
|
||||
|
@ -418,17 +436,67 @@ const expectedSortSpecsExampleSortingSymbols: { [key: string]: CustomSortSpec }
|
|||
}
|
||||
}
|
||||
|
||||
const txtInputExampleSortingSymbols: string = `
|
||||
/folders Chapter \\.d+ ...
|
||||
/:files ...section \\-r+.
|
||||
% Appendix \\-d+ (attachments)
|
||||
Plain syntax\\R+ ... works?
|
||||
And this kind of... \\D+plain syntax???
|
||||
Here goes ASCII word \\a+
|
||||
\\A+. is for any modern language word
|
||||
\\[dd-Mmm-yyyy] for the specific date format of 12-Apr-2024
|
||||
const txtInputExampleMDataExtractors1: string = `
|
||||
< a-z by-metadata: created-by using-extractor: date(dd/mm/yyyy)
|
||||
/folders Chapter...
|
||||
> a-z by-metadata: updated-on using-extractor: date(mm/dd/yyyy)
|
||||
`
|
||||
|
||||
// Tricky elements captured:
|
||||
// - The order a-z. combined with by-metadata: is transformed to a-z (there is no notion of 'file extension' in metadata values)
|
||||
|
||||
const txtInputExampleMDataExtractors2: string = `
|
||||
< a-z. by-metadata: created by using-extractor: date(mm/dd/yyyy), < true a-z. by-metadata: using-extractor: date(dd/mm/yyyy)
|
||||
/folders ...Chapter
|
||||
> a-z. by-metadata: updated-on using-extractor: date(dd/mm/yyyy), > true a-z by-metadata: md2 using-extractor: date(mm/dd/yyyy)
|
||||
`
|
||||
|
||||
const expectedSortSpecsExampleMDataExtractors1: { [key: string]: CustomSortSpec } = {
|
||||
"mock-folder": {
|
||||
defaultOrder: CustomSortOrder.byMetadataFieldAlphabetical,
|
||||
byMetadataField: 'created-by',
|
||||
metadataFieldValueExtractor: _unitTests.extractorFnForDate_ddmmyyyy,
|
||||
groups: [{
|
||||
foldersOnly: true,
|
||||
type: CustomSortGroupType.ExactPrefix,
|
||||
exactPrefix: 'Chapter',
|
||||
order: CustomSortOrder.byMetadataFieldAlphabeticalReverse,
|
||||
byMetadataField: 'updated-on',
|
||||
metadataFieldValueExtractor: _unitTests.extractorFnForDate_mmddyyyy
|
||||
}, {
|
||||
type: CustomSortGroupType.Outsiders
|
||||
}],
|
||||
targetFoldersPaths: ['mock-folder'],
|
||||
outsidersGroupIdx: 1
|
||||
}
|
||||
}
|
||||
|
||||
const expectedSortSpecsExampleMDataExtractors2: { [key: string]: CustomSortSpec } = {
|
||||
"mock-folder": {
|
||||
defaultOrder: CustomSortOrder.byMetadataFieldAlphabetical,
|
||||
byMetadataField: 'created by',
|
||||
metadataFieldValueExtractor: _unitTests.extractorFnForDate_mmddyyyy,
|
||||
defaultSecondaryOrder: CustomSortOrder.byMetadataFieldTrueAlphabetical,
|
||||
byMetadataFieldSecondary: '',
|
||||
metadataFieldSecondaryValueExtractor: _unitTests.extractorFnForDate_ddmmyyyy,
|
||||
groups: [{
|
||||
foldersOnly: true,
|
||||
type: CustomSortGroupType.ExactSuffix,
|
||||
exactSuffix: 'Chapter',
|
||||
order: CustomSortOrder.byMetadataFieldAlphabeticalReverse,
|
||||
byMetadataField: 'updated-on',
|
||||
metadataFieldValueExtractor: _unitTests.extractorFnForDate_ddmmyyyy,
|
||||
secondaryOrder: CustomSortOrder.byMetadataFieldTrueAlphabeticalReverse,
|
||||
byMetadataFieldSecondary: 'md2',
|
||||
metadataFieldSecondaryValueExtractor: _unitTests.extractorFnForDate_mmddyyyy
|
||||
}, {
|
||||
type: CustomSortGroupType.Outsiders
|
||||
}],
|
||||
targetFoldersPaths: ['mock-folder'],
|
||||
outsidersGroupIdx: 1
|
||||
}
|
||||
}
|
||||
|
||||
describe('SortingSpecProcessor', () => {
|
||||
let processor: SortingSpecProcessor;
|
||||
beforeEach(() => {
|
||||
|
@ -449,6 +517,16 @@ describe('SortingSpecProcessor', () => {
|
|||
const result = processor.parseSortSpecFromText(inputTxtArr, 'mock-folder', 'custom-name-note.md')
|
||||
expect(result?.sortSpecByPath).toEqual(expectedSortSpecsExampleSortingSymbols)
|
||||
})
|
||||
it('should generate correct SortSpecs (example with mdata extractors)', () => {
|
||||
const inputTxtArr: Array<string> = txtInputExampleMDataExtractors1.split('\n')
|
||||
const result = processor.parseSortSpecFromText(inputTxtArr, 'mock-folder', 'custom-name-note.md')
|
||||
expect(result?.sortSpecByPath).toEqual(expectedSortSpecsExampleMDataExtractors1)
|
||||
})
|
||||
it('should generate correct SortSpecs (example with mdata extractors, advanced)', () => {
|
||||
const inputTxtArr: Array<string> = txtInputExampleMDataExtractors2.split('\n')
|
||||
const result = processor.parseSortSpecFromText(inputTxtArr, 'mock-folder', 'custom-name-note.md')
|
||||
expect(result?.sortSpecByPath).toEqual(expectedSortSpecsExampleMDataExtractors2)
|
||||
})
|
||||
})
|
||||
|
||||
const txtInputNotDuplicatedSortSpec: string = `
|
||||
|
@ -2922,6 +3000,17 @@ describe('SortingSpecProcessor error detection and reporting', () => {
|
|||
`${ERR_PREFIX} 7:InvalidAttributeValue Secondary sorting direction order-asc: and desc are contradicting ${ERR_SUFFIX_IN_LINE(2)}`)
|
||||
expect(errorsLogger).toHaveBeenNthCalledWith(2, ERR_LINE_TXT('sorting: standard, order-asc: modified desc by-metadata: xyz // <-- and it is checked earlier than the by-metadata incompatible order'))
|
||||
})
|
||||
it('should reject unknown value extractor', () => {
|
||||
const inputTxtArr: Array<string> = `
|
||||
< a-z. by-metadata: created by using-extractor: date(mm/dd/YYYY)
|
||||
`.replace(/\t/gi, '').split('\n')
|
||||
const result = processor.parseSortSpecFromText(inputTxtArr, 'mock-folder', 'custom-name-note.md')
|
||||
expect(result).toBeNull()
|
||||
expect(errorsLogger).toHaveBeenCalledTimes(2)
|
||||
expect(errorsLogger).toHaveBeenNthCalledWith(1,
|
||||
`${ERR_PREFIX} 7:InvalidAttributeValue Primary sorting order contains unrecognized value extractor: >>> date(mm/dd/YYYY) <<< ${ERR_SUFFIX_IN_LINE(2)}`)
|
||||
expect(errorsLogger).toHaveBeenNthCalledWith(2, ERR_LINE_TXT('< a-z. by-metadata: created by using-extractor: date(mm/dd/YYYY)'))
|
||||
})
|
||||
})
|
||||
|
||||
const txtInputTargetFolderCCC: string = `
|
||||
|
|
Loading…
Reference in New Issue