Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .eslintrc.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
{
"plugins": ["jsdoc"],
"plugins": ["jsdoc", "expect-type"],
"extends": [
"eslint:recommended",
"plugin:jsdoc/recommended",
"plugin:n/recommended",
"plugin:unicorn/recommended",
"plugin:expect-type/recommended",
"prettier"
],
"env": { "node": true },
Expand Down
109 changes: 109 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
"benchmark": "^2.1.4",
"eslint": "^8.29.0",
"eslint-config-prettier": "^8.5.0",
"eslint-plugin-expect-type": "^0.2.1",
"eslint-plugin-jest": "^27.1.6",
"eslint-plugin-jsdoc": "^39.6.4",
"eslint-plugin-n": "^15.6.0",
Expand Down
120 changes: 120 additions & 0 deletions src/api/extract.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import * as fixtures from '../__fixtures__/fixtures.js';
import cheerio from '..';

/**
 * Expected result shape for the tests that extract a single value for each of
 * the `red` and `sel` keys. Either field may be `undefined`.
 */
interface RedSelObject {
red: string | undefined;
sel: string | undefined;
}

/**
 * Expected result shape for the tests that extract every match for the `red`
 * and `sel` keys: each field is an array of strings.
 */
interface RedSelMultipleObject {
red: string[];
sel: string[];
}

describe('$.extract', () => {
  it('() : should extract values for selectors', () => {
    // Parse the fixture once and derive the root from the same instance, so
    // that `$(el)` in the custom extraction callbacks below operates on the
    // same parsed document that `$root.extract` walks.
    const $ = cheerio.load(fixtures.eleven);
    const $root = $.root();

    // NOTE: every `$ExpectType` directive must stay on the line directly above
    // the statement whose type it checks.

    // An empty object should lead to an empty extraction.
    // $ExpectType ExtractedMap<{}>
    const emptyExtract = $root.extract({});
    expect(emptyExtract).toStrictEqual({});

    // Non-existent values should be undefined.
    // $ExpectType ExtractedMap<{ foo: string; }>
    const simpleExtract = $root.extract({ foo: 'bar' });
    expect(simpleExtract).toStrictEqual({ foo: undefined });

    // Existing values should be extracted.
    expect<{ red: string | undefined }>(
      $root.extract({ red: '.red' })
    ).toStrictEqual({
      red: 'Four',
    });
    expect<RedSelObject>(
      $root.extract({ red: '.red', sel: '.sel' })
    ).toStrictEqual({
      red: 'Four',
      sel: 'Three',
    });

    // Descriptors for extractions should be supported.
    expect<RedSelObject>(
      $root.extract({
        red: { selector: '.red' },
        sel: { selector: '.sel' },
      })
    ).toStrictEqual({ red: 'Four', sel: 'Three' });

    // Should support extraction of multiple values.
    // $ExpectType ExtractedMap<{ red: [string]; sel: [string]; }>
    const multipleExtract = $root.extract({
      red: ['.red'],
      sel: ['.sel'],
    });
    expect<RedSelMultipleObject>(multipleExtract).toStrictEqual({
      red: ['Four', 'Five', 'Nine'],
      sel: ['Three', 'Nine', 'Eleven'],
    });

    // Should support custom `prop`s.
    expect<RedSelObject>(
      $root.extract({
        red: { selector: '.red', value: 'outerHTML' },
        sel: { selector: '.sel', value: 'tagName' },
      })
    ).toStrictEqual({ red: '<li class="red">Four</li>', sel: 'LI' });

    // Should support custom `prop`s for multiple values.
    expect<{ red: string[] }>(
      $root.extract({
        red: [{ selector: '.red', value: 'outerHTML' }],
      })
    ).toStrictEqual({
      red: [
        '<li class="red">Four</li>',
        '<li class="red">Five</li>',
        '<li class="red sel">Nine</li>',
      ],
    });

    // Should support custom extraction functions.
    expect<{ red: string | undefined }>(
      $root.extract({
        red: {
          selector: '.red',
          value: (el, key) => `${key}=${$(el).text()}`,
        },
      })
    ).toStrictEqual({ red: 'red=Four' });

    // Should support custom extraction functions for multiple values.
    expect<{ red: string[] }>(
      $root.extract({
        red: [
          {
            selector: '.red',
            value: (el, key) => `${key}=${$(el).text()}`,
          },
        ],
      })
    ).toStrictEqual({ red: ['red=Four', 'red=Five', 'red=Nine'] });

    // Should support extraction objects: the nested map is resolved relative
    // to the element matched by the outer selector.
    // $ExpectType ExtractedMap<{ section: { selector: string; value: { red: string; sel: string; }; }; }>
    const subExtractObject = $root.extract({
      section: {
        selector: 'ul:nth(1)',
        value: {
          red: '.red',
          sel: '.blue',
        },
      },
    });

    expect<{ section: RedSelObject | undefined }>(
      subExtractObject
    ).toStrictEqual({
      section: {
        red: 'Five',
        sel: 'Seven',
      },
    });
  });
});
92 changes: 92 additions & 0 deletions src/api/extract.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import type { AnyNode, Element } from 'domhandler';
import type { Cheerio } from '../cheerio.js';
import type { prop } from './attributes.js';

/**
 * Callback that computes the extracted value for one matched element.
 *
 * `extract` invokes it with the matched element, the key of the map entry
 * currently being filled, and the result object that is being built up.
 */
type ExtractDescriptorFn = (
el: Element,
key: string,
// TODO: This could be typed with ExtractedMap
obj: Record<string, unknown>
) => unknown;

/** Long-form description of how to extract the value for one key. */
interface ExtractDescriptor {
/** Selector used to locate the element(s) to extract from. */
selector: string;
/**
 * What to read from each matched element:
 *
 * - A string is passed to `.prop()` on the matched element.
 * - A function is called to compute the value.
 * - A nested map triggers a sub-extraction on the matched element.
 *
 * When omitted, `'textContent'` is used.
 */
value?: string | ExtractDescriptorFn | ExtractMap;
}

/**
 * A single entry of an extraction map: a selector string, a descriptor, or a
 * one-element tuple wrapping either of those. `extract` treats the tuple form
 * as a request for an array of values rather than a single value.
 */
type ExtractValue = string | ExtractDescriptor | [string | ExtractDescriptor];

/**
 * Input to `extract`: maps each property name of the result object to the
 * description of how its value should be extracted.
 */
export interface ExtractMap {
[key: string]: ExtractValue;
}

/**
 * Computes the result type for a single extraction-map entry `V`.
 *
 * - A one-element tuple yields an array of the non-nullish result type of its
 *   inner entry.
 * - A selector string yields `string | undefined`.
 * - A descriptor whose `value` is a nested map yields that map's extracted
 *   shape, or `undefined`.
 * - A descriptor whose `value` is a function yields that function's return
 *   type, or `undefined`.
 * - Any other descriptor yields the return type of `prop`, or `undefined`.
 *
 * `M` is only passed along to the recursive tuple case; no branch inspects it.
 */
type ExtractedValue<V extends ExtractValue, M extends ExtractMap> = V extends [
string | ExtractDescriptor
]
? NonNullable<ExtractedValue<V[0], M>>[]
: V extends string
? string | undefined
: V extends ExtractDescriptor
? V['value'] extends ExtractMap
? ExtractedMap<V['value']> | undefined
: V['value'] extends ExtractDescriptorFn
? ReturnType<V['value']> | undefined
: ReturnType<typeof prop> | undefined
: never;

/**
 * Result type of `extract` for a given map `M`: the same keys as `M`, each
 * mapped to the type extracted for that key's entry.
 */
export type ExtractedMap<M extends ExtractMap> = {
[key in keyof M]: ExtractedValue<M[key], M>;
};

/**
 * Normalises a map entry into a descriptor with both fields populated.
 *
 * A bare string is treated as the selector. A missing (nullish) `value` falls
 * back to `'textContent'`. Only `selector` and `value` are carried over.
 *
 * @param descr - Selector string or descriptor to normalise.
 * @returns A new descriptor with `selector` and `value` both set.
 */
function getExtractDescr(
  descr: string | ExtractDescriptor
): Required<ExtractDescriptor> {
  // Lift the shorthand string form into descriptor form first, so a single
  // code path below builds the result.
  const source: ExtractDescriptor =
    typeof descr === 'string' ? { selector: descr } : descr;

  return {
    selector: source.selector,
    value: source.value ?? 'textContent',
  };
}

/**
 * Extract multiple values from the current selection and collect them in a
 * plain object.
 *
 * Each entry of `map` is either a selector string, a descriptor
 * (`{ selector, value }`), or a one-element array wrapping one of those. The
 * array form collects a value for every match; otherwise only the first match
 * is used and the result is `undefined` when nothing matches.
 *
 * @param map - An object whose keys name the properties to create on the
 *   result, and whose values describe how to extract each property.
 * @returns An object containing the extracted values.
 */
export function extract<M extends ExtractMap, T extends AnyNode>(
  this: Cheerio<T>,
  map: M
): ExtractedMap<M> {
  const result: Record<string, unknown> = {};

  for (const key in map) {
    const entry = map[key];
    const collectAll = Array.isArray(entry);
    const { selector, value } = getExtractDescr(collectAll ? entry[0] : entry);

    // Turn the three supported `value` forms into one reader callback.
    let read: ExtractDescriptorFn;
    if (typeof value === 'function') {
      read = value;
    } else if (typeof value === 'string') {
      // A string names a property to read from the matched element.
      read = (el: Element) => this._make(el).prop(value);
    } else {
      // Otherwise `value` is a nested map: extract it from the matched element.
      read = (el: Element) => this._make(el).extract(value);
    }

    if (!collectAll) {
      // Single-value form: use the first match, if any.
      const first = this._findBySelector(selector, 1);
      result[key] = first.length > 0 ? read(first[0], key, result) : undefined;
      continue;
    }

    // Array form: read a value from every match.
    const matches = this._findBySelector(selector, Number.POSITIVE_INFINITY);
    result[key] = matches.map((_, el) => read(el, key, result)).get();
  }

  return result as ExtractedMap<M>;
}
Loading