obsidian-book-tracker/src/data-sources/Goodreads.ts

275 lines
6.1 KiB
TypeScript

import { requestUrl } from "obsidian";
import type {
Author,
Book as OutputBook,
Series as OutputSeries,
} from "../types";
/**
 * Reference to another record in the page's `apolloState` map.
 * `__ref` is the key under which the referenced record is stored.
 */
interface Ref {
__ref: string;
}
/**
 * Links a book to one of its contributors.
 * `node` references the contributor record and `role` names the kind of
 * contribution; the extractor in this file keeps only edges whose role is
 * exactly "Author".
 */
interface BookContributorEdge {
__typename: "BookContributorEdge";
node: Ref;
role: string;
}
/**
 * Membership of a book in a series.
 * `userPosition` arrives as a string and is parsed with `parseInt` by the
 * extractor in this file; `series` references the series record.
 */
interface BookSeries {
__typename: "BookSeries";
userPosition: string;
series: Ref;
}
/** A genre record; only `name` is read by the extractor in this file. */
interface Genre {
__typename: "Genre";
name: string;
webUrl: string;
}
/** Wrapper that attaches a {@link Genre} to a book. */
interface BookGenre {
__typename: "BookGenre";
genre: Genre;
}
/** Language record attached to a book's details (see `BookDetails.language`). */
interface Language {
__typename: "Language";
name: string;
}
/**
 * Edition-level details of a book.
 * The extractor in this file reads `publisher`, `publicationTime`, `numPages`,
 * `isbn` and `isbn13`.
 */
interface BookDetails {
__typename: "BookDetails";
asin: string;
format: string;
numPages: number;
/** Passed directly to `new Date(...)`, i.e. treated as a millisecond epoch timestamp. */
publicationTime: number;
publisher: string;
isbn: string;
isbn13: string;
language: Language;
}
/**
 * Raw book record as stored in `apolloState`.
 * Related records (contributors, series, work) are not embedded; they are
 * pointed to through {@link Ref} values.
 */
interface Book {
__typename: "Book";
id: string;
legacyId: number;
webUrl: string;
/** Split into title and subtitle on ": " by the extractor in this file. */
title: string;
titleComplete: string;
description: string;
primaryContributorEdge: BookContributorEdge;
secondaryContributorEdges: BookContributorEdge[];
imageUrl: string;
/** Only the first entry is used by the extractor in this file. */
bookSeries: BookSeries[];
bookGenres: BookGenre[];
details: BookDetails;
work: Ref;
}
/**
 * Connection object found under `Contributor.works`.
 * NOTE(review): `totalCount` is presumably the number of works by the
 * contributor — confirm against the Goodreads payload.
 */
interface ContributorWorksConnection {
__typename: "ContributorWorksConnection";
totalCount: number;
}
/**
 * Connection object found under `Contributor.followers`.
 * NOTE(review): `totalCount` is presumably the contributor's follower count —
 * confirm against the Goodreads payload.
 */
interface ContributorFollowersConnection {
__typename: "ContributorFollowersConnection";
totalCount: number;
}
/**
 * Raw contributor record as stored in `apolloState`.
 * The extractor in this file copies `id`, `legacyId`, `name` and `description`
 * into the output author.
 */
interface Contributor {
__typename: "Contributor";
id: string;
legacyId: number;
name: string;
description: string;
isGrAuthor: boolean;
works: ContributorWorksConnection;
profileImageUrl: string;
webUrl: string;
followers: ContributorFollowersConnection;
}
/** Raw series record as stored in `apolloState`; only `title` is read in this file. */
interface Series {
__typename: "Series";
id: string;
title: string;
webUrl: string;
}
/**
 * The `ROOT_QUERY` entry of `apolloState`.
 * Entries are keyed by a serialized query string; this file looks up
 * `getBookByLegacyId({"legacyId":"<id>"})` and expects a {@link Ref} there.
 */
interface Query {
__typename: "Query";
// Values are untyped; callers must cast or narrow the entry they read.
[key: string]: unknown;
}
/**
 * Shape of the JSON parsed from the `__NEXT_DATA__` element of a book page.
 * Only `props.pageProps.apolloState` is read in this file.
 */
interface NextData {
props: {
pageProps: {
// Map of record key -> record, plus the ROOT_QUERY entry point.
apolloState: {
ROOT_QUERY: Query;
[key: string]: unknown;
};
params: Record<string, string>;
query: Record<string, string>;
jwtToken: string;
dataSource: string;
};
};
}
/** One book hit scraped from the search results page by `Goodreads.search`. */
export interface SearchResult {
/** Numeric book id; the same kind of id `Goodreads.getBookByLegacyId` accepts. */
legacyId: number;
title: string;
authors: string[];
avgRating: number;
ratingCount: number;
publicationYear: number;
editionCount: number;
coverImageUrl: string;
}
/**
 * Scraping client for goodreads.com.
 *
 * Book details are read from the JSON embedded in the `__NEXT_DATA__` element
 * of a book page; search results are scraped from the HTML search results
 * table.
 */
export class Goodreads {
    /**
     * Finds `pattern` in `text` and parses its first capture group as a
     * base-10 integer, ignoring thousands separators ("1,234" -> 1234).
     *
     * @returns The parsed number, or 0 when the text is missing, the pattern
     * does not match, or the capture is not numeric.
     */
    private static parseCount(
        text: string | null | undefined,
        pattern: RegExp
    ): number {
        const raw = text?.match(pattern)?.[1];
        if (raw === undefined) {
            return 0;
        }
        // parseInt stops at the first comma, so strip separators first.
        const value = parseInt(raw.replace(/,/g, ""), 10);
        return Number.isNaN(value) ? 0 : value;
    }

    /**
     * Downloads the page of a book and returns the JSON embedded in its
     * `__NEXT_DATA__` element.
     *
     * @param legacyId Numeric Goodreads book id used in the page URL.
     * @throws Error if the element is missing or empty. Errors from the
     * request or from `JSON.parse` propagate unchanged.
     */
    async getNextData(legacyId: number): Promise<NextData> {
        const url = "https://www.goodreads.com/book/show/" + legacyId;
        const res = await requestUrl({ url });
        const doc = new DOMParser().parseFromString(res.text, "text/html");
        const nextDataRaw = doc.getElementById("__NEXT_DATA__")?.textContent;
        // An empty element would otherwise surface as an opaque SyntaxError
        // from JSON.parse.
        if (typeof nextDataRaw !== "string" || nextDataRaw.trim() === "") {
            throw new Error("Unable to find next data script in the document.");
        }
        return JSON.parse(nextDataRaw) as NextData;
    }

    /**
     * Converts the raw book record referenced by `ref` into the plugin's
     * output book shape.
     *
     * - Authors are the contributors whose edge role is exactly "Author".
     * - Only the first series entry is used; `series` is `null` otherwise.
     * - A title containing ": " is split at its last occurrence into title
     *   and subtitle.
     *
     * @param nextData Parsed `__NEXT_DATA__` payload of a book page.
     * @param ref Reference to the book record inside `apolloState`.
     * @throws Error if `ref` does not resolve to a record in `apolloState`.
     */
    extractBookFromNextData(nextData: NextData, ref: Ref): OutputBook {
        const apolloState = nextData.props.pageProps.apolloState;
        const bookData = apolloState[ref.__ref] as Book | null | undefined;
        if (bookData == null) {
            throw new Error(
                `Could not find book data for reference "${ref.__ref}".`
            );
        }

        // The payload is untrusted JSON: list fields may be null or absent
        // even though the interfaces declare them as required.
        const contributorEdges = [
            bookData.primaryContributorEdge,
            ...(bookData.secondaryContributorEdges ?? []),
        ];
        const authors: Author[] = [];
        for (const edge of contributorEdges) {
            if (edge?.role !== "Author") {
                continue;
            }
            const contributor = apolloState[edge.node.__ref] as
                | Contributor
                | null
                | undefined;
            if (contributor == null) {
                // Dangling reference; skip instead of crashing the import.
                continue;
            }
            authors.push({
                id: contributor.id,
                legacyId: contributor.legacyId,
                name: contributor.name,
                description: contributor.description,
            });
        }

        let series: OutputSeries | null = null;
        const firstSeries = (bookData.bookSeries ?? [])[0];
        if (firstSeries != null) {
            const seriesData = apolloState[firstSeries.series.__ref] as
                | Series
                | null
                | undefined;
            if (seriesData != null) {
                series = {
                    title: seriesData.title,
                    // NOTE(review): parseInt truncates fractional positions
                    // such as "1.5" and yields NaN for an empty string.
                    position: parseInt(firstSeries.userPosition, 10),
                };
            }
        }

        // Split on the LAST ": " so "A: B: C" becomes title "A: B" and
        // subtitle "C".
        let title = bookData.title;
        let subtitle = "";
        const separator = ": ";
        const splitAt = title.lastIndexOf(separator);
        if (splitAt !== -1) {
            subtitle = title.slice(splitAt + separator.length);
            title = title.slice(0, splitAt);
        }

        return {
            title,
            subtitle,
            description: bookData.description,
            authors,
            series,
            publisher: bookData.details.publisher,
            publishedAt: new Date(bookData.details.publicationTime),
            genres: (bookData.bookGenres ?? []).map(
                (bookGenre) => bookGenre.genre.name
            ),
            coverImageUrl: bookData.imageUrl,
            pageCount: bookData.details.numPages,
            isbn: bookData.details.isbn,
            isbn13: bookData.details.isbn13,
        };
    }

    /**
     * Fetches a book page and extracts the book identified by `legacyId`.
     *
     * @param legacyId Numeric Goodreads book id.
     * @throws Error if the page's root query has no entry for this id, or if
     * fetching or extraction fails.
     */
    async getBookByLegacyId(legacyId: number): Promise<OutputBook> {
        const nextData = await this.getNextData(legacyId);
        const bookRef = nextData.props.pageProps.apolloState.ROOT_QUERY[
            `getBookByLegacyId({"legacyId":"${legacyId}"})`
        ] as Ref | undefined;
        if (bookRef === undefined) {
            throw new Error("Could not find reference for book.");
        }
        return this.extractBookFromNextData(nextData, bookRef);
    }

    /**
     * Searches Goodreads and scrapes the HTML results table.
     *
     * Rows without a parsable numeric id are skipped, since such a result
     * could not be fetched afterwards. Numeric fields that cannot be found in
     * a row default to 0 and text fields to "".
     *
     * @param q Free-text search query; it is URL-encoded here.
     * @returns The results in page order.
     */
    async search(q: string): Promise<SearchResult[]> {
        const url =
            "https://www.goodreads.com/search?q=" + encodeURIComponent(q);
        const res = await requestUrl({ url });
        const doc = new DOMParser().parseFromString(res.text, "text/html");

        // `\s+` tolerates text broken across lines; the optional "s" accepts
        // singular forms such as "1 rating" and "1 edition".
        const avgRatingPattern = /(\d+(?:\.\d+)?)\s+avg rating/;
        const ratingCountPattern = /(\d[\d,]*)\s+ratings?\b/;
        const publishedPattern = /published\s+(\d{4})/;
        const editionsPattern = /(\d[\d,]*)\s+editions?\b/;

        const searchResults: SearchResult[] = [];
        doc.querySelectorAll(
            "table.tableList tr[itemtype='http://schema.org/Book']"
        ).forEach((el) => {
            const legacyId = parseInt(
                el.querySelector("div.u-anchorTarget")?.id ?? "",
                10
            );
            if (Number.isNaN(legacyId)) {
                return;
            }

            // Look each element up once and reuse its text.
            const ratingText =
                el.querySelector("span.minirating")?.textContent;
            const infoText = el.querySelector("span.greyText")?.textContent;

            searchResults.push({
                legacyId,
                title:
                    el.querySelector("a.bookTitle")?.textContent?.trim() || "",
                authors: Array.from(el.querySelectorAll("a.authorName")).map(
                    (a) => a.textContent?.trim() || ""
                ),
                avgRating: parseFloat(
                    ratingText?.match(avgRatingPattern)?.[1] ?? "0"
                ),
                ratingCount: Goodreads.parseCount(
                    ratingText,
                    ratingCountPattern
                ),
                publicationYear: Goodreads.parseCount(
                    infoText,
                    publishedPattern
                ),
                editionCount: Goodreads.parseCount(infoText, editionsPattern),
                coverImageUrl:
                    el.querySelector("img.bookCover")?.getAttribute("src") ||
                    "",
            });
        });
        return searchResults;
    }
}