Skip to content

(EAI-1003) get available versions for data source #696

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
May 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion packages/ingest-mongodb-public/src/sources/mongoose.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ export const mongooseSourceConstructor = async () => {
metadata: {
productName: "Mongoose ODM",
tags: ["node.js", "community library", "mongoose", "odm"],
version: "v7.x (current)",
versionLabel: "v7.x (current)",
},
});
};
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import {
overrideCurrentVersion,
prepareSnootySources,
} from "./SnootyProjectsInfo";
import { Page } from "mongodb-rag-core";
import { PageMetadata } from "mongodb-rag-core";

const snootyDataApiBaseUrl = "https://snooty-data-api.mongodb.com/prod/";

Expand Down Expand Up @@ -128,7 +128,7 @@ describe("SnootyProjectsInfo", () => {
});
assert(snootySources !== undefined);
const pages = await snootySources[0].fetchPages();
const versions = pages.map((page) => (page.metadata?.version as Page['version']));
const versions = pages.map((page) => (page.metadata?.version as PageMetadata['version']));
const originalCurrentVersion = versions.find((version) => version?.isCurrent);
const currentVersionOverride = versions.find((version) => !version?.isCurrent)?.label;

Expand All @@ -141,7 +141,7 @@ describe("SnootyProjectsInfo", () => {
snootyDataApiBaseUrl,
});
const pagesAfterOverride = await sourcesAfterOverride[0].fetchPages();
const versionsAfterOverride = pagesAfterOverride.map((page) => (page.metadata?.version as Page['version']));
const versionsAfterOverride = pagesAfterOverride.map((page) => (page.metadata?.version as PageMetadata['version']));
const currentVersionAfterOverride = versionsAfterOverride.find((version) => version?.isCurrent);

// Check that the current version is the one we set
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ export const makeMockPageStore = (): PageStore => {
},
async deletePages() {
return;
}
},
// Mock implementation: always reports that no data source has any versions.
async getDataSourceVersions() {
return [];
},
};
};
Original file line number Diff line number Diff line change
Expand Up @@ -333,4 +333,64 @@ describe("MongoDbPageStore", () => {
expect(missingUrls).toEqual([]);
});
});

describe("getDataSourceVersions", () => {
// Fixture: every movie page is assigned to "movie-source" and tagged with one
// of two versions. Even-indexed pages get label "2" (current); odd-indexed
// pages get label "1" (not current).
const moviePagesWithVersion = moviePages.map((page, index) => {
  const isEvenIndex = index % 2 === 0;
  return {
    ...page,
    sourceName: "movie-source",
    metadata: {
      version: {
        label: isEvenIndex ? "2" : "1",
        isCurrent: isEvenIndex,
      },
    },
  };
});

// Seed the versioned pages before each test.
// NOTE(review): per the PR discussion, the enclosing suite drops the database
// after every test, which is why this is not a beforeAll.
beforeEach(async () => {
  assert(store);
  await store.updatePages(moviePagesWithVersion);
});
Comment on lines +348 to +351
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can be a beforeAll, since the tests only read from the data.

Also add a cleanup step that removes the pages you added, so they don't interfere with other tests.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It fails with a beforeAll because there's an afterEach in the parent describe that drops the db after each test, so I'm keeping this as a beforeEach. I did add an afterAll to clean up.

// Permanently delete every page this suite may have stored, for both of the
// source names used by the tests below.
afterAll(async () => {
  assert(store);
  const seededSources = ["movie-source", "another-movie-source"];
  await store.deletePages({ dataSources: seededSources, permanent: true });
});

// Filtering by a single source name should yield exactly one entry, holding
// that source's two distinct versions ordered by label.
it("returns list of versions for a specific data source", async () => {
  assert(store);
  const result = await store.getDataSourceVersions({
    dataSources: ["movie-source"],
  });

  expect(result.length).toBe(1);
  const [{ sourceName, versions }] = result;
  expect(sourceName).toBe("movie-source");
  expect(versions.length).toBe(2);

  // Label "1" comes before label "2" in the returned list.
  const [firstVersion, secondVersion] = versions;
  expect(firstVersion).toMatchObject({
    label: "1",
    isCurrent: false,
  });
  expect(secondVersion).toMatchObject({
    label: "2",
    isCurrent: true,
  });
});

// With no arguments, versions for every versioned data source are returned.
it("returns list of versions for all data sources", async () => {
  assert(store);
  // Add a second versioned data source that reuses the same pages and versions.
  const anotherMoviePagesWithVersion = moviePagesWithVersion.map((page) => ({
    ...page,
    sourceName: "another-movie-source",
  }));
  await store.updatePages(anotherMoviePagesWithVersion);

  const dataSourceVersions = await store.getDataSourceVersions();

  expect(dataSourceVersions.length).toBe(2);
  // A length check alone would pass even if the wrong sources (or the same
  // source twice) came back, so also verify which sources were returned.
  // The names are sorted here because the test does not rely on the order in
  // which sources are returned.
  const returnedSourceNames = dataSourceVersions
    .map(({ sourceName }) => sourceName)
    .sort();
  expect(returnedSourceNames).toEqual(["another-movie-source", "movie-source"]);
});
});
});
40 changes: 40 additions & 0 deletions packages/mongodb-rag-core/src/contentStore/MongoDbPageStore.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import {
LoadPagesArgs,
PageStore,
PersistedPage,
SourceVersions,
} from "./Page";
import { Filter, Document } from "mongodb";

Expand All @@ -29,6 +30,9 @@ export type MongoDbPageStore = DatabaseConnection &
expectedUrls: string[];
urlTransformer?: (url: string) => string;
}): Promise<string[]>;
/**
  Gets the versions found for data sources.

  `args` is optional so that this signature agrees with the implementation in
  this file, which accepts a call with no arguments.

  @param args - Optional filter; `dataSources` lists the source names to include.
 */
getDataSourceVersions(args?: {
  dataSources: string[];
}): Promise<SourceVersions[]>;
metadata: {
databaseName: string;
collectionName: string;
Expand Down Expand Up @@ -145,6 +149,42 @@ export function makeMongoDbPageStore({
);
return results.filter((url) => url !== null) as string[];
},
/**
  Lists the versions found on stored pages, grouped by data source.

  Pages whose `action` is "deleted" and pages without a
  `metadata.version.label` are ignored. When `args` is provided, only pages
  whose `sourceName` is in `args.dataSources` are considered.

  @returns One entry per source name, each with its unique version objects
  sorted by `label` in ascending order.
 */
async getDataSourceVersions(args?: {
  dataSources: string[];
}): Promise<SourceVersions[]> {
  // Restrict to the requested data sources only when a filter was passed.
  const sourceFilter = args ? { sourceName: { $in: args.dataSources } } : {};

  // Keep only live (non-deleted) pages that carry a version label.
  const matchVersionedPages = {
    $match: {
      ...sourceFilter,
      action: { $ne: "deleted" },
      "metadata.version.label": { $exists: true },
    },
  };

  // Collapse pages into one document per source, collecting unique versions.
  const collectVersionsBySource = {
    $group: {
      _id: "$sourceName",
      versions: { $addToSet: "$metadata.version" },
    },
  };

  // Rename `_id` back to `sourceName` and order the versions by label.
  // NOTE(review): labels look like strings in the tests, so this ordering is
  // presumably lexicographic rather than numeric — confirm this is intended.
  const shapeResult = {
    $project: {
      _id: 0,
      sourceName: "$_id",
      versions: {
        $sortArray: {
          input: "$versions",
          sortBy: { label: 1 },
        },
      },
    },
  };

  const cursor = pagesCollection.aggregate<SourceVersions>([
    matchVersionedPages,
    collectVersionsBySource,
    shapeResult,
  ]);
  return cursor.toArray();
},
async init() {
await pagesCollection.createIndex({ url: 1 });
await pagesCollection.createIndex({ sourceName: 1 });
Expand Down
30 changes: 21 additions & 9 deletions packages/mongodb-rag-core/src/contentStore/Page.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,26 +27,27 @@ export type Page = {
*/
sourceName: string;

/**
The version of the page. This is relevant for versioned docs.
If the page is not versioned, this field should be undefined.
*/
version?: {
label: string;
isCurrent: boolean;
}

/**
Arbitrary metadata for page.
*/
metadata?: PageMetadata;
};

/**
  Describes one version of a versioned page or data source.
 */
interface VersionInfo {
  /**
    Label of the version.
   */
  label: string;
  /**
    Whether this version is the current one.
   */
  isCurrent: boolean;
}

export type PageMetadata = {
/**
Arbitrary tags.
*/
tags?: string[];
/**
The version of the page. This is relevant for versioned docs.
If the page is not versioned, this field should be undefined.
*/
version?: VersionInfo;
/**
Page-level metadata. Should not be chunked.
*/
Expand Down Expand Up @@ -112,6 +113,10 @@ export type DeletePagesArgs = {
inverse?: boolean;
};

/**
  The versions available for a single data source.

  This is the shape of each element returned by `getDataSourceVersions`:
  one object per source, carrying the source's name and its versions.
  (It was previously declared as a `sourceName -> versions` index signature,
  which did not match the `{ sourceName, versions }` objects actually
  returned.)
 */
export interface SourceVersions {
  /**
    Name of the data source these versions belong to.
   */
  sourceName: string;
  /**
    Versions found for the data source.
   */
  versions: VersionInfo[];
}

/**
Data store for {@link Page} objects.
*/
Expand All @@ -137,6 +142,13 @@ export type PageStore = {
*/
deletePages(args?: DeletePagesArgs): Promise<void>;

/**
Gets a list of versions for dataSources.

@param args - Optional. `dataSources` names the data sources to look up.
NOTE(review): the MongoDB-backed store in this change returns versions for
all data sources when `args` is omitted — confirm that other implementations
are expected to do the same.
@returns A promise resolving to an array of {@link SourceVersions}.
*/
getDataSourceVersions(args?: {
dataSources: string[];
}): Promise<SourceVersions[]>;

/**
Close connection to data store.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ const options: HandleHtmlPageFuncOptions = {
`https://example.com/${pathInRepo}`.replace(/index\.html$/, "testing.html"),
metadata: {
productName: "Java Reactive Streams Driver",
version: javaVersion,
versionLabel: javaVersion,
},
extractMetadata: () => ({
foo: "bar",
Expand Down Expand Up @@ -66,7 +66,7 @@ describe("handleHtmlDocument()", () => {
it("should extract metadata from DOM", () => {
expect(page.metadata).toMatchObject({
foo: "bar",
version: "4.10",
versionLabel: "4.10",
productName: "Java Reactive Streams Driver",
});
});
Expand Down
3 changes: 3 additions & 0 deletions packages/mongodb-rag-core/src/test/MockPageStore.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,8 @@ export const makeMockPageStore = (): PageStore => {
async deletePages(_args) {
return;
},
// Mock implementation: ignores its arguments and always reports no versions.
async getDataSourceVersions(_args) {
return [];
},
};
};