Skip to content

Commit f8300a1

Browse files
authored
(EAI-585): Move MongoDB data sources to implementation package (#542)
* move mongodb data sources to implementation package
* move test data
* Remove files that shouldn't be present
* remove for naming
* rename
* master --> main
* master --> main more
* clean up imports
1 parent 57096a8 commit f8300a1

File tree

58 files changed

+1678
-217
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

58 files changed

+1678
-217
lines changed
Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
module.exports = {
22
preset: "ts-jest",
33
testEnvironment: "node",
4+
setupFiles: ["<rootDir>/src/test/jestSetUp.ts"],
45
testPathIgnorePatterns: ["<rootDir>/build"],
56
};

packages/ingest-mongodb-public/src/PublicIngestEnvVars.ts

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,8 +17,18 @@ export const PUBLIC_INGEST_MONGODB_DOCS_META_ENV_VARS = {
1717
MONGODB_META_DATABASE_NAME: "",
1818
};
1919

20+
export const INGEST_DEVCENTER_ENV_VARS = {
21+
DEVCENTER_CONNECTION_URI: "",
22+
};
23+
24+
export const INGEST_MDBU_ENV_VARS = {
25+
UNIVERSITY_DATA_API_KEY: "",
26+
};
27+
2028
export const PUBLIC_INGEST_ENV_VARS = {
2129
...CORE_CHATBOT_APP_ENV_VARS,
2230
...CORE_OPENAI_EMBEDDING_ENV_VARS,
2331
...INGEST_ENV_VARS,
32+
...INGEST_DEVCENTER_ENV_VARS,
33+
...INGEST_MDBU_ENV_VARS,
2434
};

packages/ingest-mongodb-public/src/meta.config.ts

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,4 @@
1-
import {
2-
Config,
3-
INGEST_ENV_VARS,
4-
makeIngestMetaStore,
5-
} from "mongodb-rag-ingest";
1+
import { Config, makeIngestMetaStore } from "mongodb-rag-ingest";
62
import { standardChunkFrontMatterUpdater } from "mongodb-rag-ingest/embed";
73
import {
84
assertEnvVars,
@@ -12,12 +8,12 @@ import {
128
OpenAIClient,
139
AzureKeyCredential,
1410
} from "mongodb-rag-core";
15-
import { snootyDataApiBaseUrl } from "./sources/snooty";
16-
import { makeSnootyDataSource } from "mongodb-rag-ingest/sources/snooty";
11+
import { makeSnootyDataSource } from "./sources/snooty";
1712
import {
1813
PUBLIC_INGEST_ENV_VARS,
1914
PUBLIC_INGEST_MONGODB_DOCS_META_ENV_VARS,
2015
} from "./PublicIngestEnvVars";
16+
import { snootyDataApiBaseUrl } from "./sources/snootySources";
2117

2218
const {
2319
OPENAI_ENDPOINT,

packages/mongodb-rag-ingest/src/sources/DevCenterDataSource.test.ts renamed to packages/ingest-mongodb-public/src/sources/DevCenterDataSource.test.ts

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,16 @@
1-
import { strict as assert } from "assert";
21
import Path from "path";
32
import fs from "fs";
43
import {
54
makeDevCenterPageBody,
65
makeDevCenterDataSource,
76
makeDevCenterPage,
87
} from "./DevCenterDataSource";
9-
8+
import { assertEnvVars } from "mongodb-rag-core";
9+
import { INGEST_DEVCENTER_ENV_VARS } from "../PublicIngestEnvVars";
1010
import "dotenv/config";
1111

12+
const { DEVCENTER_CONNECTION_URI } = assertEnvVars(INGEST_DEVCENTER_ENV_VARS);
13+
1214
const SRC_ROOT = Path.resolve(__dirname, "../");
1315

1416
const devCenterDoc = JSON.parse(
@@ -21,18 +23,14 @@ const devCenterDoc = JSON.parse(
2123
);
2224
describe("DevCenterDataSource", () => {
2325
jest.setTimeout(90000);
24-
const { DEVCENTER_CONNECTION_URI } = process.env;
2526
it("loads pages from dev center", async () => {
26-
assert(
27-
DEVCENTER_CONNECTION_URI !== undefined,
28-
"env var DEVCENTER_CONNECTION_URI not defined. Did you copy .env.example to .env and fill it in?"
29-
);
3027
const source = await makeDevCenterDataSource({
3128
type: "devcenter",
3229
name: "devcenter",
3330
collectionName: "search_content_prod",
3431
databaseName: "devcenter",
3532
baseUrl: "https://example.com/developer",
33+
connectionUri: DEVCENTER_CONNECTION_URI,
3634
});
3735

3836
const pages = await source.fetchPages();

packages/mongodb-rag-ingest/src/sources/DevCenterDataSource.ts renamed to packages/ingest-mongodb-public/src/sources/DevCenterDataSource.ts

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,18 @@
1-
import { MongoClient } from "mongodb-rag-core";
1+
import { logger, MongoClient, Page } from "mongodb-rag-core";
22
import { strict as assert } from "assert";
33
import { convert } from "html-to-text";
4-
import { Page, assertEnvVars, logger } from "mongodb-rag-core";
5-
import { INGEST_ENV_VARS } from "../IngestEnvVars";
6-
import { removeMarkdownImagesAndLinks } from "./removeMarkdownImagesAndLinks";
7-
import { DataSource } from "./DataSource";
8-
import { ProjectBase } from "./ProjectBase";
4+
import {
5+
DataSource,
6+
ProjectBase,
7+
removeMarkdownImagesAndLinks,
8+
} from "mongodb-rag-ingest/sources";
99

1010
export type DevCenterProjectConfig = ProjectBase & {
1111
type: "devcenter";
1212
databaseName: string;
1313
collectionName: string;
1414
baseUrl: string;
15+
connectionUri: string;
1516
};
1617

1718
// This type is based on what's in the DevCenter search_content_prod collection
@@ -34,13 +35,12 @@ export const makeDevCenterDataSource = async ({
3435
databaseName,
3536
collectionName,
3637
baseUrl,
38+
connectionUri,
3739
}: DevCenterProjectConfig): Promise<DataSource> => {
38-
const { DEVCENTER_CONNECTION_URI } = assertEnvVars(INGEST_ENV_VARS);
39-
4040
return {
4141
name,
4242
async fetchPages() {
43-
const client = await new MongoClient(DEVCENTER_CONNECTION_URI).connect();
43+
const client = await new MongoClient(connectionUri).connect();
4444
try {
4545
const db = client.db(databaseName);
4646
const collection = db.collection<DevCenterEntry>(collectionName);

packages/ingest-mongodb-public/src/sources/index.ts

Lines changed: 19 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -2,28 +2,35 @@ import { strict as assert } from "assert";
22
import { Page, extractFrontMatter } from "mongodb-rag-core";
33
import {
44
DataSource,
5-
makeDevCenterDataSource,
6-
DevCenterProjectConfig,
75
makeGitDataSource,
8-
HandleHtmlPageFuncOptions,
9-
handleHtmlDocument,
106
MakeMdOnGithubDataSourceParams,
117
makeMdOnGithubDataSource,
128
removeMarkdownImagesAndLinks,
13-
MakeMongoDbUniversityDataSourceParams,
14-
makeMongoDbUniversityDataSource,
15-
filterOnlyPublicActiveTiCatalogItems,
169
} from "mongodb-rag-ingest/sources";
17-
import { prepareSnootySources } from "mongodb-rag-ingest/sources/snooty";
1810
import { prismaSourceConstructor } from "./prisma";
1911
import { wiredTigerSourceConstructor } from "./wiredTiger";
2012
import { mongooseSourceConstructor } from "./mongoose";
2113
import { practicalAggregationsDataSource } from "./practicalAggregations";
2214
import {
15+
makeSnootyDataSources,
2316
snootyDataApiBaseUrl,
2417
snootyProjectConfig,
25-
makeSnootyDataSources,
26-
} from "./snooty";
18+
} from "./snootySources";
19+
20+
import { assertEnvVars } from "mongodb-rag-core";
21+
import { PUBLIC_INGEST_ENV_VARS } from "../PublicIngestEnvVars";
22+
import {
23+
DevCenterProjectConfig,
24+
makeDevCenterDataSource,
25+
} from "./DevCenterDataSource";
26+
import {
27+
MakeMongoDbUniversityDataSourceParams,
28+
filterOnlyPublicActiveTiCatalogItems,
29+
makeMongoDbUniversityDataSource,
30+
} from "./mongodb-university";
31+
const { DEVCENTER_CONNECTION_URI, UNIVERSITY_DATA_API_KEY } = assertEnvVars(
32+
PUBLIC_INGEST_ENV_VARS
33+
);
2734

2835
/**
2936
Async constructor for specific data sources -- parameters baked in.
@@ -36,10 +43,11 @@ export const devCenterProjectConfig: DevCenterProjectConfig = {
3643
collectionName: "search_content_prod",
3744
databaseName: "devcenter",
3845
baseUrl: "https://www.mongodb.com/developer",
46+
connectionUri: DEVCENTER_CONNECTION_URI,
3947
};
4048

4149
const mongoDbUniversitySourceConstructor = async () => {
42-
const universityDataApiKey = process.env.UNIVERSITY_DATA_API_KEY;
50+
const universityDataApiKey = UNIVERSITY_DATA_API_KEY;
4351
assert(!!universityDataApiKey, "UNIVERSITY_DATA_API_KEY required");
4452
const universityConfig: MakeMongoDbUniversityDataSourceParams = {
4553
sourceName: "mongodb-university",

packages/mongodb-rag-ingest/src/sources/mongodb-university/MongoDbUniversityDataSource.ts renamed to packages/ingest-mongodb-public/src/sources/mongodb-university/MongoDbUniversityDataSource.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
import { PageMetadata } from "mongodb-rag-core";
2-
import { DataSource } from "../DataSource";
2+
import { DataSource } from "../../../../mongodb-rag-ingest/src/sources/DataSource";
33
import { makeUniversityPages } from "./makeUniversityPages";
44
import {
55
TiCatalogItem,

0 commit comments

Comments
 (0)