Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -171,12 +171,20 @@ bash prepare_model.sh -s <s3_bucket_name>

### 2. Deploy the CDK Stack

For global regions, execute the following commands:
```bash
cd generative-bi-using-rag/source/resources
npm install
npx cdk deploy
```

For China regions, execute the following commands:
```bash
cd generative-bi-using-rag/source/resources
npm install
npx cdk deploy --parameters S3ModelAssetsBucket=<s3_bucket_name>
```

### 3. Access the Streamlit Web UI
After the CDK stack is deployed, wait around 10 minutes for the initialization to complete. Then, open the Streamlit Web UI in your browser: `http://<your-ec2-public-ip>`

Expand Down
23 changes: 23 additions & 0 deletions application/.env.cntemplate
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
RDS_MYSQL_USERNAME=llmdata
RDS_MYSQL_PASSWORD=llmdata
RDS_MYSQL_HOST=mysql-db
RDS_MYSQL_PORT=3306
RDS_MYSQL_DBNAME=llm

AOS_HOST=opensearch-node1
AOS_PORT=9200
AOS_AWS_REGION=cn-north-1
AOS_DOMAIN=llm-data-analytics
AOS_INDEX=uba
AOS_INDEX_NER=uba_ner
AOS_USER=admin
AOS_PASSWORD=admin

BEDROCK_REGION=cn-north-1
RDS_REGION_NAME=cn-north-1
AWS_DEFAULT_REGION=cn-north-1

SAGEMAKER_ENDPOINT_EMBEDDING=embedding-bge-m3-3ab71
SAGEMAKER_ENDPOINT_INTENT=llm-internlm2-chat-7b-3ab71
SAGEMAKER_ENDPOINT_SQL=sql-sqlcoder-7b-2-7e5b6
SAGEMAKER_ENDPOINT_EXPLAIN=llm-internlm2-chat-7b-3ab71
8 changes: 4 additions & 4 deletions application/.env.template
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@ RDS_MYSQL_DBNAME=llm

AOS_HOST=opensearch-node1
AOS_PORT=9200
AOS_AWS_REGION=ap-northeast-1
AOS_AWS_REGION=us-west-2
AOS_DOMAIN=llm-data-analytics
AOS_INDEX=uba
AOS_INDEX_NER=uba_ner
AOS_USER=admin
AOS_PASSWORD=admin

BEDROCK_REGION=us-west-2
RDS_REGION_NAME=ap-northeast-1
AWS_DEFAULT_REGION=ap-northeast-1
RDS_REGION_NAME=us-west-2
AWS_DEFAULT_REGION=us-west-2

DYNAMODB_AWS_REGION=us-west-2
DYNAMODB_AWS_REGION=us-west-2
22 changes: 17 additions & 5 deletions source/model/sqlcoder/code/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,14 @@ def generate_prompt(question):
return prompt


def stream_items(sql_query):
    """Yield a SQL query line by line for streamed responses.

    Each yielded piece is one line of the query with a trailing newline
    re-attached, so the consumer can concatenate the pieces to rebuild
    the full (newline-terminated) query text.
    """
    for line in sql_query.split("\n"):
        piece = line + "\n"
        logging.info(f"Stream buffer: {piece}")
        yield piece


def handle(inputs: Input):
global tokenizer, model
if not model:
Expand All @@ -100,7 +108,7 @@ def handle(inputs: Input):
data = inputs.get_as_json()

prompt = data["prompt"]
outputs = Output()
stream = data.get("stream", False)

# updated_prompt = generate_prompt(prompt)
updated_prompt = prompt
Expand All @@ -116,10 +124,14 @@ def handle(inputs: Input):
num_beams=1,
)
decoded_outputs = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
sql_query = sqlparse.format(decoded_outputs[0].split("[SQL]")[-1], reindent=True)
logging.info(f"SQL Query: {sql_query}")

result = {
"outputs": sqlparse.format(decoded_outputs[0].split("[SQL]")[-1], reindent=True)
}
outputs.add_as_json(result)
outputs = Output()
    # When streaming is requested, emit the SQL query one line at a time;
    # otherwise return the whole query in a single JSON payload.
if stream:
outputs.add_stream_content(stream_items(sql_query), output_formatter=None)
else:
outputs.add_as_json({"outputs": sql_query})

return outputs
2 changes: 1 addition & 1 deletion source/model/sqlcoder/code/serving.properties
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
engine=Python
option.tensor_parallel_degree=1
option.enable_streaming=true
# update according to your own path
# option.s3url = s3://<_S3ModelAssets>/<_AssetsStack._embeddingModelPrefix>
option.s3url = s3://llm-bot-models-256374081253-cn-north-1/sqlcoder-7b-2/
2 changes: 1 addition & 1 deletion source/model/sqlcoder/model/serving.properties
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
engine=Python
option.tensor_parallel_degree=tpd
option.enable_streaming=true
# update according to your own path
# option.s3url = s3://<_S3ModelAssets>/<_AssetsStack._embeddingModelPrefix>
option.s3url = S3PATH
22 changes: 17 additions & 5 deletions source/model/sqlcoder/model/sqlcoder-7b-2_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,14 @@ def generate_prompt(question):
return prompt


def stream_items(sql_query):
    """Yield a SQL query line by line for streamed responses.

    Each yielded piece is one line of the query with a trailing newline
    re-attached, so the consumer can concatenate the pieces to rebuild
    the full (newline-terminated) query text.
    """
    for line in sql_query.split("\n"):
        piece = line + "\n"
        logging.info(f"Stream buffer: {piece}")
        yield piece


def handle(inputs: Input):
global tokenizer, model
if not model:
Expand All @@ -100,7 +108,7 @@ def handle(inputs: Input):
data = inputs.get_as_json()

prompt = data["prompt"]
outputs = Output()
stream = data.get("stream", False)

# updated_prompt = generate_prompt(prompt)
updated_prompt = prompt
Expand All @@ -116,10 +124,14 @@ def handle(inputs: Input):
num_beams=1,
)
decoded_outputs = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
sql_query = sqlparse.format(decoded_outputs[0].split("[SQL]")[-1], reindent=True)
logging.info(f"SQL Query: {sql_query}")

result = {
"outputs": sqlparse.format(decoded_outputs[0].split("[SQL]")[-1], reindent=True)
}
outputs.add_as_json(result)
outputs = Output()
    # When streaming is requested, emit the SQL query one line at a time;
    # otherwise return the whole query in a single JSON payload.
if stream:
outputs.add_stream_content(stream_items(sql_query), output_formatter=None)
else:
outputs.add_as_json({"outputs": sql_query})

return outputs
30 changes: 30 additions & 0 deletions source/resources/lib/ec2/cn_user_data/install_docker.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# EC2 user-data bootstrap (China regions): install Docker, docker-compose and
# the AWS CLI on Amazon Linux 2023.
#
# Fix vs. previous revision: removed the `sudo su - ec2-user` / `exit` pair.
# In a non-interactive user-data script `su` spawns a login shell that exits
# immediately at stdin EOF — it does NOT switch the user for the commands that
# follow, and every command below already elevates with `sudo` explicitly.

# Sleep 60 seconds to avoid yum failures
# Ref: https://repost.aws/questions/QUgNz4VGCFSC2TYekM-6GiDQ/dnf-yum-both-fails-while-being-executed-on-instance-bootstrap-on-amazon-linux-2023
sleep 60

# Install components
sudo yum install docker python3-pip git -y

# Use the Tsinghua PyPI mirror for reliable package installs from China regions
sudo pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
sudo pip3 install -U awscli

# Remove python3-requests to avoid conflict with docker-compose
# Ref: https://stackoverflow.com/questions/76443104/error-cannot-uninstall-requests-2-25-1-record-file-not-found-hint-the-packag
sudo yum -y remove python3-requests

sudo pip3 install docker-compose

# Fix docker python wrapper 7.0 SSL version issue
sudo pip3 install docker==6.1.3

# Configure components: start Docker now and on every boot
sudo systemctl enable docker
sudo systemctl start docker
echo "finishing starting docker"
# Let ec2-user run docker without sudo (takes effect on the user's next login)
sudo usermod -aG docker ec2-user
echo "finishing adding user to docker group"
39 changes: 39 additions & 0 deletions source/resources/lib/ec2/cn_user_data/setup_app.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# EC2 user-data application setup (China regions): fetch the app bundle and
# generate its .env configuration from the instance's own region.

# Configure OpenSearch host prerequisites (containerized OpenSearch requires a
# larger vm.max_map_count than the Amazon Linux default)
sudo sh -c "echo 'vm.max_map_count=262144' > /etc/sysctl.conf" && sudo sysctl -p

# Fetch the application code
# git clone https://github.com/aws-samples/generative-bi-using-rag.git
wget https://aws-genbi-guidance-asset.s3.us-west-2.amazonaws.com/asset/code/genbi-guidance-asset.zip
unzip genbi-guidance-asset.zip

# Create .env from the China-region template, then point every region setting
# at the region this instance actually runs in.
cd genbi-guidance-asset/application && cp .env.cntemplate .env

file_path=".env"
# Query the region from the instance metadata service. Use an IMDSv2 session
# token so this also works when IMDSv1 is disabled (the AL2023 default).
imds_token=$(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 300")
ec2_region=$(curl -s -H "X-aws-ec2-metadata-token: $imds_token" http://169.254.169.254/latest/meta-data/placement/region)
# Match on the key name rather than a hard-coded old value: .env.cntemplate's
# current values are cn-* regions, so the previous "ap-northeast-1" patterns
# never matched and the substitutions silently did nothing.
sed -i "s|^AOS_AWS_REGION=.*|AOS_AWS_REGION=$ec2_region|g" $file_path
sed -i "s|^RDS_REGION_NAME=.*|RDS_REGION_NAME=$ec2_region|g" $file_path
sed -i "s|^AWS_DEFAULT_REGION=.*|AWS_DEFAULT_REGION=$ec2_region|g" $file_path
# Keep the DYNAMODB_AWS_REGION key name intact — the previous replacement
# renamed the key to AWS_DEFAULT_REGION, corrupting the .env file.
sed -i "s|^DYNAMODB_AWS_REGION=.*|DYNAMODB_AWS_REGION=$ec2_region|g" $file_path


# # Build docker images locally
# docker-compose build

# # Start all services
# docker-compose up -d

# # Wait 3 minutes for MySQL and OpenSearch to initialize
# sleep 180

# cd initial_data && wget https://github.com/fengxu1211/generative-bi-using-rag/raw/demo_data/application/initial_data/init_mysql_db.sql.zip

# unzip init_mysql_db.sql.zip && cd ..

# docker exec nlq-mysql sh -c "mysql -u root -ppassword -D llm < /opt/data/init_mysql_db.sql"

# docker exec nlq-webserver python opensearch_deploy.py

# echo "All services are started successfully. Please access the application at http://<ec2-public-ip>"
11 changes: 9 additions & 2 deletions source/resources/lib/ec2/ec2-stack.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,20 @@ export class Ec2Stack extends NestedStack {
}],
});

const installDockerAsset = new Asset(this, 'InstallDockerAsset', { path: path.join(__dirname, 'user_data/install_docker.sh') });
let installDockerAsset;
let setupAppAsset;
if (props.env?.region === "cn-north-1" || props.env?.region === "cn-northwest-1") {
installDockerAsset = new Asset(this, 'InstallDockerAsset', { path: path.join(__dirname, 'cn_user_data/install_docker.sh') });
setupAppAsset = new Asset(this, 'SetupAppAsset', { path: path.join(__dirname, 'cn_user_data/setup_app.sh') });
} else {
installDockerAsset = new Asset(this, 'InstallDockerAsset', { path: path.join(__dirname, 'user_data/install_docker.sh') });
setupAppAsset = new Asset(this, 'SetupAppAsset', { path: path.join(__dirname, 'user_data/setup_app.sh') });
}
const installDockerLocalPath = ec2Instance.userData.addS3DownloadCommand({
bucket: installDockerAsset.bucket,
bucketKey: installDockerAsset.s3ObjectKey,
});

const setupAppAsset = new Asset(this, 'SetupAppAsset', { path: path.join(__dirname, 'user_data/setup_app.sh') });
const setupAppLocalPath = ec2Instance.userData.addS3DownloadCommand({
bucket: setupAppAsset.bucket,
bucketKey: setupAppAsset.s3ObjectKey,
Expand Down
39 changes: 24 additions & 15 deletions source/resources/lib/main-stack.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,26 +11,35 @@ export class MainStack extends Stack {
const s3ModelAssetsBucket = new CfnParameter(this, "S3ModelAssetsBucket", {
type: "String",
description: "S3 Bucket for model & code assets",
default: "not-set"
});

const _LlmStack = new LLMStack(this, 'llm-Stack', {
s3ModelAssets: s3ModelAssetsBucket.valueAsString,
embeddingModelPrefix: 'bge-m3',
embeddingModelVersion: '3ab7155aa9b89ac532b2f2efcc3f136766b91025',
sqlModelPrefix: 'sqlcoder-7b-2',
sqlModelVersion: '7e5b6f7981c0aa7d143f6bec6fa26625bdfcbe66',
llmModelPrefix: 'internlm2-chat-7b',
llmModelVersion: '54a594b0be43065e7b7674d0f236911cd7c465ab',
let _LlmStack;

if (props.env?.region === "cn-north-1" || props.env?.region === "cn-northwest-1") {
_LlmStack = new LLMStack(this, 'llm-Stack', {
s3ModelAssets: s3ModelAssetsBucket.valueAsString,
embeddingModelPrefix: 'bge-m3',
embeddingModelVersion: '3ab7155aa9b89ac532b2f2efcc3f136766b91025',
sqlModelPrefix: 'sqlcoder-7b-2',
sqlModelVersion: '7e5b6f7981c0aa7d143f6bec6fa26625bdfcbe66',
llmModelPrefix: 'internlm2-chat-7b',
llmModelVersion: '54a594b0be43065e7b7674d0f236911cd7c465ab',
env: props.env || {},
});
}

const _Ec2Stack = new Ec2Stack(this, 'ec2-Stack', {
env: props.env,
});

// const _Ec2Stack = new Ec2Stack(this, 'ec2-Stack', {
// env: props.env,
// });
if (_LlmStack) {
_Ec2Stack.addDependency(_LlmStack);
}

// new CfnOutput(this, 'Ec2PublicIP', {
// value: _Ec2Stack._publicIP,
// description: 'Public IP of the EC2 instance',
// });
new CfnOutput(this, 'Ec2PublicIP', {
value: _Ec2Stack._publicIP,
description: 'Public IP of the EC2 instance',
});
}
}
13 changes: 7 additions & 6 deletions source/resources/lib/model/llm-stack.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ interface LLMStackProps extends cdk.StackProps {
sqlModelVersion: string;
llmModelPrefix: string;
llmModelVersion: string;
env: cdk.Environment;
}

export class LLMStack extends cdk.NestedStack {
Expand All @@ -24,12 +25,12 @@ export class LLMStack extends cdk.NestedStack {
super(scope, id, props);

const llmImageUrlDomain =
this.region === "cn-north-1" || this.region === "cn-northwest-1"
props.env.region === "cn-north-1" || props.env.region === "cn-northwest-1"
? ".amazonaws.com.cn/"
: ".amazonaws.com/";

const llmImageUrlAccount =
this.region === "cn-north-1" || this.region === "cn-northwest-1"
props.env.region === "cn-north-1" || props.env.region === "cn-northwest-1"
? "727897471807.dkr.ecr."
: "763104351884.dkr.ecr.";

Expand All @@ -54,7 +55,7 @@ export class LLMStack extends cdk.NestedStack {
"embedding-" + embeddingModelPrefix + "-" + embeddingVersionId.slice(0, 5);
const embeddingImageUrl =
llmImageUrlAccount +
this.region +
props.env.region +
llmImageUrlDomain +
"djl-inference:0.26.0-deepspeed0.12.6-cu121";
const embeddingModel = new sagemaker.CfnModel(this, embeddingModelName, {
Expand Down Expand Up @@ -116,7 +117,7 @@ export class LLMStack extends cdk.NestedStack {

const sqlImageUrl =
llmImageUrlAccount +
this.region +
props.env.region +
llmImageUrlDomain +
"djl-inference:0.26.0-deepspeed0.12.6-cu121";
const sqlModel = new sagemaker.CfnModel(this, sqlModelName, {
Expand Down Expand Up @@ -166,7 +167,7 @@ export class LLMStack extends cdk.NestedStack {


// INSTRUCT MODEL
// Create model, BucketDeployment construct automatically handles dependencies to ensure model assets uploaded before creating the model in this.region
// Create model, BucketDeployment construct automatically handles dependencies to ensure model assets uploaded before creating the model in props.env.region
// Instruct MODEL
const llmModelPrefix = props.llmModelPrefix;
const llmCodePrefix = llmModelPrefix + "_deploy_code";
Expand All @@ -179,7 +180,7 @@ export class LLMStack extends cdk.NestedStack {

const llmImageUrl =
llmImageUrlAccount +
this.region +
props.env.region +
llmImageUrlDomain +
"djl-inference:0.26.0-deepspeed0.12.6-cu121";
const llmModel = new sagemaker.CfnModel(this, llmModelName, {
Expand Down