Skip to content

Commit 3367b76

Browse files
authored
update upload_training_files format (#613)
* update upload_training_files format Signed-off-by: Yue, Wenjiao <[email protected]>
1 parent 9007212 commit 3367b76

File tree

5 files changed

+92
-28
lines changed

5 files changed

+92
-28
lines changed

comps/cores/proto/api_protocol.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -761,3 +761,39 @@ class FineTuningJobList(BaseModel):
761761
762762
If true, additional requests can be made to retrieve more jobs.
763763
"""
764+
765+
766+
class UploadFileRequest(BaseModel):
    # Request model pairing a file to upload with the purpose declared for it.

    purpose: str
    """What the uploaded file is meant to be used for.

    Use "assistants" for Assistants and Message files, "vision" for Assistants image file inputs, "batch" for Batch API, and "fine-tune" for Fine-tuning.
    """

    file: UploadFile
    """The file object itself (not just its name) that is being uploaded."""
775+
776+
777+
class FileObject(BaseModel):
    # Field order follows the official OpenAI API documentation:
    # https://platform.openai.com/docs/api-reference/files/object

    id: str
    """Identifier of the file; API endpoints can reference the file by it."""

    bytes: int
    """Size of the file, expressed in bytes."""

    created_at: int
    """Creation time of the file as a Unix timestamp (in seconds)."""

    filename: str
    """Name of the file."""

    object: str = "file"
    """Object type; this is always file."""

    purpose: str
    """What the file is meant to be used for.

    Supported values are assistants, assistants_output, batch, batch_output, fine-tune, fine-tune-results and vision.
    """

comps/finetuning/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ Assuming a training file `alpaca_data.json` is uploaded, it can be downloaded in
9393
```bash
9494
# upload a training file
9595

96-
curl http://${your_ip}:8015/v1/finetune/upload_training_files -X POST -H "Content-Type: multipart/form-data" -F "files=@./alpaca_data.json"
96+
curl http://${your_ip}:8015/v1/files -X POST -H "Content-Type: multipart/form-data" -F "file=@./alpaca_data.json" -F purpose="fine-tune"
9797

9898
# create a finetuning job
9999
curl http://${your_ip}:8015/v1/fine_tuning/jobs \

comps/finetuning/finetuning_service.py

Lines changed: 8 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,18 @@
11
# Copyright (C) 2024 Intel Corporation
22
# SPDX-License-Identifier: Apache-2.0
3-
4-
import os
5-
import urllib.parse
6-
from typing import List, Optional, Union
7-
8-
from fastapi import BackgroundTasks, File, UploadFile
3+
from fastapi import BackgroundTasks, Depends
94

105
from comps import opea_microservices, register_microservice
11-
from comps.cores.proto.api_protocol import FineTuningJobIDRequest
6+
from comps.cores.proto.api_protocol import FineTuningJobIDRequest, UploadFileRequest
127
from comps.finetuning.finetune_config import FineTuningParams
138
from comps.finetuning.handlers import (
14-
DATASET_BASE_PATH,
159
handle_cancel_finetuning_job,
1610
handle_create_finetuning_jobs,
1711
handle_list_finetuning_checkpoints,
1812
handle_list_finetuning_jobs,
1913
handle_retrieve_finetuning_job,
20-
save_content_to_local_disk,
14+
handle_upload_training_files,
15+
upload_file,
2116
)
2217

2318

@@ -51,22 +46,13 @@ def cancel_finetuning_job(request: FineTuningJobIDRequest):
5146

5247
@register_microservice(
5348
name="opea_service@finetuning",
54-
endpoint="/v1/finetune/upload_training_files",
49+
endpoint="/v1/files",
5550
host="0.0.0.0",
5651
port=8015,
5752
)
58-
async def upload_training_files(
59-
files: Optional[Union[UploadFile, List[UploadFile]]] = File(None),
60-
):
61-
if files:
62-
if not isinstance(files, list):
63-
files = [files]
64-
for file in files:
65-
filename = urllib.parse.quote(file.filename, safe="")
66-
save_path = os.path.join(DATASET_BASE_PATH, filename)
67-
await save_content_to_local_disk(save_path, file)
68-
69-
return {"status": 200, "message": "Training files uploaded."}
53+
async def upload_training_files(request: UploadFileRequest = Depends(upload_file)):
    # The `upload_file` dependency turns the multipart form fields into an
    # UploadFileRequest; whatever the handler produces is returned unchanged.
    return await handle_upload_training_files(request)
7056

7157

7258
@register_microservice(

comps/finetuning/handlers.py

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,24 @@
44
import os
55
import random
66
import time
7+
import urllib.parse
78
import uuid
89
from pathlib import Path
910
from typing import Dict
1011

11-
from fastapi import BackgroundTasks, HTTPException
12+
from fastapi import BackgroundTasks, File, Form, HTTPException, UploadFile
1213
from pydantic_yaml import parse_yaml_raw_as, to_yaml_file
1314
from ray.job_submission import JobSubmissionClient
1415

1516
from comps import CustomLogger
16-
from comps.cores.proto.api_protocol import FineTuningJob, FineTuningJobIDRequest, FineTuningJobList
17+
from comps.cores.proto.api_protocol import (
18+
FileObject,
19+
FineTuningJob,
20+
FineTuningJobIDRequest,
21+
FineTuningJobList,
22+
FineTuningJobsRequest,
23+
UploadFileRequest,
24+
)
1725
from comps.finetuning.finetune_config import FinetuneConfig, FineTuningParams
1826

1927
logger = CustomLogger("finetuning_handlers")
@@ -185,3 +193,28 @@ def handle_list_finetuning_checkpoints(request: FineTuningJobIDRequest):
185193
if os.path.exists(output_dir):
186194
checkpoints = os.listdir(output_dir)
187195
return checkpoints
196+
197+
198+
async def upload_file(purpose: str = Form(...), file: UploadFile = File(...)):
    """Bundle the required `purpose` form field and uploaded `file` into an UploadFileRequest."""
    upload_request = UploadFileRequest(purpose=purpose, file=file)
    return upload_request
200+
201+
202+
async def handle_upload_training_files(request: UploadFileRequest):
    """Save an uploaded training file to local disk and describe it as a FileObject.

    Args:
        request: Upload request carrying the file and its declared purpose.

    Returns:
        A FileObject holding a freshly generated id, the size of the saved file
        in bytes, the current Unix timestamp, the URL-quoted file name and the
        purpose taken from the request.

    Raises:
        HTTPException: 404 when the request carries no file; 400 when the
            uploaded file has no name.
    """
    file = request.file
    if file is None:
        raise HTTPException(status_code=404, detail="upload file failed!")
    if not file.filename:
        # quote() raises TypeError on None, and an empty name would make the
        # save path resolve to the dataset directory itself.
        raise HTTPException(status_code=400, detail="upload file failed: missing file name!")

    # Percent-encode every reserved character (including "/") so the name stays
    # a single path component under DATASET_BASE_PATH.
    filename = urllib.parse.quote(file.filename, safe="")
    save_path = os.path.join(DATASET_BASE_PATH, filename)
    await save_content_to_local_disk(save_path, file)

    return FileObject(
        id=f"file-{uuid.uuid4()}",
        object="file",
        bytes=os.path.getsize(save_path),
        created_at=int(time.time()),
        filename=filename,
        # Echo the purpose supplied by the caller instead of hard-coding "fine-tune".
        purpose=request.purpose,
    )

tests/test_finetuning.sh

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,21 +33,30 @@ function validate_microservice() {
3333
export no_proxy="localhost,127.0.0.1,"${ip_address}
3434

3535
# test /v1/files upload file
36-
URL="http://${ip_address}:$finetuning_service_port/v1/finetune/upload_training_files"
36+
URL="http://${ip_address}:$finetuning_service_port/v1/files"
3737
echo '[{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. 
Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. 
Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."}]' > $LOG_PATH/test_data.json
38-
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'files=@./test_data.json' -H 'Content-Type: multipart/form-data' "$URL")
38+
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'file=@./test_data.json' -F purpose="fine-tune" -H 'Content-Type: multipart/form-data' "$URL")
3939
HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://')
4040
RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g')
4141
SERVICE_NAME="finetuning-server - upload - file"
4242

43+
# Parse the JSON response
44+
purpose=$(echo "$RESPONSE_BODY" | jq -r '.purpose')
45+
filename=$(echo "$RESPONSE_BODY" | jq -r '.filename')
46+
47+
# Define expected values
48+
expected_purpose="fine-tune"
49+
expected_filename="test_data.json"
50+
4351
if [ "$HTTP_STATUS" -ne "200" ]; then
4452
echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
4553
docker logs finetuning-server >> ${LOG_PATH}/finetuning-server_upload_file.log
4654
exit 1
4755
else
4856
echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
4957
fi
50-
if [[ "$RESPONSE_BODY" != *"Training files uploaded"* ]]; then
58+
# Check if the parsed values match the expected values
59+
if [[ "$purpose" != "$expected_purpose" || "$filename" != "$expected_filename" ]]; then
5160
echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY"
5261
docker logs finetuning-server >> ${LOG_PATH}/finetuning-server_upload_file.log
5362
exit 1

0 commit comments

Comments
 (0)