Skip to content

Commit 3e548f3

Browse files
BUG FIX: LVM security fix (#572)
* add url validator

  Signed-off-by: BaoHuiling <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci

* add validation for video_url

  Signed-off-by: BaoHuiling <[email protected]>

---------

Signed-off-by: BaoHuiling <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent e38ed6d commit 3e548f3

File tree

3 files changed

+84
-34
lines changed

3 files changed

+84
-34
lines changed

comps/lvms/video-llama/server/requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,4 +31,6 @@ torchaudio==0.13.1 --index-url https://download.pytorch.org/whl/cpu
3131
torchvision==0.14.1 --index-url https://download.pytorch.org/whl/cpu
3232
transformers
3333
uvicorn
34+
validators
3435
webdataset
36+
werkzeug

comps/lvms/video-llama/server/server.py

Lines changed: 81 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,14 @@
55
import argparse
66
import logging
77
import os
8+
import re
89
from threading import Thread
910
from urllib.parse import urlparse
1011

1112
import decord
1213
import requests
1314
import uvicorn
15+
import validators
1416
from extract_vl_embedding import VLEmbeddingExtractor as VL
1517
from fastapi import FastAPI, Query
1618
from fastapi.middleware.cors import CORSMiddleware
@@ -21,6 +23,7 @@
2123
from transformers import TextIteratorStreamer, set_seed
2224
from video_llama.common.registry import registry
2325
from video_llama.conversation.conversation_video import Chat
26+
from werkzeug.utils import secure_filename
2427

2528
# Initialize decord bridge and seed
2629
decord.bridge.set_bridge("torch")
@@ -33,7 +36,7 @@
3336
context_db = None
3437
streamer = None
3538
chat = None
36-
VIDEO_DIR = "/home/user/videos"
39+
VIDEO_DIR = "/home/user/comps/lvms/video-llama/server/data"
3740
CFG_PATH = "video_llama_config/video_llama_eval_only_vl.yaml"
3841
MODEL_TYPE = "llama_v2"
3942

@@ -161,6 +164,43 @@ def is_local_file(url):
161164
return not url.startswith("http://") and not url.startswith("https://")
162165

163166

167+
def is_valid_url(url):
    """Decide whether ``url`` may be used as a remote video location.

    The checks run in a fixed order and stop at the first failure, which is
    logged at error level:

    1. ``validators.url`` must return a truthy result for the string.
    2. The scheme must be ``http`` or ``https``.
    3. The path component must not contain ``"../"``.
    4. The path component must contain exactly one ``"."`` (the one that
       introduces the file extension).

    Args:
        url: The candidate URL string.

    Returns:
        True when every check passes, False otherwise.
    """
    # Structural validation comes first; nothing is parsed if it fails.
    if not validators.url(url):
        logging.error("URL is invalid")
        return False

    parts = urlparse(url)
    path = parts.path

    # Pick the message of the first rule that is violated, if any.
    if parts.scheme not in ("http", "https"):
        problem = "URL scheme is invalid"
    elif "../" in path:
        problem = "URL contains '../', which is not allowed"
    elif path.count(".") != 1:
        problem = "URL path does not meet the requirement of having only one '.'"
    else:
        problem = None

    if problem is not None:
        logging.error(problem)
        return False

    logging.info("URL is valid")
    return True
195+
196+
197+
def is_valid_video(filename):
    """Validate a bare local video file name and return its sanitized form.

    Only names made of ASCII letters, digits, ``_`` and ``-`` followed by a
    single ``.mp4`` extension (any letter case) are accepted. Anything that
    carries a directory component, extra dots, whitespace or non-ASCII
    characters is rejected.

    Args:
        filename: The file name supplied by the caller (no path allowed).

    Returns:
        The result of ``secure_filename(filename)`` when the name is
        accepted, otherwise ``False``.
    """
    # Reject non-string input instead of letting the regex raise TypeError.
    if not isinstance(filename, str):
        return False

    # fullmatch: "$" with re.match would also accept "name.mp4\n".
    # No re.IGNORECASE: on a str pattern it makes [a-z] match four extra
    # non-ASCII letters (e.g. U+212A KELVIN SIGN), which would leak through
    # the whitelist. Case-insensitivity is spelled out for the extension only.
    # The hyphen is placed last in the class so it is unambiguously literal.
    if re.fullmatch(r"[A-Za-z0-9_-]+\.[mM][pP]4", filename) is None:
        return False

    return secure_filename(filename)
202+
203+
164204
@app.get("/health")
165205
async def health() -> Response:
166206
"""Health check."""
@@ -175,46 +215,54 @@ async def generate(
175215
prompt: str = Query(..., description="Query for Video-LLama", examples="What is the man doing?"),
176216
max_new_tokens: int = Query(150, description="Maximum number of tokens to generate", examples=150),
177217
) -> StreamingResponse:
178-
if not is_local_file(video_url):
179-
parsed_url = urlparse(video_url)
180-
video_name = os.path.basename(parsed_url.path)
181-
else:
182-
video_name = os.path.basename(video_url)
183218

184-
if video_name.lower().endswith(".mp4"):
185-
logging.info(f"Format check passed, the file '{video_name}' is an MP4 file.")
219+
if video_url.lower().endswith(".mp4"):
220+
logging.info(f"Format check passed, the file '{video_url}' is an MP4 file.")
186221
else:
187-
logging.info(f"Format check failed, the file '{video_name}' is not an MP4 file.")
188-
return JSONResponse(status_code=400, content={"message": "Invalid file type. Only mp4 videos are allowed."})
189-
190-
if not is_local_file(video_url):
191-
try:
192-
video_path = os.path.join(VIDEO_DIR, video_name)
193-
response = requests.get(video_url, stream=True)
194-
195-
if response.status_code == 200:
196-
with open(video_path, "wb") as file:
197-
for chunk in response.iter_content(chunk_size=1024):
198-
if chunk: # filter out keep-alive new chunks
199-
file.write(chunk)
200-
logging.info(f"File downloaded: {video_path}")
201-
else:
222+
logging.info(f"Format check failed, the file '{video_url}' is not an MP4 file.")
223+
return JSONResponse(status_code=500, content={"message": "Invalid file type. Only mp4 videos are allowed."})
224+
225+
if is_local_file(video_url):
226+
# validate the video name
227+
if is_valid_video(video_url):
228+
secure_video_name = is_valid_video(video_url) # only support video name without path
229+
else:
230+
return JSONResponse(status_code=500, content={"message": "Invalid file name."})
231+
232+
video_path = os.path.join(VIDEO_DIR, secure_video_name)
233+
if os.path.exists(video_path):
234+
logging.info(f"File found: {video_path}")
235+
else:
236+
logging.error(f"File not found: {video_path}")
237+
return JSONResponse(
238+
status_code=404, content={"message": "File not found. Only local files under data folder are allowed."}
239+
)
240+
else:
241+
# validate the remote URL
242+
if not is_valid_url(video_url):
243+
return JSONResponse(status_code=500, content={"message": "Invalid URL."})
244+
else:
245+
parsed_url = urlparse(video_url)
246+
video_path = os.path.join(VIDEO_DIR, os.path.basename(parsed_url.path))
247+
try:
248+
response = requests.get(video_url, stream=True)
249+
if response.status_code == 200:
250+
with open(video_path, "wb") as file:
251+
for chunk in response.iter_content(chunk_size=1024):
252+
if chunk: # filter out keep-alive new chunks
253+
file.write(chunk)
254+
logging.info(f"File downloaded: {video_path}")
255+
else:
256+
logging.info(f"Error downloading file: {response.status_code}")
257+
return JSONResponse(status_code=500, content={"message": "Error downloading file."})
258+
except Exception as e:
202259
logging.info(f"Error downloading file: {response.status_code}")
203260
return JSONResponse(status_code=500, content={"message": "Error downloading file."})
204-
except Exception as e:
205-
logging.info(f"Error downloading file: {response.status_code}")
206-
return JSONResponse(status_code=500, content={"message": "Error downloading file."})
207-
else:
208-
# check if the video exist
209-
video_path = video_url
210-
if not os.path.exists(video_path):
211-
logging.info(f"File not found: {video_path}")
212-
return JSONResponse(status_code=404, content={"message": "File not found."})
261+
213262
video_info = videoInfo(start_time=start, duration=duration, video_path=video_path)
214263

215264
# format context and instruction
216265
instruction = f"{get_context(prompt,context_db)[0]}: {prompt}"
217-
# logging.info("instruction:",instruction)
218266

219267
return StreamingResponse(stream_res(video_info, instruction, max_new_tokens))
220268

tests/test_lvms_video-llama.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ function start_service() {
6262
}
6363

6464
function validate_microservice() {
65-
result=$(http_proxy="" curl http://localhost:5031/v1/lvm -X POST -d '{"video_url":"./data/silence_girl.mp4","chunk_start": 0,"chunk_duration": 7,"prompt":"What is the person doing?","max_new_tokens": 50}' -H 'Content-Type: application/json')
65+
result=$(http_proxy="" curl http://localhost:5031/v1/lvm -X POST -d '{"video_url":"silence_girl.mp4","chunk_start": 0,"chunk_duration": 7,"prompt":"What is the person doing?","max_new_tokens": 50}' -H 'Content-Type: application/json')
6666
if [[ $result == *"silence"* ]]; then
6767
echo "Result correct."
6868
else

0 commit comments

Comments
 (0)