5
5
import argparse
6
6
import logging
7
7
import os
8
+ import re
8
9
from threading import Thread
9
10
from urllib .parse import urlparse
10
11
11
12
import decord
12
13
import requests
13
14
import uvicorn
15
+ import validators
14
16
from extract_vl_embedding import VLEmbeddingExtractor as VL
15
17
from fastapi import FastAPI , Query
16
18
from fastapi .middleware .cors import CORSMiddleware
21
23
from transformers import TextIteratorStreamer , set_seed
22
24
from video_llama .common .registry import registry
23
25
from video_llama .conversation .conversation_video import Chat
26
+ from werkzeug .utils import secure_filename
24
27
25
28
# Initialize decord bridge and seed
26
29
decord .bridge .set_bridge ("torch" )
33
36
context_db = None
34
37
streamer = None
35
38
chat = None
36
- VIDEO_DIR = "/home/user/videos "
39
+ VIDEO_DIR = "/home/user/comps/lvms/video-llama/server/data "
37
40
CFG_PATH = "video_llama_config/video_llama_eval_only_vl.yaml"
38
41
MODEL_TYPE = "llama_v2"
39
42
@@ -161,6 +164,43 @@ def is_local_file(url):
161
164
return not url .startswith ("http://" ) and not url .startswith ("https://" )
162
165
163
166
167
def is_valid_url(url):
    """Decide whether a remote video URL is acceptable for download.

    The URL is first handed to ``validators.url``; a falsy result rejects it.
    The parsed URL must then use the ``http`` or ``https`` scheme, its path
    must not contain ``"../"``, and its path must contain exactly one ``"."``
    (the one separating the file name from its extension).

    Args:
        url: The URL string to check.

    Returns:
        True when every check passes, False otherwise. The first failing
        check is logged at error level; success is logged at info level.
    """
    # Structural validation comes first; nothing is parsed if it fails.
    if not validators.url(url):
        logging.error("URL is invalid")
        return False

    components = urlparse(url)
    url_path = components.path

    # Checks run in a fixed order and only the first failure is reported.
    failure = None
    if components.scheme not in ("http", "https"):
        failure = "URL scheme is invalid"
    elif "../" in url_path:
        failure = "URL contains '../', which is not allowed"
    elif url_path.count(".") != 1:
        failure = "URL path does not meet the requirement of having only one '.'"

    if failure is not None:
        logging.error(failure)
        return False

    logging.info("URL is valid")
    return True
195
+
196
+
197
def is_valid_video(filename):
    """Validate a bare MP4 file name and return its sanitized form.

    A name is accepted only when the whole string consists of one or more
    ASCII letters, digits, hyphens or underscores followed by ``.mp4``
    (extension compared case-insensitively). Path separators, extra dots and
    whitespace are therefore rejected, so only names without a directory
    component pass.

    Args:
        filename: Candidate file name as a string.

    Returns:
        The result of ``secure_filename(filename)`` when the name is
        accepted, otherwise ``False``.
    """
    # fullmatch is used instead of match(..."$") because "$" also matches
    # just before a trailing newline, which would let "clip.mp4\n" through.
    if re.fullmatch(r"[a-zA-Z0-9_-]+\.mp4", filename, re.IGNORECASE) is None:
        return False
    return secure_filename(filename)
202
+
203
+
164
204
@app .get ("/health" )
165
205
async def health () -> Response :
166
206
"""Health check."""
@@ -175,46 +215,54 @@ async def generate(
175
215
prompt : str = Query (..., description = "Query for Video-LLama" , examples = "What is the man doing?" ),
176
216
max_new_tokens : int = Query (150 , description = "Maximum number of tokens to generate" , examples = 150 ),
177
217
) -> StreamingResponse :
178
- if not is_local_file (video_url ):
179
- parsed_url = urlparse (video_url )
180
- video_name = os .path .basename (parsed_url .path )
181
- else :
182
- video_name = os .path .basename (video_url )
183
218
184
- if video_name .lower ().endswith (".mp4" ):
185
- logging .info (f"Format check passed, the file '{ video_name } ' is an MP4 file." )
219
+ if video_url .lower ().endswith (".mp4" ):
220
+ logging .info (f"Format check passed, the file '{ video_url } ' is an MP4 file." )
186
221
else :
187
- logging .info (f"Format check failed, the file '{ video_name } ' is not an MP4 file." )
188
- return JSONResponse (status_code = 400 , content = {"message" : "Invalid file type. Only mp4 videos are allowed." })
189
-
190
- if not is_local_file (video_url ):
191
- try :
192
- video_path = os .path .join (VIDEO_DIR , video_name )
193
- response = requests .get (video_url , stream = True )
194
-
195
- if response .status_code == 200 :
196
- with open (video_path , "wb" ) as file :
197
- for chunk in response .iter_content (chunk_size = 1024 ):
198
- if chunk : # filter out keep-alive new chunks
199
- file .write (chunk )
200
- logging .info (f"File downloaded: { video_path } " )
201
- else :
222
+ logging .info (f"Format check failed, the file '{ video_url } ' is not an MP4 file." )
223
+ return JSONResponse (status_code = 500 , content = {"message" : "Invalid file type. Only mp4 videos are allowed." })
224
+
225
+ if is_local_file (video_url ):
226
+ # validate the video name
227
+ if is_valid_video (video_url ):
228
+ secure_video_name = is_valid_video (video_url ) # only support video name without path
229
+ else :
230
+ return JSONResponse (status_code = 500 , content = {"message" : "Invalid file name." })
231
+
232
+ video_path = os .path .join (VIDEO_DIR , secure_video_name )
233
+ if os .path .exists (video_path ):
234
+ logging .info (f"File found: { video_path } " )
235
+ else :
236
+ logging .error (f"File not found: { video_path } " )
237
+ return JSONResponse (
238
+ status_code = 404 , content = {"message" : "File not found. Only local files under data folder are allowed." }
239
+ )
240
+ else :
241
+ # validate the remote URL
242
+ if not is_valid_url (video_url ):
243
+ return JSONResponse (status_code = 500 , content = {"message" : "Invalid URL." })
244
+ else :
245
+ parsed_url = urlparse (video_url )
246
+ video_path = os .path .join (VIDEO_DIR , os .path .basename (parsed_url .path ))
247
+ try :
248
+ response = requests .get (video_url , stream = True )
249
+ if response .status_code == 200 :
250
+ with open (video_path , "wb" ) as file :
251
+ for chunk in response .iter_content (chunk_size = 1024 ):
252
+ if chunk : # filter out keep-alive new chunks
253
+ file .write (chunk )
254
+ logging .info (f"File downloaded: { video_path } " )
255
+ else :
256
+ logging .info (f"Error downloading file: { response .status_code } " )
257
+ return JSONResponse (status_code = 500 , content = {"message" : "Error downloading file." })
258
+ except Exception as e :
202
259
logging .info (f"Error downloading file: { response .status_code } " )
203
260
return JSONResponse (status_code = 500 , content = {"message" : "Error downloading file." })
204
- except Exception as e :
205
- logging .info (f"Error downloading file: { response .status_code } " )
206
- return JSONResponse (status_code = 500 , content = {"message" : "Error downloading file." })
207
- else :
208
- # check if the video exist
209
- video_path = video_url
210
- if not os .path .exists (video_path ):
211
- logging .info (f"File not found: { video_path } " )
212
- return JSONResponse (status_code = 404 , content = {"message" : "File not found." })
261
+
213
262
video_info = videoInfo (start_time = start , duration = duration , video_path = video_path )
214
263
215
264
# format context and instruction
216
265
instruction = f"{ get_context (prompt ,context_db )[0 ]} : { prompt } "
217
- # logging.info("instruction:",instruction)
218
266
219
267
return StreamingResponse (stream_res (video_info , instruction , max_new_tokens ))
220
268
0 commit comments