13
13
import json
14
14
import os
15
15
import time
16
- from typing import Dict , Any
16
+ from typing import Dict , Any , List , Optional
17
17
18
18
from dotenv import load_dotenv
19
19
20
+ from pydantic import BaseModel , EmailStr , HttpUrl
20
21
from scrapegraph_py import Client
21
22
22
23
# Load environment variables from .env file
23
24
load_dotenv ()
24
25
26
+ # Pydantic models for schema
27
class SocialLinks(BaseModel):
    # Links to social profiles; each one is an optional URL.
    #
    # The explicit ``None`` defaults matter: pydantic v2 treats an
    # ``Optional[...]`` field without a default as required, which would put
    # all three links in the generated schema's "required" list. With the
    # defaults, a missing link simply validates as ``None``.
    github: Optional[HttpUrl] = None
    linkedin: Optional[HttpUrl] = None
    twitter: Optional[HttpUrl] = None
31
+
32
class Company(BaseModel):
    # Company details. Only the name and description are mandatory.
    name: str
    description: str

    # Everything below may be omitted and then defaults to None.
    features: Optional[List[str]] = None
    contact_email: Optional[EmailStr] = None
    social_links: Optional[SocialLinks] = None
38
+
39
class Service(BaseModel):
    # One service entry: a mandatory name and description.
    service_name: str
    description: str

    # Optional list of feature strings; None when not supplied.
    features: Optional[List[str]] = None
43
+
44
class Legal(BaseModel):
    # Legal section; both entries are mandatory strings.
    privacy_policy: str
    terms_of_service: str
47
+
48
class WebsiteContent(BaseModel):
    # Top-level model; main() builds the request schema from it via
    # WebsiteContent.schema(). All three sections are mandatory.
    #
    # NOTE: kept as a comment rather than a docstring, because pydantic
    # copies a model docstring into the generated schema as "description".
    company: Company
    services: List[Service]
    legal: Legal
25
52
26
53
def main ():
27
54
if not os .getenv ("SGAI_API_KEY" ):
@@ -31,53 +58,7 @@ def main():
31
58
return
32
59
33
60
# Example schema (from your curl command)
34
- schema : Dict [str , Any ] = {
35
- "$schema" : "http://json-schema.org/draft-07/schema#" ,
36
- "title" : "ScrapeGraphAI Website Content" ,
37
- "type" : "object" ,
38
- "properties" : {
39
- "company" : {
40
- "type" : "object" ,
41
- "properties" : {
42
- "name" : {"type" : "string" },
43
- "description" : {"type" : "string" },
44
- "features" : {"type" : "array" , "items" : {"type" : "string" }},
45
- "contact_email" : {"type" : "string" , "format" : "email" },
46
- "social_links" : {
47
- "type" : "object" ,
48
- "properties" : {
49
- "github" : {"type" : "string" , "format" : "uri" },
50
- "linkedin" : {"type" : "string" , "format" : "uri" },
51
- "twitter" : {"type" : "string" , "format" : "uri" },
52
- },
53
- "additionalProperties" : False ,
54
- },
55
- },
56
- "required" : ["name" , "description" ],
57
- },
58
- "services" : {
59
- "type" : "array" ,
60
- "items" : {
61
- "type" : "object" ,
62
- "properties" : {
63
- "service_name" : {"type" : "string" },
64
- "description" : {"type" : "string" },
65
- "features" : {"type" : "array" , "items" : {"type" : "string" }},
66
- },
67
- "required" : ["service_name" , "description" ],
68
- },
69
- },
70
- "legal" : {
71
- "type" : "object" ,
72
- "properties" : {
73
- "privacy_policy" : {"type" : "string" },
74
- "terms_of_service" : {"type" : "string" },
75
- },
76
- "required" : ["privacy_policy" , "terms_of_service" ],
77
- },
78
- },
79
- "required" : ["company" , "services" , "legal" ],
80
- }
61
+ schema = WebsiteContent .schema ()
81
62
82
63
url = "https://scrapegraphai.com/"
83
64
prompt = (
0 commit comments