Skip to content

Commit fef5ccc

Browse files
docs: speakeasy local development - added overlays for having client-side only parameter
1 parent 3c0a749 commit fef5ccc

File tree

8 files changed

+602
-2
lines changed

8 files changed

+602
-2
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,4 @@ __pycache__/
88
# human-added ignore files
99
.ipynb_checkpoints/
1010
.idea/
11-
openapi.json
11+
openapi_client_preview.json

Makefile

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ install-test:
1515
install-dev:
1616
pip install jupyter
1717
pip install pylint
18+
sudo apt-get install jq
1819

1920
## install: installs all test, dev, and experimental requirements
2021
.PHONY: install
@@ -39,7 +40,18 @@ lint:
3940

4041
.PHONY: sdk-generate
4142
sdk-generate:
42-
speakeasy generate sdk -s openapi.json -o ./ -l python
43+
speakeasy generate sdk -s ./_dev/openapi_client.json -o ./ -l python
44+
45+
sdk-overlay-create:
46+
speakeasy overlay compare --schemas=./_dev/openapi.json --schemas=./_dev/openapi_client.json > ./_dev/overlay.yaml
47+
sed -i '/^extends:/d' ./_dev/overlay.yaml
48+
49+
sdk-overlay-apply:
50+
speakeasy overlay validate -o ./_dev/overlay.yaml
51+
speakeasy overlay apply -s=./_dev/openapi.json -o=./_dev/overlay.yaml > ./_dev/openapi_client_preview.json
52+
@cd _dev && jq . ./openapi_client_preview.json > ./openapi_client_preview.json.tmp \
53+
&& cp ./openapi_client_preview.json.tmp ./openapi_client_preview.json && rm ./openapi_client_preview.json.tmp
54+
4355

4456
###########
4557
# Jupyter #

_dev/README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Localhost development
2+
3+
The file `openapi.json` is copied here from https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/main/openapi.json
4+
and represents the API that is supported on the backend.
5+
6+
The `openapi_client.json` file is stored here and treated as the source of truth for what should be accepted in the Python client.
7+
The idea is that it is easier to maintain this file, which shows exactly what we support, than to handcraft overlays.
8+
9+
When `openapi.json` and `openapi_client.json` are compared using `make sdk-overlay-create`, a diff is created
10+
which forms the Speakeasy overlay. This overlay is saved in the repo for CI, so that it can be applied on top of the backend
11+
`openapi.json` from the unstructured-api repo when generating the Python client in GitHub Actions.
12+
This can also be simulated locally using `make sdk-overlay-apply`.

_dev/openapi.json

Lines changed: 280 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,280 @@
1+
{
2+
"openapi": "3.1.0",
3+
"info": {
4+
"title": "Unstructured Pipeline API",
5+
"version": "0.0.1",
6+
"summary": "Partition documents with the Unstructured library"
7+
},
8+
"servers": [
9+
{
10+
"url": "https://api.unstructured.io",
11+
"description": "Hosted API",
12+
"x-speakeasy-server-id": "prod"
13+
},
14+
{
15+
"url": "http://localhost:8000",
16+
"description": "Development server",
17+
"x-speakeasy-server-id": "local"
18+
}
19+
],
20+
"x-speakeasy-retries": {
21+
"strategy": "backoff",
22+
"backoff": {
23+
"initialInterval": 500,
24+
"maxInterval": 60000,
25+
"maxElapsedTime": 900000,
26+
"exponent": 1.5
27+
},
28+
"statusCodes": ["5xx"],
29+
"retryConnectionErrors": true
30+
},
31+
"security":[
32+
{
33+
"ApiKeyAuth":[]
34+
}
35+
],
36+
"tags": [
37+
{
38+
"name": "general"
39+
}
40+
],
41+
"paths": {
42+
"/general/v0/general": {
43+
"post": {
44+
"tags": ["general"],
45+
"summary": "Pipeline 1",
46+
"operationId": "partition",
47+
"x-speakeasy-name-override": "partition",
48+
"requestBody": {
49+
"content": {
50+
"multipart/form-data": {
51+
"schema": {
52+
"$ref": "#/components/schemas/partition_parameters"
53+
}
54+
}
55+
}
56+
},
57+
"responses": {
58+
"200": {
59+
"description": "Successful Response",
60+
"content": {
61+
"application/json": {
62+
"schema": {
63+
"$ref": "#/components/schemas/Elements"
64+
}
65+
}
66+
}
67+
},
68+
"422": {
69+
"description": "Validation Error",
70+
"content": {
71+
"application/json": {
72+
"schema": {
73+
"$ref": "#/components/schemas/HTTPValidationError"
74+
}
75+
}
76+
}
77+
}
78+
}
79+
}
80+
}
81+
},
82+
"components": {
83+
"securitySchemes":{
84+
"ApiKeyAuth":{
85+
"type":"apiKey",
86+
"name":"unstructured-api-key",
87+
"in":"header",
88+
"x-speakeasy-example": "YOUR_API_KEY"
89+
}
90+
},
91+
"schemas": {
92+
"Elements":{
93+
"type": "array",
94+
"items":{
95+
"Element":{
96+
"type":"object",
97+
"properties": {
98+
"type": {},
99+
"element_id": {},
100+
"metadata": {},
101+
"text": {}
102+
}
103+
}
104+
}
105+
},
106+
"partition_parameters": {
107+
"properties": {
108+
"files": {
109+
"type": "string",
110+
"format": "binary",
111+
"description": "The file to extract",
112+
"required": "true",
113+
"example": {
114+
"summary": "File to be partitioned",
115+
"externalValue": "https://github.com/Unstructured-IO/unstructured/blob/98d3541909f64290b5efb65a226fc3ee8a7cc5ee/example-docs/layout-parser-paper.pdf"
116+
}
117+
},
118+
"strategy": {
119+
"type": "string",
120+
"title": "Strategy",
121+
"description": "The strategy to use for partitioning PDF/image. Options are fast, hi_res, auto. Default: auto",
122+
"example": "hi_res"
123+
},
124+
"gz_uncompressed_content_type": {
125+
"type": "string",
126+
"title": "Uncompressed Content Type",
127+
"description": "If file is gzipped, use this content type after unzipping",
128+
"example": "application/pdf"
129+
},
130+
"output_format": {
131+
"type": "string",
132+
"title": "Output Format",
133+
"description": "The format of the response. Supported formats are application/json and text/csv. Default: application/json.",
134+
"example": "application/json"
135+
},
136+
"coordinates": {
137+
"type": "boolean",
138+
"title": "Coordinates",
139+
"description": "If true, return coordinates for each element. Default: false"
140+
},
141+
"encoding": {
142+
"type": "string",
143+
"title": "Encoding",
144+
"description": "The encoding method used to decode the text input. Default: utf-8",
145+
"example": "utf-8"
146+
},
147+
"hi_res_model_name": {
148+
"type": "string",
149+
"title": "Hi Res Model Name",
150+
"description": "The name of the inference model used when strategy is hi_res",
151+
"example": "yolox"
152+
},
153+
"include_page_breaks": {
154+
"type": "boolean",
155+
"title": "Include Page Breaks",
156+
"description": "If True, the output will include page breaks if the filetype supports it. Default: false"
157+
},
158+
"languages": {
159+
"items": {
160+
"type": "string",
161+
"example": "eng"
162+
},
163+
"type": "array",
164+
"title": "OCR Languages",
165+
"default": [],
166+
"description": "The languages present in the document, for use in partitioning and/or OCR",
167+
"example": "[eng]"
168+
},
169+
"pdf_infer_table_structure": {
170+
"type": "boolean",
171+
"title": "Pdf Infer Table Structure",
172+
"description": "If True and strategy=hi_res, any Table Elements extracted from a PDF will include an additional metadata field, 'text_as_html', where the value (string) is just a transformation of the data into an HTML <table>."
173+
},
174+
"skip_infer_table_types": {
175+
"items": {
176+
"type": "string",
177+
"example": "pdf"
178+
},
179+
"type": "array",
180+
"title": "Skip Infer Table Types",
181+
"description": "The document types that you want to skip table extraction with. Default: ['pdf', 'jpg', 'png']"
182+
},
183+
"xml_keep_tags": {
184+
"type": "boolean",
185+
"title": "Xml Keep Tags",
186+
"description": "If True, will retain the XML tags in the output. Otherwise it will simply extract the text from within the tags. Only applies to partition_xml."
187+
},
188+
"chunking_strategy": {
189+
"type": "string",
190+
"title": "Chunking Strategy",
191+
"description": "Use one of the supported strategies to chunk the returned elements. Currently supports: by_title",
192+
"example": "by_title"
193+
},
194+
"multipage_sections": {
195+
"type": "boolean",
196+
"title": "Multipage Sections",
197+
"description": "If chunking strategy is set, determines if sections can span multiple pages. Default: true"
198+
},
199+
"combine_under_n_chars": {
200+
"type": "integer",
201+
"title": "Combine Under N Chars",
202+
"description": "If chunking strategy is set, combine elements until a section reaches a length of n chars. Default: 500",
203+
"example": 500
204+
},
205+
"new_after_n_chars": {
206+
"type": "integer",
207+
"title": "New after n chars",
208+
"description": "If chunking strategy is set, cut off new sections after reaching a length of n chars (soft max). Default: 1500",
209+
"example": 1500
210+
},
211+
"max_characters": {
212+
"type": "integer",
213+
"title": "Max Characters",
214+
"description": "If chunking strategy is set, cut off new sections after reaching a length of n chars (hard max). Default: 1500",
215+
"example": 1500
216+
},
217+
"extract_image_block_types": {
218+
"items": {
219+
"type": "string",
220+
"example": "image"
221+
},
222+
"type": "array",
223+
"title": "Image block types to extract",
224+
"default": [],
225+
"description": "The types of elements to extract, for use in extracting image blocks as base64 encoded data stored in metadata fields",
226+
"example": ["image", "table"]
227+
}
228+
},
229+
"type": "object",
230+
"title": "Partition Parameters"
231+
},
232+
"HTTPValidationError": {
233+
"properties": {
234+
"detail": {
235+
"items": {
236+
"$ref": "#/components/schemas/ValidationError"
237+
},
238+
"type": "array",
239+
"title": "Detail"
240+
}
241+
},
242+
"type": "object",
243+
"title": "HTTPValidationError"
244+
},
245+
"ValidationError": {
246+
"properties": {
247+
"loc": {
248+
"items": {
249+
"oneOf": [
250+
{
251+
"type": "string"
252+
},
253+
{
254+
"type": "integer"
255+
}
256+
]
257+
},
258+
"type": "array",
259+
"title": "Location"
260+
},
261+
"msg": {
262+
"type": "string",
263+
"title": "Message"
264+
},
265+
"type": {
266+
"type": "string",
267+
"title": "Error Type"
268+
}
269+
},
270+
"type": "object",
271+
"required": [
272+
"loc",
273+
"msg",
274+
"type"
275+
],
276+
"title": "ValidationError"
277+
}
278+
}
279+
}
280+
}

0 commit comments

Comments
 (0)