-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenerate_market_data.py
More file actions
50 lines (37 loc) · 1.36 KB
/
generate_market_data.py
File metadata and controls
50 lines (37 loc) · 1.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/usr/bin/env python3
"""Generate a synthetic HFT CSV dataset quickly."""
from __future__ import annotations
import random
from pathlib import Path
# Default destination of the generated CSV (relative to the working directory).
OUTPUT_PATH = Path("market_data.csv")
# Default number of rows to emit; order ids run from 1 to the row count.
ROW_COUNT = 1_000_000
# Mid price the random walk starts from.
START_MID_PRICE = 10_000
# Floor applied to the mid price after every step of the walk.
MIN_MID_PRICE = 100
# Inclusive bounds on the distance between an order's price and the mid price.
MIN_OFFSET = 1
MAX_OFFSET = 5
# Number of rows formatted in memory before being handed to the file object.
CHUNK_SIZE = 100_000


def generate_market_data(
    output_path: Path | str | None = None,
    row_count: int | None = None,
    seed: int | None = None,
) -> None:
    """Write a synthetic order stream to a CSV file.

    Each row has the form ``order_id,side,price,quantity,L`` and ends in
    ``"\\n"``; no header row is written.  ``order_id`` counts up from 1,
    ``side`` is ``"B"`` or ``"S"``, and ``quantity`` is drawn from 1..100.
    The mid price performs a random walk (a step of 1..10 up or down per
    row, never dropping below ``MIN_MID_PRICE``).  ``"B"`` rows are priced
    ``MIN_OFFSET``..``MAX_OFFSET`` below the mid price and ``"S"`` rows the
    same distance above it.  The trailing ``L`` field is constant
    (presumably an order type -- confirm against the consumer of the file).

    Args:
        output_path: File to create or overwrite.  ``None`` (the default)
            uses the module-level ``OUTPUT_PATH``.
        row_count: Number of rows to write.  ``None`` (the default) uses the
            module-level ``ROW_COUNT``; values below 1 produce an empty file.
        seed: Seed for the random generator.  ``None`` (the default) seeds
            from system entropy, so every run differs; pass an integer to
            get a reproducible dataset.
    """
    # Resolve defaults at call time (not in the signature) so that rebinding
    # the module-level constants keeps affecting argument-less calls.
    target = Path(OUTPUT_PATH if output_path is None else output_path)
    total_rows = ROW_COUNT if row_count is None else row_count

    rng = random.Random(seed)
    # Bound-method lookup hoisted out of the per-row loop.
    randrange = rng.randrange
    sides = ("B", "S")
    mid_price = START_MID_PRICE

    # newline="" disables newline translation so rows end in "\n" on every
    # platform; the explicit encoding keeps the output locale-independent.
    with target.open(
        "w", buffering=1 << 20, encoding="utf-8", newline=""
    ) as output:
        for chunk_start in range(1, total_rows + 1, CHUNK_SIZE):
            # Exclusive upper bound; the last chunk may be shorter.
            chunk_end = min(chunk_start + CHUNK_SIZE, total_rows + 1)
            rows: list[str] = []
            append = rows.append
            for order_id in range(chunk_start, chunk_end):
                # Draw the direction first, then the step size.
                moves_up = randrange(2)
                step = randrange(1, 11)
                mid_price = max(
                    MIN_MID_PRICE,
                    mid_price + (step if moves_up else -step),
                )
                side = sides[randrange(2)]
                offset = randrange(MIN_OFFSET, MAX_OFFSET + 1)
                # Buys sit below the mid price, sells above it.
                price = mid_price - offset if side == "B" else mid_price + offset
                append(
                    f"{order_id},{side},{price},{randrange(1, 101)},L\n"
                )
            output.writelines(rows)
# Script entry point: build the dataset only when this file is executed
# directly, so importing the module has no side effects.
if __name__ == "__main__":
    generate_market_data()