Skip to content

Commit b040c45

Browse files
author
Dominik Schröder
committed
Replaced BeautifulSoup with the Typst XML parser
1 parent ba3139d commit b040c45

File tree

2 files changed

+95
-58
lines changed

2 files changed

+95
-58
lines changed

src/pyobsplot/obsplot.py

Lines changed: 17 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
import warnings
1010
import tempfile
1111
import typst
12-
from bs4 import BeautifulSoup
1312
from pathlib import Path
1413
from subprocess import PIPE, Popen, SubprocessError
1514
from typing import Any, Optional, Union
@@ -23,6 +22,7 @@
2322
AVAILABLE_THEMES,
2423
DEFAULT_THEME,
2524
MIN_NPM_VERSION,
25+
bundler_output_dir
2626
)
2727
from pyobsplot.widget import ObsplotWidget
2828

@@ -400,65 +400,24 @@ def save_to_file(self, path: str, res: HTML) -> None:
400400
with open(path, "w", encoding="utf-8") as f:
401401
self.render_typst(str(res.data), f.name)
402402

403-
@staticmethod
404-
def shift_svg(svg):
405-
soup = BeautifulSoup(str(svg), "xml")
406-
svg = soup.svg
407-
if "viewBox" in svg.attrs:
408-
x, y, width, height = map(int, svg.attrs["viewBox"].split())
409-
if x != 0 or y != 0:
410-
g = soup.new_tag("g", transform=f"translate({-x}, {-y})")
411-
g.extend(svg.contents)
412-
svg.clear()
413-
svg.append(g)
414-
svg.attrs["viewBox"] = f"0 0 {width} {height}"
415-
return str(svg)
416-
417403
def render_typst(self, html: str, path: str) -> None:
    """Compile the figure markup in *html* to the file *path* using Typst.

    The markup is written to ``jsdom.html`` in a temporary directory next to
    a copy of the bundled ``template.typ``. A small driver document imports
    the template's ``obsplot`` function, passes it the instance's margin,
    font and font size, and is compiled with ``typst.compile``.

    Args:
        html: Serialized figure markup handed to the Typst template.
        path: Output file. Its final suffix, without the leading dot, is
            passed to Typst as the output format.
    """
    path_obj = Path(path)
    # Only the last suffix names the format: "plot.v2.png" must give "png",
    # not "v2.png".
    output_format = path_obj.suffix[1:]

    with tempfile.TemporaryDirectory() as tmpdirname:
        # Write UTF-8 explicitly so non-ASCII labels do not depend on the
        # platform's default encoding.
        with open(f"{tmpdirname}/jsdom.html", "w", encoding="utf-8") as f:
            f.write(html)
        shutil.copy(bundler_output_dir / "template.typ", f"{tmpdirname}/template.typ")
        with open(f"{tmpdirname}/input.typ", "w", encoding="utf-8") as f:
            f.write(f"""
#import "template.typ": obsplot

#show: obsplot(
    "jsdom.html",
    margin: {self.margin}pt,
    font: "{self.font}",
    font-size: {self.font_size}pt,
)
""")

        typst.compile(
            f"{tmpdirname}/input.typ",
            output=path,
            ppi=self.dpi,
            format=output_format,
        )

src/pyobsplot/static/template.typ

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
// Return the first child element of `elem` whose tag equals `tag`, or `none`
// when no such child exists.
//
// Children may be element dictionaries or plain strings (text). Strings are
// skipped by type: on a string, `"tag" in e` would be a substring test, and
// text containing "tag" would then fail on the `e.tag` field access.
#let find-child(elem, tag) = {
  elem.children
    .find(e => type(e) == "dictionary" and e.tag == tag)
}
5+
6+
// Serialize a parsed XML node back into an XML string.
//
// `elem` is either a string (text) or an element dictionary with `tag`,
// `attrs` and `children`. Any other value yields `none`.
// `<svg>` elements get the SVG and XLink namespace declarations added to
// their start tag.
#let encode-xml(elem) = {
  if (type(elem) == "string") {
    // Text must be escaped again, otherwise "&" or "<" in a label produces
    // invalid XML. "&" is replaced first so the other entities stay intact.
    elem.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
  } else if (type(elem) == "dictionary") {
    // Attribute values are double-quoted, so quotes are escaped as well.
    let attrs = elem.attrs.pairs().map(
      v => " " + v.at(0) + "=\"" + v.at(1).replace("&", "&amp;").replace("<", "&lt;").replace("\"", "&quot;") + "\""
    ).join("")
    // `none` for every tag other than svg; adding `none` to a string is a no-op.
    let namespaces = if (elem.tag == "svg") {
      " xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\""
    }
    "<" + elem.tag + attrs + namespaces + ">" + elem.children.map(encode-xml).join("") + "</" + elem.tag + ">"
  }
}
15+
16+
// Lay out a figure read from the XML file `file`.
//
// The first node of the file is taken as the figure. Its optional `h2`, `h3`
// and `figcaption` children become the title, subtitle and caption, each
// `div` child becomes a row of legend swatches, and its first `svg` child is
// the plot. The page width is the plot's `width` attribute plus the margin
// on both sides.
//
// Parameters:
// - file: path of the XML/HTML file to read.
// - margin: page margin applied on all four sides.
// - font: font family for all text (no fallback font is used).
// - font-size: text size.
//
// NOTE(review): only the first `svg` child of the figure is rendered, and the
// figure must contain one. Confirm that figures with several plots are not
// expected here.
#let obsplot(
  file,
  margin: 4pt,
  font: "SF Pro Display",
  font-size: 10pt,
) = {
  // True for element dictionaries with the given tag. Text children are
  // strings and have no `tag` field.
  let is-element(e, tag) = type(e) == "dictionary" and e.tag == tag

  // One legend entry: a small SVG icon followed by its label.
  let swatch-item(elem) = {
    let icon = elem.children.first()
    set align(horizon)
    stack(
      dir: ltr,
      spacing: .5em,
      image.decode(
        encode-xml(icon),
        width: 1pt * int(icon.attrs.width),
        // Use the icon's own height; taking the width here distorts
        // non-square icons.
        height: 1pt * int(icon.attrs.height)
      ),
      text(elem.children.last())
    )
  }

  // One legend row: all `span` children laid out left to right.
  let swatch(elem) = {
    stack(
      dir: ltr,
      spacing: 1em,
      ..elem.children.filter(e => is-element(e, "span")).map(swatch-item)
    )
  }

  let html = xml(file)
  let figure = html.first()
  let title = find-child(figure, "h2")
  let subtitle = find-child(figure, "h3")
  let caption = find-child(figure, "figcaption")
  let figuresvg = find-child(figure, "svg")
  let figurewidth = int(figuresvg.attrs.width)

  set text(
    // Honour the `font` argument instead of a hard-coded family.
    font: font,
    size: font-size,
    fallback: false
  )

  set page(
    width: 1pt*figurewidth + 2*margin,
    height: auto,
    margin: (x: margin, y: margin)
  )

  stack(
    dir: ttb,
    spacing: 1em,
    // The title is optional, like the subtitle and the caption.
    if (title != none) {
      heading(title.children.first(), level: 1)
    },
    if (subtitle != none) {
      heading(subtitle.children.first(), level: 2)
    },
    v(2em),
    ..figure.children.filter(e => is-element(e, "div")).map(swatch),
    image.decode(encode-xml(figuresvg)),
    if (caption != none) {
      text(caption.children.first())
    }
  )
}

0 commit comments

Comments
 (0)