Skip to content

Commit 0ceeb2e

Browse files
committed
feat: Add use_wcwidth for Asian character support
1 parent 5f5e5cf commit 0ceeb2e

File tree

4 files changed

+71
-6
lines changed

4 files changed

+71
-6
lines changed

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1+
wcwidth<1
12
typing-extensions>=3.7.4; python_version<'3.8'

table2ascii/options.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,4 @@ class Options:
1616
alignments: list[Alignment] | None
1717
cell_padding: int
1818
style: TableStyle
19+
use_wcwidth: bool

table2ascii/table_to_ascii.py

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
from math import ceil, floor
44

5+
from wcwidth import wcswidth
6+
57
from .alignment import Alignment
68
from .annotations import SupportsStr
79
from .options import Options
@@ -36,6 +38,7 @@ def __init__(
3638
self.__first_col_heading = options.first_col_heading
3739
self.__last_col_heading = options.last_col_heading
3840
self.__cell_padding = options.cell_padding
41+
self.__use_wcwidth = options.use_wcwidth
3942

4043
# calculate number of columns
4144
self.__columns = self.__count_columns()
@@ -105,7 +108,7 @@ def __auto_column_widths(self) -> list[int]:
105108
def widest_line(value: SupportsStr) -> int:
106109
"""Returns the width of the longest line in a multi-line string"""
107110
text = str(value)
108-
return max(len(line) for line in text.splitlines()) if len(text) else 0
111+
return max(self.__str_width(line) for line in text.splitlines()) if len(text) else 0
109112

110113
column_widths = []
111114
# get the width necessary for each column
@@ -133,17 +136,18 @@ def __pad(self, cell_value: SupportsStr, width: int, alignment: Alignment) -> st
133136
text = str(cell_value)
134137
padding = " " * self.__cell_padding
135138
padded_text = f"{padding}{text}{padding}"
139+
text_width = self.__str_width(padded_text)
136140
if alignment == Alignment.LEFT:
137141
# pad with spaces on the end
138-
return padded_text + (" " * (width - len(padded_text)))
142+
return padded_text + (" " * (width - text_width))
139143
if alignment == Alignment.CENTER:
140144
# pad with spaces, half on each side
141-
before = " " * floor((width - len(padded_text)) / 2)
142-
after = " " * ceil((width - len(padded_text)) / 2)
145+
before = " " * floor((width - text_width) / 2)
146+
after = " " * ceil((width - text_width) / 2)
143147
return before + padded_text + after
144148
if alignment == Alignment.RIGHT:
145149
# pad with spaces at the beginning
146-
return (" " * (width - len(padded_text))) + padded_text
150+
return (" " * (width - text_width)) + padded_text
147151
raise ValueError(f"The value '{alignment}' is not valid for alignment.")
148152

149153
def __row_to_ascii(
@@ -291,6 +295,23 @@ def __body_to_ascii(self, body: list[list[SupportsStr]]) -> str:
291295
for row in body
292296
)
293297

298+
def __str_width(self, text: str) -> int:
    """
    Measures how many monospace columns a string takes up when rendered.

    With ``use_wcwidth`` disabled, the result is simply the length of the
    string. With it enabled, the measurement is delegated to ``wcswidth`` so
    that double-width characters (East Asian Wide and East Asian Fullwidth)
    and zero-width characters (combining characters, zero-width space, etc.)
    are counted by their display width instead of as one column each.

    Args:
        text: The text to measure

    Returns:
        The display width of the text, or its length when ``use_wcwidth`` is
        off or ``wcswidth`` yields a negative result
    """
    if not self.__use_wcwidth:
        return len(text)
    measured = wcswidth(text)
    # a negative value means wcswidth could not measure the string,
    # so the plain character count is used instead
    if measured < 0:
        return len(text)
    return measured
314+
294315
def to_ascii(self) -> str:
295316
"""
296317
Generates a formatted ASCII table
@@ -328,6 +349,7 @@ def table2ascii(
328349
alignments: list[Alignment] | None = None,
329350
cell_padding: int = 1,
330351
style: TableStyle = PresetStyle.double_thin_compact,
352+
use_wcwidth: bool = False,
331353
) -> str:
332354
"""
333355
Convert a 2D Python table to ASCII text
@@ -345,7 +367,7 @@ def table2ascii(
345367
Defaults to :py:obj:`False`.
346368
column_widths: List of widths in characters for each column. Any value of :py:obj:`None`
347369
indicates that the column width should be determined automatically. If :py:obj:`None`
348-
is passed instead of a :py:obj:`~typing.List`, all columns will be automatically sized.
370+
is passed instead of a :class:`list`, all columns will be automatically sized.
349371
Defaults to :py:obj:`None`.
350372
alignments: List of alignments for each column
351373
(ex. ``[Alignment.LEFT, Alignment.CENTER, Alignment.RIGHT]``). If not specified or set to
@@ -355,6 +377,11 @@ def table2ascii(
355377
Defaults to ``1``.
356378
style: Table style to use for styling (preset styles can be imported).
357379
Defaults to :ref:`PresetStyle.double_thin_compact <PresetStyle.double_thin_compact>`.
380+
use_wcwidth: Whether to use :func:`wcwidth.wcswidth` to determine the width of each cell instead of
381+
:func:`len`. This is useful when dealing with double-width characters
382+
(East Asian Wide and East Asian Fullwidth) or zero-width characters
383+
(combining characters, zero-width space, etc.) which are not properly handled by :func:`len`.
384+
Defaults to :py:obj:`False`.
358385
359386
Returns:
360387
The generated ASCII table
@@ -370,5 +397,6 @@ def table2ascii(
370397
alignments=alignments,
371398
cell_padding=cell_padding,
372399
style=style,
400+
use_wcwidth=use_wcwidth,
373401
),
374402
).to_ascii()

tests/test_convert.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,3 +246,38 @@ def test_multiline_cells():
246246
"╚═══════════════════════════════════════════╝"
247247
)
248248
assert text == expected
249+
250+
251+
def test_east_asian_wide_characters_and_zero_width():
    """
    Checks table layout for cells containing East Asian wide characters,
    first with the default ``len()`` based measurement and then with
    ``use_wcwidth=True``.

    Both tables have the same 20 and 14 column widths; only the padding placed
    around the wide-character cells differs, because those cells are measured
    as 2 characters by ``len()`` but as 4 display columns by ``wcswidth``.
    """
    # using len() to count the number of characters
    text = t2a(
        header=["日期", "test"],
        body=[["2022/12/11", "test"], ["2022/1/1", "測試"]],
        cell_padding=5,
    )
    # every row holds exactly 37 characters between and including the borders
    expected = (
        "╔═══════════════════════════════════╗\n"
        "║         日期               test     ║\n"
        "╟───────────────────────────────────╢\n"
        "║     2022/12/11           test     ║\n"
        "║      2022/1/1             測試      ║\n"
        "╚═══════════════════════════════════╝"
    )
    assert text == expected

    # using wcwidth.wcswidth() to count the number of characters
    text = t2a(
        header=["日期", "test"],
        body=[["2022/12/11", "test"], ["2022/1/1", "測試"]],
        cell_padding=5,
        use_wcwidth=True,
    )
    # wide-character cells get one less space per side than in the len() table,
    # so every row spans 37 display columns
    expected = (
        "╔═══════════════════════════════════╗\n"
        "║        日期              test     ║\n"
        "╟───────────────────────────────────╢\n"
        "║     2022/12/11           test     ║\n"
        "║      2022/1/1            測試     ║\n"
        "╚═══════════════════════════════════╝"
    )
    assert text == expected

0 commit comments

Comments
 (0)