Skip to content

Commit 7de4a81

Browse files
committed
centralize the determination of the string encoding
This is surprisingly complicated. Centralizing this code into a `get_string_encoding()` function makes it possible to reduce the amount of copy-and-pasted code and to implement the `MinMaxLengthType` diag coded type properly. Thanks to [at]kayoub5 for insisting on this. Signed-off-by: Andreas Lauser <andreas.lauser@mercedes-benz.com>
1 parent ae17e8a commit 7de4a81

File tree

4 files changed

+69
-54
lines changed

4 files changed

+69
-54
lines changed

odxtools/decodestate.py

Lines changed: 6 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from dataclasses import dataclass, field
33
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple
44

5-
from .encoding import Encoding
5+
from .encoding import Encoding, get_string_encoding
66
from .exceptions import DecodeError, odxassert, odxraise, strict_mode
77
from .odxtypes import AtomicOdxType, DataType, ParameterValue
88

@@ -114,29 +114,13 @@ def extract_atomic_value(
114114
# ... string types, ...
115115
elif base_data_type in (DataType.A_UTF8STRING, DataType.A_ASCIISTRING,
116116
DataType.A_UNICODE2STRING):
117-
# note that the spec disallows certain combinations of
118-
# base_data_type and encoding (e.g., A_ASCIISTRING encoded
119-
# using UTF-8). Since in python3 strings are always
120-
# capable of the full unicode character set, odxtools
121-
# ignores these restrictions...
122117
text_errors = 'strict' if strict_mode else 'replace'
123-
if base_type_encoding == Encoding.UTF8 or (base_data_type == DataType.A_UTF8STRING and
124-
base_type_encoding is None):
125-
internal_value = raw_value.decode("utf-8", errors=text_errors)
126-
elif base_type_encoding == Encoding.UCS2 or (base_data_type == DataType.A_UNICODE2STRING
127-
and base_type_encoding is None):
128-
text_encoding = "utf-16-be" if is_highlow_byte_order else "utf-16-le"
129-
internal_value = raw_value.decode(text_encoding, errors=text_errors)
130-
elif base_type_encoding == Encoding.ISO_8859_1 or (
131-
base_data_type == DataType.A_ASCIISTRING and base_type_encoding is None):
132-
internal_value = raw_value.decode("iso-8859-1", errors=text_errors)
133-
elif base_type_encoding == Encoding.ISO_8859_2:
134-
internal_value = raw_value.decode("iso-8859-2", errors=text_errors)
135-
elif base_type_encoding == Encoding.WINDOWS_1252:
136-
internal_value = raw_value.decode("cp1252", errors=text_errors)
118+
str_encoding = get_string_encoding(base_data_type, base_type_encoding,
119+
is_highlow_byte_order)
120+
if str_encoding is not None:
121+
internal_value = raw_value.decode(str_encoding, errors=text_errors)
137122
else:
138-
odxraise(f"Specified illegal encoding {base_type_encoding} for string object")
139-
internal_value = raw_value.decode("iso-8859-1", errors=text_errors)
123+
internal_value = "ERROR"
140124

141125
# ... signed integers, ...
142126
elif base_data_type == DataType.A_INT32:

odxtools/encodestate.py

Lines changed: 6 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from dataclasses import dataclass, field
44
from typing import TYPE_CHECKING, Dict, List, Optional, SupportsBytes, Tuple
55

6-
from .encoding import Encoding
6+
from .encoding import Encoding, get_string_encoding
77
from .exceptions import EncodeError, OdxWarning, odxassert, odxraise
88
from .odxtypes import AtomicOdxType, DataType, ParameterValue
99

@@ -115,29 +115,12 @@ def emplace_atomic_value(
115115
if not isinstance(internal_value, str):
116116
odxraise(f"The internal value {internal_value!r} is not a string", EncodeError)
117117

118-
# note that the spec disallows certain combinations of
119-
# base_data_type and encoding (e.g., A_ASCIISTRING encoded
120-
# using UTF-8). Since in python3 strings are always
121-
# capable of the full unicode character set, odxtools
122-
# ignores these restrictions...
123-
124-
if base_type_encoding == Encoding.UTF8 or (base_data_type == DataType.A_UTF8STRING and
125-
base_type_encoding is None):
126-
raw_value = internal_value.encode("utf-8")
127-
elif base_type_encoding == Encoding.UCS2 or (base_data_type == DataType.A_UNICODE2STRING
128-
and base_type_encoding is None):
129-
text_encoding = "utf-16-be" if is_highlow_byte_order else "utf-16-le"
130-
raw_value = internal_value.encode(text_encoding)
131-
elif base_type_encoding == Encoding.ISO_8859_1 or (
132-
base_data_type == DataType.A_ASCIISTRING and base_type_encoding is None):
133-
raw_value = internal_value.encode("iso-8859-1")
134-
elif base_type_encoding == Encoding.ISO_8859_2:
135-
raw_value = internal_value.encode("iso-8859-2")
136-
elif base_type_encoding == Encoding.WINDOWS_1252:
137-
raw_value = internal_value.encode("cp1252")
118+
str_encoding = get_string_encoding(base_data_type, base_type_encoding,
119+
is_highlow_byte_order)
120+
if str_encoding is not None:
121+
raw_value = internal_value.encode(str_encoding)
138122
else:
139-
odxraise(f"Specified illegal encoding {base_type_encoding} for string object")
140-
raw_value = internal_value.encode("iso-8859-1")
123+
raw_value = b""
141124

142125
if 8 * len(raw_value) > bit_length:
143126
odxraise(

odxtools/encoding.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# SPDX-License-Identifier: MIT
22
from enum import Enum
3+
from typing import Optional
4+
5+
from .exceptions import odxraise
6+
from .odxtypes import DataType
37

48

59
class Encoding(Enum):
@@ -17,3 +21,36 @@ class Encoding(Enum):
1721
WINDOWS_1252 = "WINDOWS-1252"
1822

1923
NONE = "NONE"
24+
25+
26+
def get_string_encoding(base_data_type: DataType, base_type_encoding: Optional[Encoding],
                        is_highlow_byte_order: bool) -> Optional[str]:
    """Return the codec name to pass to `str.encode()`/`bytes.decode()`.

    The codec is selected from the explicitly specified
    `base_type_encoding`. If no encoding is specified (`None`), the
    default implied by `base_data_type` is used:

    - `A_UTF8STRING` -> "utf-8"
    - `A_UNICODE2STRING` -> "utf-16-be" or "utf-16-le", depending on
      `is_highlow_byte_order`
    - `A_ASCIISTRING` -> "iso-8859-1"

    Any other combination is reported via `odxraise()`. If that call
    returns instead of raising (presumably in non-strict mode --
    confirm against `odxraise`), "iso-8859-1" is used as a fallback.

    The `Optional[str]` return annotation is kept so that existing
    callers which compare the result against `None` continue to type
    check; no code path in this function actually returns `None`.
    """

    # note that the spec disallows certain combinations of
    # base_data_type and encoding (e.g., A_ASCIISTRING encoded
    # using UTF-8). Since in python3 strings are always
    # capable of the full unicode character set, odxtools
    # ignores these restrictions...
    encoding_unspecified = base_type_encoding is None

    if base_type_encoding == Encoding.UTF8 or (encoding_unspecified and
                                               base_data_type == DataType.A_UTF8STRING):
        return "utf-8"

    if base_type_encoding == Encoding.UCS2 or (encoding_unspecified and
                                               base_data_type == DataType.A_UNICODE2STRING):
        # UCS-2 is handled using the UTF-16 codec with the byte
        # order of the coded type
        return "utf-16-be" if is_highlow_byte_order else "utf-16-le"

    if base_type_encoding == Encoding.ISO_8859_1 or (encoding_unspecified and
                                                     base_data_type == DataType.A_ASCIISTRING):
        return "iso-8859-1"

    if base_type_encoding == Encoding.ISO_8859_2:
        return "iso-8859-2"

    if base_type_encoding == Encoding.WINDOWS_1252:
        return "cp1252"

    # neither a supported explicit encoding nor a string data type
    # with an implied default
    odxraise(f"Specified illegal encoding {base_type_encoding} for {base_data_type.value} "
             f"string object")
    return "iso-8859-1"

odxtools/minmaxlengthtype.py

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from .decodestate import DecodeState
99
from .diagcodedtype import DctType, DiagCodedType
1010
from .encodestate import EncodeState
11+
from .encoding import get_string_encoding
1112
from .exceptions import DecodeError, EncodeError, odxassert, odxraise, odxrequire
1213
from .odxlink import OdxDocFragment
1314
from .odxtypes import AtomicOdxType, DataType
@@ -78,11 +79,21 @@ def encode_into_pdu(self, internal_value: AtomicOdxType, encode_state: EncodeSta
7879
odxraise("MinMaxLengthType is currently only implemented for strings and byte arrays",
7980
EncodeError)
8081

81-
# TODO: This assumes that each character of a string is
82-
# encoded into a single byte. This is never the case for
83-
# UTF-16 encoded strings and does not always hold for
84-
# UTF-8. We ignore this issue for now...
85-
data_length = len(internal_value)
82+
raw_value = b''
83+
if isinstance(internal_value, str):
84+
str_encoding = get_string_encoding(self.base_data_type, self.base_type_encoding,
85+
self.is_highlow_byte_order)
86+
87+
if str_encoding is None:
88+
odxraise(f"Internal string value specified for object which is "
89+
f"'{self.base_data_type.value}' not a string")
90+
raw_value = b''
91+
else:
92+
raw_value = internal_value.encode(str_encoding)
93+
else:
94+
raw_value = bytes(internal_value)
95+
96+
data_length = len(raw_value)
8697

8798
if data_length < self.min_length:
8899
odxraise(
@@ -98,12 +109,12 @@ def encode_into_pdu(self, internal_value: AtomicOdxType, encode_state: EncodeSta
98109
data_length = self.max_length
99110

100111
encode_state.emplace_atomic_value(
101-
internal_value=internal_value,
112+
internal_value=raw_value,
102113
used_mask=None,
103114
bit_length=8 * data_length,
104-
base_data_type=self.base_data_type,
105-
base_type_encoding=self.base_type_encoding,
106-
is_highlow_byte_order=self.is_highlow_byte_order,
115+
base_data_type=DataType.A_BYTEFIELD,
116+
base_type_encoding=None,
117+
is_highlow_byte_order=True,
107118
)
108119

109120
# TODO: ensure that the termination delimiter is not

0 commit comments

Comments
 (0)