@@ -123,10 +123,25 @@ std::optional<int> getHexDigit(char c) {
123
123
return {};
124
124
}
125
125
126
+ enum Sign { NoSign, Pos, Neg };
127
+
126
128
// The result of lexing an integer token fragment.
127
129
struct LexIntResult : LexResult {
128
130
uint64_t n;
129
131
Sign sign;
132
+
133
+ template <typename T> bool isUnsigned () {
134
+ static_assert (std::is_integral_v<T> && std::is_unsigned_v<T>);
135
+ return sign == NoSign && n <= std::numeric_limits<T>::max ();
136
+ }
137
+
138
+ template <typename T> bool isSigned () {
139
+ static_assert (std::is_integral_v<T> && std::is_signed_v<T>);
140
+ if (sign == Neg) {
141
+ return uint64_t (std::numeric_limits<T>::min ()) <= n || n == 0 ;
142
+ }
143
+ return n <= uint64_t (std::numeric_limits<T>::max ());
144
+ }
130
145
};
131
146
132
147
// Lexing context that accumulates lexed input to produce an integer token
@@ -887,123 +902,6 @@ std::optional<LexResult> keyword(std::string_view in) {
887
902
888
903
} // anonymous namespace
889
904
890
- template <typename T> std::optional<T> Token::getU () const {
891
- static_assert (std::is_integral_v<T> && std::is_unsigned_v<T>);
892
- if (auto * tok = std::get_if<IntTok>(&data)) {
893
- if (tok->sign == NoSign && tok->n <= std::numeric_limits<T>::max ()) {
894
- return T (tok->n );
895
- }
896
- // TODO: Add error production for unsigned overflow.
897
- }
898
- return {};
899
- }
900
-
901
- template <typename T> std::optional<T> Token::getS () const {
902
- static_assert (std::is_integral_v<T> && std::is_signed_v<T>);
903
- if (auto * tok = std::get_if<IntTok>(&data)) {
904
- if (tok->sign == Neg) {
905
- if (uint64_t (std::numeric_limits<T>::min ()) <= tok->n || tok->n == 0 ) {
906
- return T (tok->n );
907
- }
908
- } else {
909
- if (tok->n <= uint64_t (std::numeric_limits<T>::max ())) {
910
- return T (tok->n );
911
- }
912
- }
913
- }
914
- return {};
915
- }
916
-
917
- template <typename T> std::optional<T> Token::getI () const {
918
- static_assert (std::is_integral_v<T> && std::is_unsigned_v<T>);
919
- if (auto n = getU<T>()) {
920
- return *n;
921
- }
922
- if (auto n = getS<std::make_signed_t <T>>()) {
923
- return T (*n);
924
- }
925
- return {};
926
- }
927
-
928
- template std::optional<uint64_t > Token::getU<uint64_t >() const ;
929
- template std::optional<int64_t > Token::getS<int64_t >() const ;
930
- template std::optional<uint64_t > Token::getI<uint64_t >() const ;
931
- template std::optional<uint32_t > Token::getU<uint32_t >() const ;
932
- template std::optional<int32_t > Token::getS<int32_t >() const ;
933
- template std::optional<uint32_t > Token::getI<uint32_t >() const ;
934
- template std::optional<uint16_t > Token::getU<uint16_t >() const ;
935
- template std::optional<int16_t > Token::getS<int16_t >() const ;
936
- template std::optional<uint16_t > Token::getI<uint16_t >() const ;
937
- template std::optional<uint8_t > Token::getU<uint8_t >() const ;
938
- template std::optional<int8_t > Token::getS<int8_t >() const ;
939
- template std::optional<uint8_t > Token::getI<uint8_t >() const ;
940
-
941
- std::optional<double > Token::getF64 () const {
942
- constexpr int signif = 52 ;
943
- constexpr uint64_t payloadMask = (1ull << signif) - 1 ;
944
- constexpr uint64_t nanDefault = 1ull << (signif - 1 );
945
- if (auto * tok = std::get_if<FloatTok>(&data)) {
946
- double d = tok->d ;
947
- if (std::isnan (d)) {
948
- // Inject payload.
949
- uint64_t payload = tok->nanPayload ? *tok->nanPayload : nanDefault;
950
- if (payload == 0 || payload > payloadMask) {
951
- // TODO: Add error production for out-of-bounds payload.
952
- return {};
953
- }
954
- uint64_t bits;
955
- static_assert (sizeof (bits) == sizeof (d));
956
- memcpy (&bits, &d, sizeof (bits));
957
- bits = (bits & ~payloadMask) | payload;
958
- memcpy (&d, &bits, sizeof (bits));
959
- }
960
- return d;
961
- }
962
- if (auto * tok = std::get_if<IntTok>(&data)) {
963
- if (tok->sign == Neg) {
964
- if (tok->n == 0 ) {
965
- return -0.0 ;
966
- }
967
- return double (int64_t (tok->n ));
968
- }
969
- return double (tok->n );
970
- }
971
- return {};
972
- }
973
-
974
- std::optional<float > Token::getF32 () const {
975
- constexpr int signif = 23 ;
976
- constexpr uint32_t payloadMask = (1u << signif) - 1 ;
977
- constexpr uint64_t nanDefault = 1ull << (signif - 1 );
978
- if (auto * tok = std::get_if<FloatTok>(&data)) {
979
- float f = tok->d ;
980
- if (std::isnan (f)) {
981
- // Validate and inject payload.
982
- uint64_t payload = tok->nanPayload ? *tok->nanPayload : nanDefault;
983
- if (payload == 0 || payload > payloadMask) {
984
- // TODO: Add error production for out-of-bounds payload.
985
- return {};
986
- }
987
- uint32_t bits;
988
- static_assert (sizeof (bits) == sizeof (f));
989
- memcpy (&bits, &f, sizeof (bits));
990
- bits = (bits & ~payloadMask) | payload;
991
- memcpy (&f, &bits, sizeof (bits));
992
- }
993
- return f;
994
- }
995
- if (auto * tok = std::get_if<IntTok>(&data)) {
996
- if (tok->sign == Neg) {
997
- if (tok->n == 0 ) {
998
- return -0 .0f ;
999
- }
1000
- return float (int64_t (tok->n ));
1001
- }
1002
- return float (tok->n );
1003
- }
1004
- return {};
1005
- }
1006
-
1007
905
void Lexer::skipSpace () {
1008
906
while (true ) {
1009
907
if (auto ctx = annotation (next ())) {
@@ -1020,9 +918,6 @@ void Lexer::skipSpace() {
1020
918
}
1021
919
1022
920
bool Lexer::takeLParen () {
1023
- if (curr) {
1024
- return false ;
1025
- }
1026
921
if (LexCtx (next ()).startsWith (" (" sv)) {
1027
922
++index ;
1028
923
advance ();
@@ -1032,9 +927,6 @@ bool Lexer::takeLParen() {
1032
927
}
1033
928
1034
929
bool Lexer::takeRParen () {
1035
- if (curr) {
1036
- return false ;
1037
- }
1038
930
if (LexCtx (next ()).startsWith (" )" sv)) {
1039
931
++index ;
1040
932
advance ();
@@ -1044,9 +936,6 @@ bool Lexer::takeRParen() {
1044
936
}
1045
937
1046
938
std::optional<std::string> Lexer::takeString () {
1047
- if (curr) {
1048
- return std::nullopt;
1049
- }
1050
939
if (auto result = str (next ())) {
1051
940
index += result->span .size ();
1052
941
advance ();
@@ -1060,9 +949,6 @@ std::optional<std::string> Lexer::takeString() {
1060
949
}
1061
950
1062
951
std::optional<Name> Lexer::takeID () {
1063
- if (curr) {
1064
- return std::nullopt;
1065
- }
1066
952
if (auto result = ident (next ())) {
1067
953
index += result->span .size ();
1068
954
advance ();
@@ -1080,9 +966,6 @@ std::optional<Name> Lexer::takeID() {
1080
966
}
1081
967
1082
968
std::optional<std::string_view> Lexer::takeKeyword () {
1083
- if (curr) {
1084
- return std::nullopt;
1085
- }
1086
969
if (auto result = keyword (next ())) {
1087
970
index += result->span .size ();
1088
971
advance ();
@@ -1130,20 +1013,124 @@ std::optional<uint32_t> Lexer::takeAlign() {
1130
1013
return std::nullopt;
1131
1014
}
1132
1015
1133
- void Lexer::lexToken () {
1134
- // TODO: Ensure we're getting the longest possible match.
1135
- Token tok;
1136
- if (auto t = integer (next ())) {
1137
- tok = Token{t->span , IntTok{t->n , t->sign }};
1138
- } else if (auto t = float_ (next ())) {
1139
- tok = Token{t->span , FloatTok{t->nanPayload , t->d }};
1140
- } else {
1141
- // TODO: Do something about lexing errors.
1142
- curr = std::nullopt;
1143
- return ;
1016
+ template <typename T> std::optional<T> Lexer::takeU () {
1017
+ static_assert (std::is_integral_v<T> && std::is_unsigned_v<T>);
1018
+ if (auto result = integer (next ()); result && result->isUnsigned <T>()) {
1019
+ index += result->span .size ();
1020
+ advance ();
1021
+ return T (result->n );
1022
+ }
1023
+ // TODO: Add error production for unsigned overflow.
1024
+ return std::nullopt;
1025
+ }
1026
+
1027
+ template <typename T> std::optional<T> Lexer::takeS () {
1028
+ static_assert (std::is_integral_v<T> && std::is_signed_v<T>);
1029
+ if (auto result = integer (next ()); result && result->isSigned <T>()) {
1030
+ index += result->span .size ();
1031
+ advance ();
1032
+ return T (result->n );
1033
+ }
1034
+ return std::nullopt;
1035
+ }
1036
+
1037
+ template <typename T> std::optional<T> Lexer::takeI () {
1038
+ static_assert (std::is_integral_v<T> && std::is_unsigned_v<T>);
1039
+ if (auto result = integer (next ())) {
1040
+ if (result->isUnsigned <T>() || result->isSigned <std::make_signed_t <T>>()) {
1041
+ index += result->span .size ();
1042
+ advance ();
1043
+ return T (result->n );
1044
+ }
1144
1045
}
1145
- index += tok.span .size ();
1146
- curr = {tok};
1046
+ return std::nullopt;
1047
+ }
1048
+
1049
+ template std::optional<uint64_t > Lexer::takeU<uint64_t >();
1050
+ template std::optional<int64_t > Lexer::takeS<int64_t >();
1051
+ template std::optional<uint64_t > Lexer::takeI<uint64_t >();
1052
+ template std::optional<uint32_t > Lexer::takeU<uint32_t >();
1053
+ template std::optional<int32_t > Lexer::takeS<int32_t >();
1054
+ template std::optional<uint32_t > Lexer::takeI<uint32_t >();
1055
+ template std::optional<uint16_t > Lexer::takeU<uint16_t >();
1056
+ template std::optional<int16_t > Lexer::takeS<int16_t >();
1057
+ template std::optional<uint16_t > Lexer::takeI<uint16_t >();
1058
+ template std::optional<uint8_t > Lexer::takeU<uint8_t >();
1059
+ template std::optional<int8_t > Lexer::takeS<int8_t >();
1060
+ template std::optional<uint8_t > Lexer::takeI<uint8_t >();
1061
+
1062
+ std::optional<double > Lexer::takeF64 () {
1063
+ constexpr int signif = 52 ;
1064
+ constexpr uint64_t payloadMask = (1ull << signif) - 1 ;
1065
+ constexpr uint64_t nanDefault = 1ull << (signif - 1 );
1066
+ if (auto result = float_ (next ())) {
1067
+ double d = result->d ;
1068
+ if (std::isnan (d)) {
1069
+ // Inject payload.
1070
+ uint64_t payload = result->nanPayload ? *result->nanPayload : nanDefault;
1071
+ if (payload == 0 || payload > payloadMask) {
1072
+ // TODO: Add error production for out-of-bounds payload.
1073
+ return std::nullopt;
1074
+ }
1075
+ uint64_t bits;
1076
+ static_assert (sizeof (bits) == sizeof (d));
1077
+ memcpy (&bits, &d, sizeof (bits));
1078
+ bits = (bits & ~payloadMask) | payload;
1079
+ memcpy (&d, &bits, sizeof (bits));
1080
+ }
1081
+ index += result->span .size ();
1082
+ advance ();
1083
+ return d;
1084
+ }
1085
+ if (auto result = integer (next ())) {
1086
+ index += result->span .size ();
1087
+ advance ();
1088
+ if (result->sign == Neg) {
1089
+ if (result->n == 0 ) {
1090
+ return -0.0 ;
1091
+ }
1092
+ return double (int64_t (result->n ));
1093
+ }
1094
+ return double (result->n );
1095
+ }
1096
+ return std::nullopt;
1097
+ }
1098
+
1099
+ std::optional<float > Lexer::takeF32 () {
1100
+ constexpr int signif = 23 ;
1101
+ constexpr uint32_t payloadMask = (1u << signif) - 1 ;
1102
+ constexpr uint64_t nanDefault = 1ull << (signif - 1 );
1103
+ if (auto result = float_ (next ())) {
1104
+ float f = result->d ;
1105
+ if (std::isnan (f)) {
1106
+ // Validate and inject payload.
1107
+ uint64_t payload = result->nanPayload ? *result->nanPayload : nanDefault;
1108
+ if (payload == 0 || payload > payloadMask) {
1109
+ // TODO: Add error production for out-of-bounds payload.
1110
+ return std::nullopt;
1111
+ }
1112
+ uint32_t bits;
1113
+ static_assert (sizeof (bits) == sizeof (f));
1114
+ memcpy (&bits, &f, sizeof (bits));
1115
+ bits = (bits & ~payloadMask) | payload;
1116
+ memcpy (&f, &bits, sizeof (bits));
1117
+ }
1118
+ index += result->span .size ();
1119
+ advance ();
1120
+ return f;
1121
+ }
1122
+ if (auto result = integer (next ())) {
1123
+ index += result->span .size ();
1124
+ advance ();
1125
+ if (result->sign == Neg) {
1126
+ if (result->n == 0 ) {
1127
+ return -0 .0f ;
1128
+ }
1129
+ return float (int64_t (result->n ));
1130
+ }
1131
+ return float (result->n );
1132
+ }
1133
+ return std::nullopt;
1147
1134
}
1148
1135
1149
1136
TextPos Lexer::position (const char * c) const {
@@ -1164,52 +1151,8 @@ bool TextPos::operator==(const TextPos& other) const {
1164
1151
return line == other.line && col == other.col ;
1165
1152
}
1166
1153
1167
- bool IntTok::operator ==(const IntTok& other) const {
1168
- return n == other.n && sign == other.sign ;
1169
- }
1170
-
1171
- bool FloatTok::operator ==(const FloatTok& other) const {
1172
- return std::signbit (d) == std::signbit (other.d ) &&
1173
- (d == other.d || (std::isnan (d) && std::isnan (other.d ) &&
1174
- nanPayload == other.nanPayload ));
1175
- }
1176
-
1177
- bool Token::operator ==(const Token& other) const {
1178
- return span == other.span &&
1179
- std::visit (
1180
- [](auto & t1, auto & t2) {
1181
- if constexpr (std::is_same_v<decltype (t1), decltype (t2)>) {
1182
- return t1 == t2;
1183
- } else {
1184
- return false ;
1185
- }
1186
- },
1187
- data,
1188
- other.data );
1189
- }
1190
-
1191
1154
// Writes a TextPos to `os` as its `line`, a separator string, then its `col`,
// and returns the stream so calls can be chained.
std::ostream& operator <<(std::ostream& os, const TextPos& pos) {
1192
1155
return os << pos.line << " :" << pos.col ;
1193
1156
}
1194
1157
1195
- std::ostream& operator <<(std::ostream& os, const IntTok& tok) {
1196
- return os << (tok.sign == Pos ? " +" : tok.sign == Neg ? " -" : " " ) << tok.n ;
1197
- }
1198
-
1199
- std::ostream& operator <<(std::ostream& os, const FloatTok& tok) {
1200
- if (std::isnan (tok.d )) {
1201
- os << (std::signbit (tok.d ) ? " +" : " -" );
1202
- if (tok.nanPayload ) {
1203
- return os << " nan:0x" << std::hex << *tok.nanPayload << std::dec;
1204
- }
1205
- return os << " nan" ;
1206
- }
1207
- return os << tok.d ;
1208
- }
1209
-
1210
- std::ostream& operator <<(std::ostream& os, const Token& tok) {
1211
- std::visit ([&](const auto & t) { os << t; }, tok.data );
1212
- return os << " \" " << tok.span << " \" " ;
1213
- }
1214
-
1215
1158
} // namespace wasm::WATParser
0 commit comments