diff --git a/Cargo.toml b/Cargo.toml index ccaafd3..47fc8a1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ members = ["fuzz"] [workspace.package] -version = "0.0.0" +version = "0.1.0+llvm-462a31f5a5ab" edition = "2021" license = "Apache-2.0 WITH LLVM-exception" diff --git a/README.md b/README.md index 8554912..60ddd5b 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,68 @@ -# `rustc_apfloat` (Rust port of C++ `llvm::APFloat` library) +# `rustc_apfloat`
(Rust port of the C++ `llvm::APFloat` "softfloat" library) + +## History + +LLVM's `APFloat` (aka `llvm::APFloat`) software floating-point (or "softfloat") +library was first ported to Rust (and named `rustc_apfloat`) back in 2017, +in the Rust pull request [`rust-lang/rust#43554`](https://github.com/rust-lang/rust/pull/43554), +as part of an effort to expand Rust compile-time capabilities without sacrificing +determinism (and therefore soundness, if the type-system was involved). + +Note: while using the original C++ `llvm::APFloat` directly would've been an option, +certain high-level API design differences made in the Rust port, without behavioral impact +(C++ raw pointers and dynamic allocations vs Rust generics, traits and `#![no_std]`), +made the Rust port more appealing from a determinism standpoint (mostly thanks to +lacking all 3 of: `unsafe` code, host floating-point use, `std` access - and only +allocating to handle the arbitrary precision needed for conversions to/from decimal), +*even though there was a chance it had correctness issues unique to it*. + +However, that port had a fatal flaw: it was added to the `rust-lang/rust` repository +without its unique licensing status (as a port of a C++ library with its own license) +being properly tracked, communicated, taken into account, etc. +The end result was years of limbo, mostly chronicled in the Rust issue +[`rust-lang/rust#55993`](https://github.com/rust-lang/rust/issues/55993), in which +the in-tree port couldn't really receive proper updates or even maintenance, +due to its unclear status. 
+ +### Revival (as `rust-lang/rustc_apfloat`) + +This repository (`rust-lang/rustc_apfloat`) is the result of a 2022 plan on +[the relevant Zulip topic](https://rust-lang.zulipchat.com/#narrow/stream/231349-t-core.2Flicensing/topic/apfloat), fully put into motion during 2023: +* the `git` history of the in-tree `compiler/rustc_apfloat` library was extracted + (see the separate [`rustc_apfloat-git-history-extraction`](https://github.com/LykenSol/rustc_apfloat-git-history-extraction) repository for more details) +* only commits that were *both* necessary *and* had clear copyright status, were kept +* any missing functionality or bug fixes, would have to either be re-contributed, + or rebuilt from the ground up (mostly the latter ended up being done, see below) + +Most changes since the original port had been aesthetic (e.g. spell-checking, `rustfmt`), +so little was lost in the process. + +Starting from that much smaller "trusted" base: +* everything could use LLVM's new (since 2019) license, "`Apache-2.0 WITH LLVM-exception`" + (see the ["Licensing"](#licensing) section below and/or [LICENSE-DETAILS.md](./LICENSE-DETAILS.md) for more details) +* new facilities were built (benchmarks, and [a fuzzer comparing Rust/C++/hardware](#fuzzing)) +* excessive testing was performed (via a combination of fuzzing and bruteforce search) +* latent bugs were discovered (e.g. 
LLVM issues +[#63895](https://github.com/llvm/llvm-project/issues/63895) and +[#63938](https://github.com/llvm/llvm-project/issues/63938)) +* the port has been forwarded in time, to include upstream (`llvm/llvm-project`) changes + to `llvm::APFloat` over the years (since 2017), removing the need for selective backports + +## Versioning + +As this is, for the time being, a "living port", tracking upstream (`llvm/llvm-project`) +`llvm::APFloat` changes, the `rustc_apfloat` crate will have versions of the form: -## 🚧 Work In Progress 🚧 +``` +0.X.Y+llvm-ZZZZZZZZZZZZ +``` +* `X` is always bumped after semver-incompatible API changes, + or when updating the upstream (`llvm/llvm-project`) commit the port is based on +* `Y` is only bumped when other parts of the version don't need to be (e.g. for bug fixes) +* `+llvm-ZZZZZZZZZZZZ` is ["version metadata"](https://doc.rust-lang.org/cargo/reference/resolver.html#version-metadata) (which Cargo itself ignores), + and `ZZZZZZZZZZZZ` always holds the first 12 hexadecimal digits of + the upstream (`llvm/llvm-project`) `git` commit hash the port is based on -**NOTE**: the repo (and [`rustc_apfloat-git-history-extraction`](https://github.com/LykenSol/rustc_apfloat-git-history-extraction)) might be public already, but only for convenience of discussion, see [relevant Zulip topic](https://rust-lang.zulipchat.com/#narrow/stream/231349-t-core.2Flicensing/topic/apfloat) for more details. ## Testing diff --git a/build.rs b/build.rs new file mode 100644 index 0000000..a7398a2 --- /dev/null +++ b/build.rs @@ -0,0 +1,41 @@ +// HACK(eddyb) easier dep-tracking if we let `rustc` do it. +const SRC_LIB_RS_CONTENTS: &str = include_str!("src/lib.rs"); + +const EXPECTED_SRC_LIB_RS_PREFIX: &str = "\ +//! Port of LLVM's APFloat software floating-point implementation from the +//! following C++ sources (please update commit hash when backporting): +//! 
https://github.com/llvm/llvm-project/commit/"; + +fn main() { + // HACK(eddyb) disable the default of re-running the build script on *any* + // change to *the entire source tree* (i.e. the default is roughly `./`). + println!("cargo:rerun-if-changed=build.rs"); + + let llvm_commit_hash = SRC_LIB_RS_CONTENTS + .strip_prefix(EXPECTED_SRC_LIB_RS_PREFIX) + .ok_or(()) + .map_err(|_| format!("expected `src/lib.rs` to start with:\n\n{EXPECTED_SRC_LIB_RS_PREFIX}")) + .and_then(|commit_hash_plus_rest_of_file| { + Ok(commit_hash_plus_rest_of_file + .split_once('\n') + .ok_or("expected `src/lib.rs` to have more than 3 lines")?) + }) + .and_then(|(commit_hash, _)| { + if commit_hash.len() != 40 || !commit_hash.chars().all(|c| matches!(c, '0'..='9'|'a'..='f')) { + Err(format!("expected `src/lib.rs` to have a valid commit hash, found {commit_hash:?}")) + } else { + Ok(commit_hash) + } + }) + .unwrap_or_else(|e| { + eprintln!("\n{e}\n"); + panic!("failed to validate `src/lib.rs`'s commit hash (see above)") + }); + + let expected_version_metadata = format!("+llvm-{}", &llvm_commit_hash[..12]); + let actual_version = env!("CARGO_PKG_VERSION"); + if !actual_version.ends_with(&expected_version_metadata) { + eprintln!("\nexpected version ending in `{expected_version_metadata}`, found `{actual_version}`\n"); + panic!("failed to validate Cargo package version (see above)"); + } +} diff --git a/fuzz/build.rs b/fuzz/build.rs index 2cc18dc..f23b373 100644 --- a/fuzz/build.rs +++ b/fuzz/build.rs @@ -7,6 +7,10 @@ fn main() -> std::io::Result { // change to *the entire source tree* (i.e. the default is roughly `./`). println!("cargo:rerun-if-changed=build.rs"); + // NOTE(eddyb) `rustc_apfloat`'s own `build.rs` validated the version string. 
+ let (_, llvm_commit_hash) = env!("CARGO_PKG_VERSION").split_once("+llvm-").unwrap(); + assert_eq!(llvm_commit_hash.len(), 12); + let out_dir = std::path::PathBuf::from(std::env::var_os("OUT_DIR").unwrap()); std::fs::write(out_dir.join("generated_fuzz_ops.rs"), ops::generate_rust())?; @@ -40,11 +44,7 @@ fn main() -> std::io::Result { let sh_script_exit_status = Command::new("sh") .args(["-c", SH_SCRIPT]) .envs([ - // FIXME(eddyb) ensure this is kept in sync. - ( - "llvm_project_git_hash", - "f3598e8fca83ccfb11f58ec7957c229e349765e3", - ), + ("llvm_project_git_hash", llvm_commit_hash), ("cxx_apf_fuzz_exports", &cxx_exported_symbols.join(",")), ( "cxx_apf_fuzz_is_fuzzing", @@ -68,7 +68,7 @@ curl -sS "$llvm_project_tgz_url" | tar -C "$OUT_DIR" -xz llvm="$OUT_DIR"/llvm-project-"$llvm_project_git_hash"/llvm mkdir -p "$OUT_DIR"/fake-config/llvm/Config -touch "$OUT_DIR"/fake-config/llvm/Config/{abi-breaking,llvm-config}.h +touch "$OUT_DIR"/fake-config/llvm/Config/{abi-breaking,config,llvm-config}.h # HACK(eddyb) we want standard `assert`s to work, but `NDEBUG` also controls # unrelated LLVM facilities that are spread all over the place and it's harder @@ -91,8 +91,8 @@ echo | clang++ -x c++ - -std=c++17 \ $clang_codegen_flags \ -I "$llvm"/include \ -I "$OUT_DIR"/fake-config \ - -DNDEBUG \ - --include="$llvm"/lib/Support/{APInt,APFloat,SmallVector}.cpp \ + -DNDEBUG -DHAVE_UNISTD_H -DLLVM_ON_UNIX -DLLVM_ENABLE_THREADS=0 \ + --include="$llvm"/lib/Support/{APInt,APFloat,SmallVector,ErrorHandling}.cpp \ --include="$OUT_DIR"/cxx_apf_fuzz.cpp \ -c -emit-llvm -o "$OUT_DIR"/cxx_apf_fuzz.bc diff --git a/fuzz/ops.rs b/fuzz/ops.rs index 5f7d081..fb1a45d 100644 --- a/fuzz/ops.rs +++ b/fuzz/ops.rs @@ -12,8 +12,8 @@ struct Cxx(T); use self::OpKind::*; enum OpKind { - Unary(Rust, Cxx<&'static str>), - Binary(Rust, Cxx<&'static str>), + Unary(char), + Binary(char), Ternary(Rust<&'static str>, Cxx<&'static str>), // HACK(eddyb) all other ops have floating-point inputs *and* outputs, so @@ 
-25,6 +25,7 @@ enum OpKind { enum Type { SInt(usize), UInt(usize), + Float(usize), } impl Type { @@ -32,6 +33,7 @@ impl Type { match self { Type::SInt(w) => format!("i{w}"), Type::UInt(w) => format!("u{w}"), + Type::Float(w) => format!("f{w}"), } } } @@ -39,8 +41,8 @@ impl Type { impl OpKind { fn inputs<'a, T>(&self, all_inputs: &'a [T; 3]) -> &'a [T] { match self { - Unary(..) | Roundtrip(_) => &all_inputs[..1], - Binary(..) => &all_inputs[..2], + Unary(_) | Roundtrip(_) => &all_inputs[..1], + Binary(_) => &all_inputs[..2], Ternary(..) => &all_inputs[..3], } } @@ -48,18 +50,20 @@ impl OpKind { const OPS: &[(&str, OpKind)] = &[ // Unary (`F -> F`) ops. - ("Neg", Unary(Rust('-'), Cxx("changeSign"))), + ("Neg", Unary('-')), // Binary (`(F, F) -> F`) ops. - ("Add", Binary(Rust('+'), Cxx("add"))), - ("Sub", Binary(Rust('-'), Cxx("subtract"))), - ("Mul", Binary(Rust('*'), Cxx("multiply"))), - ("Div", Binary(Rust('/'), Cxx("divide"))), - ("Rem", Binary(Rust('%'), Cxx("mod"))), + ("Add", Binary('+')), + ("Sub", Binary('-')), + ("Mul", Binary('*')), + ("Div", Binary('/')), + ("Rem", Binary('%')), // Ternary (`(F, F) -> F`) ops. ("MulAdd", Ternary(Rust("mul_add"), Cxx("fusedMultiplyAdd"))), // Roundtrip (`F -> T -> F`) ops. 
("FToI128ToF", Roundtrip(Type::SInt(128))), ("FToU128ToF", Roundtrip(Type::UInt(128))), + ("FToSingleToF", Roundtrip(Type::Float(32))), + ("FToDoubleToF", Roundtrip(Type::Float(64))), ]; fn all_ops_map_concat(f: impl Fn(usize, &'static str, &OpKind) -> String) -> String { @@ -132,17 +136,21 @@ impl FuzzOp where HF: num_traits::Float + num_traits::AsPrimitive - + num_traits::AsPrimitive, + + num_traits::AsPrimitive + + num_traits::AsPrimitive + + num_traits::AsPrimitive, i128: num_traits::AsPrimitive, u128: num_traits::AsPrimitive, + f32: num_traits::AsPrimitive, + f64: num_traits::AsPrimitive, { fn eval_hard(self) -> HF { match self { " + &all_ops_map_concat(|_tag, name, kind| { let inputs = kind.inputs(&["a", "b", "c"]); let expr = match kind { - Unary(Rust(op), _) => format!("{op}{}", inputs[0]), - Binary(Rust(op), _) => format!("{} {op} {}", inputs[0], inputs[1]), + Unary(op) => format!("{op}{}", inputs[0]), + Binary(op) => format!("{} {op} {}", inputs[0], inputs[1]), Ternary(Rust(method), _) => { format!("{}.{method}({}, {})", inputs[0], inputs[1], inputs[2]) } @@ -163,14 +171,21 @@ impl FuzzOp } } -impl FuzzOp { - fn eval_rs_apf(self) -> RustcApFloat { +impl FuzzOp + where + F: rustc_apfloat::Float + + rustc_apfloat::FloatConvert + + rustc_apfloat::FloatConvert, + rustc_apfloat::ieee::Single: rustc_apfloat::FloatConvert, + rustc_apfloat::ieee::Double: rustc_apfloat::FloatConvert, +{ + fn eval_rs_apf(self) -> F { match self { " + &all_ops_map_concat(|_tag, name, kind| { let inputs = kind.inputs(&["a", "b", "c"]); let expr = match kind { - Unary(Rust(op), _) => format!("{op}{}", inputs[0]), - Binary(Rust(op), _) => format!("({} {op} {}).value", inputs[0], inputs[1]), + Unary(op) => format!("{op}{}", inputs[0]), + Binary(op) => format!("({} {op} {}).value", inputs[0], inputs[1]), Ternary(Rust(method), _) => { format!("{}.{method}({}).value", inputs[0], inputs[1..].join(", ")) } @@ -178,9 +193,23 @@ impl FuzzOp { let (w, i_or_u) = match ty { Type::SInt(w) => (w, 
"i"), Type::UInt(w) => (w, "u"), + Type::Float(_) => unreachable!(), }; format!( - "RustcApFloat::from_{i_or_u}128({}.to_{i_or_u}128({w}).value).value", + "F::from_{i_or_u}128({}.to_{i_or_u}128({w}).value).value", + inputs[0], + ) + } + Roundtrip(Type::Float(w)) => { + let rs_apf_type = match w { + 32 => "rustc_apfloat::ieee::Single", + 64 => "rustc_apfloat::ieee::Double", + _ => unreachable!(), + }; + format!( + "rustc_apfloat::FloatConvert + ::convert(rustc_apfloat::FloatConvert::<{rs_apf_type}> + ::convert({}, &mut false).value, &mut false).value", inputs[0], ) } @@ -226,43 +255,64 @@ struct FuzzOp { F a, b, c; F eval() const { + + // HACK(eddyb) 'scratch' variables used by expressions below. + APFloat r(0.0); + APSInt i; + bool scratch_bool; + switch(tag) { " + &all_ops_map_concat(|_tag, name, kind| { let inputs = kind.inputs(&["a.to_apf()", "b.to_apf()", "c.to_apf()"]); - let (this, args) = inputs.split_first().unwrap(); - let args = args.join(", "); - let stmt = match kind { - // HACK(eddyb) `mod` doesn't take a rounding mode. - Unary(_, Cxx(method)) | Binary(_, Cxx(method @ "mod")) => { - format!("r.{method}({args})") - } + let expr = match kind { + // HACK(eddyb) `APFloat` doesn't overload `operator%`, so we have + // to go through the `mod` method instead. 
+ Binary('%') => format!("((r = {}), r.mod({}), r)", inputs[0], inputs[1]), + + Unary(op) => format!("{op}{}", inputs[0]), + Binary(op) => format!("{} {op} {}", inputs[0], inputs[1]), - Binary(_, Cxx(method)) | Ternary(_, Cxx(method)) => { - format!("r.{method}({args}, APFloat::rmNearestTiesToEven)") + Ternary(_, Cxx(method)) => { + format!( + "((r = {}), r.{method}({}, {}, APFloat::rmNearestTiesToEven), r)", + inputs[0], inputs[1], inputs[2] + ) } Roundtrip(ty @ (Type::SInt(_) | Type::UInt(_))) => { let (w, signed) = match ty { Type::SInt(w) => (w, true), Type::UInt(w) => (w, false), + Type::Float(_) => unreachable!(), }; format!( - " - APSInt i({w}, !{signed}); - bool isExact; - r.convertToInteger(i, APFloat::rmTowardZero, &isExact); - r.convertFromAPInt(i, {signed}, APFloat::rmNearestTiesToEven)" + "((r = {}), + (i = APSInt({w}, !{signed})), + r.convertToInteger(i, APFloat::rmTowardZero, &scratch_bool), + r.convertFromAPInt(i, {signed}, APFloat::rmNearestTiesToEven), + r)", + inputs[0] + ) + } + Roundtrip(Type::Float(w)) => { + let cxx_apf_semantics = match w { + 32 => "APFloat::IEEEsingle()", + 64 => "APFloat::IEEEdouble()", + _ => unreachable!(), + }; + format!( + "((r = {input}), + r.convert({cxx_apf_semantics}, APFloat::rmNearestTiesToEven, &scratch_bool), + r.convert({input}.getSemantics(), APFloat::rmNearestTiesToEven, &scratch_bool), + r)", + input = inputs[0] ) } }; format!( " - case {name}: {{ - APFloat r = {this}; - {stmt}; - return F::from_apf(r); - }}", + case {name}: return F::from_apf({expr});" ) }) + " @@ -270,23 +320,28 @@ struct FuzzOp { } }; " + &[ - (16, "APFloat::IEEEhalf()"), - (32, "APFloat::IEEEsingle()"), - (64, "APFloat::IEEEdouble()"), - (128, "APFloat::IEEEquad()"), - (80, "APFloat::x87DoubleExtended()"), + (16, "IEEEhalf"), + (32, "IEEEsingle"), + (64, "IEEEdouble"), + (128, "IEEEquad"), + (16, "BFloat"), + (8, "Float8E5M2"), + (8, "Float8E4M3FN"), + (80, "x87DoubleExtended"), ] .into_iter() - .map(|(w, cxx_apf_semantics)| { - let 
(name_prefix, uint_width) = match w { - 80 => ("X87_F", 128), - _ => ("IEEE", w), + .map(|(w, cxx_apf_semantics): (usize, _)| { + let uint_width = w.next_power_of_two(); + let name = match (w, cxx_apf_semantics) { + (16, "BFloat") => "BrainF16".into(), + (8, s) if s.starts_with("Float8") => s.replace("Float8", "F8"), + (80, "x87DoubleExtended") => "X87_F80".into(), + _ => { + assert!(cxx_apf_semantics.starts_with("IEEE")); + format!("IEEE{w}") + } }; - let name = format!("{name_prefix}{w}"); - let exported_symbol = format!( - "cxx_apf_fuzz_eval_op_{}{w}", - name_prefix.to_ascii_lowercase() - ); + let exported_symbol = format!("cxx_apf_fuzz_eval_op_{}", name.to_ascii_lowercase()); exported_symbols.push(exported_symbol.clone()); let uint = format!("uint{uint_width}_t"); format!( @@ -309,11 +364,13 @@ struct __attribute__((packed)) {name} {{ }} APFloat to_apf() const {{ - std::array - words; + std::array< + APInt::WordType, + ({w} + APInt::APINT_BITS_PER_WORD - 1) / APInt::APINT_BITS_PER_WORD + > words; for(int i = 0; i < {w}; i += APInt::APINT_BITS_PER_WORD) words[i / APInt::APINT_BITS_PER_WORD] = bits >> i; - return APFloat({cxx_apf_semantics}, APInt({w}, words)); + return APFloat(APFloat::{cxx_apf_semantics}(), APInt({w}, words)); }} }}; extern "C" {{ diff --git a/fuzz/src/main.rs b/fuzz/src/main.rs index 32daf51..8d4fa07 100644 --- a/fuzz/src/main.rs +++ b/fuzz/src/main.rs @@ -1,13 +1,15 @@ use clap::{CommandFactory, Parser, Subcommand}; use rustc_apfloat::Float as _; use std::fmt; +use std::io::Write; use std::mem::MaybeUninit; +use std::num::NonZeroUsize; use std::path::PathBuf; // See `build.rs` and `ops.rs` for how `FuzzOp` is generated. 
include!(concat!(env!("OUT_DIR"), "/generated_fuzz_ops.rs")); -#[derive(Parser, Debug)] +#[derive(Clone, Parser, Debug)] struct Args { /// Disable comparison with C++ (LLVM's original) APFloat #[arg(long)] @@ -21,18 +23,29 @@ struct Args { #[arg(long)] strict_hard_nan_sign: bool, - /// Disable erasure of sNaN vs qNaN mismatches with hardware floating-point operations + /// Disable erasure of "which NaN input propagates" mismatches with hardware floating-point operations #[arg(long)] - strict_hard_qnan_vs_snan: bool, + strict_hard_nan_input_choice: bool, + + /// Hide FMA NaN mismatches for `a * b + NaN` when `a * b` generates a new NaN + // HACK(eddyb) this is opt-in, not opt-out, because the APFloat behavior, of + // generating a new NaN (instead of propagating the existing one) is dubious, + // and may end up changing over time, so the only purpose this serves is to + // enable fuzzing against hardware without wasting time on these mismatches. + #[arg(long)] + ignore_fma_nan_generate_vs_propagate: bool, #[command(subcommand)] command: Option, } -#[derive(Subcommand, Debug)] +#[derive(Clone, Subcommand, Debug)] enum Commands { /// Decode fuzzing in/out testcases (binary serialized `FuzzOp`s) Decode { files: Vec }, + + /// Exhaustively test all possible ops and inputs for tiny (8-bit) formats + BruteforceTiny, } /// Trait implemented for types that describe a floating-point format supported @@ -47,13 +60,24 @@ enum Commands { /// all types implementing this trait *must* be annotated with `#[repr(C, packed)]`, /// and `ops.rs` *must* also ensure exactly matching layout for the C++ counterpart. 
trait FloatRepr: Copy + Default + Eq + fmt::Display { - type RustcApFloat: rustc_apfloat::Float; + type RustcApFloat: rustc_apfloat::Float + + rustc_apfloat::Float + + rustc_apfloat::FloatConvert + + rustc_apfloat::FloatConvert; const BIT_WIDTH: usize = Self::RustcApFloat::BITS; const BYTE_LEN: usize = (Self::BIT_WIDTH + 7) / 8; const NAME: &'static str; + // HACK(eddyb) this has to be overwritable because we have more than one + // format with the same `BIT_WIDTH`, so it's not unambiguous on its own. + const REPR_TAG: u8 = Self::BIT_WIDTH as u8; + + fn short_lowercase_name() -> String { + Self::NAME.to_ascii_lowercase().replace("ieee", "f") + } + // FIXME(eddyb) these should ideally be using `[u8; Self::BYTE_LEN]`. fn from_le_bytes(bytes: &[u8]) -> Self; fn write_as_le_bytes_into(self, out_bytes: &mut Vec); @@ -70,17 +94,24 @@ trait FloatRepr: Copy + Default + Eq + fmt::Display { macro_rules! float_reprs { ($($name:ident($repr:ty) { type RustcApFloat = $rs_apf_ty:ty; + $(const REPR_TAG = $repr_tag:expr;)? extern fn = $cxx_apf_eval_fuzz_op:ident; $(type HardFloat = $hard_float_ty:ty;)? })+) => { // HACK(eddyb) helper macro used to actually handle all types uniformly. - macro_rules! dispatch_all_reprs { - ($ty_var:ident => $e:expr) => {{ - $({ - type $ty_var = $name; - $e - })+ - }} + macro_rules! dispatch_any_float_repr_by_repr_tag { + (match $repr_tag_value:ident { for<$ty_var:ident: FloatRepr> => $e:expr }) => { + // NOTE(eddyb) this doubles as an overlap check: `REPR_TAG` + // values across *all* `FloatRepr` `impl` *must* be unique. + #[deny(unreachable_patterns)] + match $repr_tag_value { + $($name::REPR_TAG => { + type $ty_var = $name; + $e; + })+ + _ => {} + } + } } $( @@ -96,6 +127,8 @@ macro_rules! float_reprs { const NAME: &'static str = stringify!($name); + $(const REPR_TAG: u8 = $repr_tag;)? + fn from_le_bytes(bytes: &[u8]) -> Self { // HACK(eddyb) this allows using e.g. `u128` to hold 80 bits. 
let mut repr_bytes = [0; std::mem::size_of::<$repr>()]; @@ -180,6 +213,23 @@ float_reprs! { type RustcApFloat = rustc_apfloat::ieee::Quad; extern fn = cxx_apf_fuzz_eval_op_ieee128; } + + // Non-standard IEEE-like formats. + F8E5M2(u8) { + type RustcApFloat = rustc_apfloat::ieee::Float8E5M2; + const REPR_TAG = 8 + 0; + extern fn = cxx_apf_fuzz_eval_op_f8e5m2; + } + F8E4M3FN(u8) { + type RustcApFloat = rustc_apfloat::ieee::Float8E4M3FN; + const REPR_TAG = 8 + 1; + extern fn = cxx_apf_fuzz_eval_op_f8e4m3fn; + } + BrainF16(u16) { + type RustcApFloat = rustc_apfloat::ieee::BFloat; + const REPR_TAG = 16 + 1; + extern fn = cxx_apf_fuzz_eval_op_brainf16; + } X87_F80(u128) { type RustcApFloat = rustc_apfloat::ieee::X87DoubleExtended; extern fn = cxx_apf_fuzz_eval_op_x87_f80; @@ -193,17 +243,20 @@ struct FuzzOpEvalOutputs { } impl FuzzOpEvalOutputs { - fn assert_all_match(self) { - if let Some(cxx_apf) = self.cxx_apf { - assert!(cxx_apf == self.rs_apf); - } - if let Some(hard) = self.hard { - assert!(hard == self.rs_apf); - } + fn all_match(self) -> bool { + [self.cxx_apf, self.hard] + .into_iter() + .flatten() + .all(|x| x == self.rs_apf) } } -impl FuzzOp { +impl FuzzOp +// FIXME(eddyb) such bounds shouldn't be here, but `FloatRepr` can't imply them. +where + rustc_apfloat::ieee::Single: rustc_apfloat::FloatConvert, + rustc_apfloat::ieee::Double: rustc_apfloat::FloatConvert, +{ fn try_decode(data: &[u8]) -> Result { let (&tag, inputs) = data.split_first().ok_or(())?; if inputs.len() % F::BYTE_LEN != 0 { @@ -285,15 +338,66 @@ impl FuzzOp { // Allow using CLI flags to toggle whether differences vs hardware are // erased (by copying e.g. signs from the `rustc_apfloat` result) or kept. // FIXME(eddyb) figure out how much we can really validate against hardware. 
+ let mut strict_nan_bits_mask = !0; + if !cli_args.strict_hard_nan_sign { + strict_nan_bits_mask &= !sign_bit_mask; + }; + let rs_apf_bits = out.rs_apf.to_bits_u128(); if is_nan(out_hard_bits) && is_nan(rs_apf_bits) { - for (strict, bit_mask) in [ - (cli_args.strict_hard_nan_sign, sign_bit_mask), - (cli_args.strict_hard_qnan_vs_snan, qnan_bit_mask), - ] { - if !strict { - out_hard_bits &= !bit_mask; - out_hard_bits |= rs_apf_bits & bit_mask; + out_hard_bits &= strict_nan_bits_mask; + out_hard_bits |= rs_apf_bits & !strict_nan_bits_mask; + + // There is still a NaN payload difference, check if they both + // are propagated inputs (verbatim or at most "quieted" if SNaN), + // because in some cases with multiple NaN inputs, something + // (hardware or even e.g. LLVM passes or instruction selection) + // along the way from Rust code to final results, can end up + // causing a different input NaN to get propagated to the result. + if !cli_args.strict_hard_nan_input_choice && out_hard_bits != rs_apf_bits { + let out_nan_is_propagated_input = |out_nan_bits| { + assert!(is_nan(out_nan_bits)); + let mut found_any_matching_inputs = false; + self.map(F::to_bits_u128).map(|in_bits| { + // NOTE(eddyb) this `is_nan` check is important, as + // `INFINITY.to_bits() | qnan_bit_mask == NAN.to_bits()`, + // i.e. seeting the QNaN is more than enough to turn + // a non-NaN (infinities, specifically) into a NaN. + if is_nan(in_bits) { + // Make sure to "quiet" (i.e. turn SNaN into QNaN) + // the input first, as propagation does (in the + // default exception handling mode, at least). 
+ if (in_bits | qnan_bit_mask) & strict_nan_bits_mask + == out_nan_bits & strict_nan_bits_mask + { + found_any_matching_inputs = true; + } + } + }); + found_any_matching_inputs + }; + if out_nan_is_propagated_input(out_hard_bits) + && out_nan_is_propagated_input(rs_apf_bits) + { + out_hard_bits = rs_apf_bits; + } + } + + // HACK(eddyb) last chance to hide a NaN payload difference, + // in this case for FMAs of the form `a * b + NaN`, when `a * b` + // generates a new NaN (which hardware can ignore in favor of the + // existing NaN, but APFloat returns the fresh new NaN instead). + if cli_args.ignore_fma_nan_generate_vs_propagate && out_hard_bits != rs_apf_bits { + if let FuzzOp::MulAdd(a, b, c) = self.map(F::to_bits_u128) { + if !is_nan(a) + && !is_nan(b) + && is_nan(c) + && out_hard_bits & strict_nan_bits_mask + == (c | qnan_bit_mask) & strict_nan_bits_mask + && rs_apf_bits == F::RustcApFloat::NAN.to_bits() + { + out_hard_bits = rs_apf_bits; + } } } } @@ -333,8 +437,11 @@ impl FuzzOp { } } - let short_float_type_name = F::NAME.to_ascii_lowercase().replace("ieee", "f"); - println!(" {short_float_type_name}.{:?}", self.map(FloatPrintHelper)); + println!( + " {}.{:?}", + F::short_lowercase_name(), + self.map(FloatPrintHelper) + ); // HACK(eddyb) this lets us show all files even if some cause panics. let FuzzOpEvalOutputs { @@ -366,6 +473,143 @@ impl FuzzOp { cxx_apf.map(|x| print(x, "C++ / llvm::APFloat")); hard.map(|x| print(x, "native hardware floats")); } + + /// [`Commands::BruteforceTiny`] implementation (for a specific choice of `F`), + /// returning `Err(mismatch_count)` if there were any mismatches. + // + // HACK(eddyb) this is a method here because of the bounds `eval` needs, which + // are thankfully on the whole `impl`, so `Self::eval` is callable. 
+ fn bruteforce_tiny(cli_args: &Args) -> Result<(), NonZeroUsize> { + // Here "tiny" is "8-bit" - 16-bit floats could maybe also be bruteforced, + // but the cost increases exponentially, so less useful relative to fuzzing. + if F::BIT_WIDTH > 8 { + return Ok(()); + } + + // HACK(eddyb) avoid reporting panics while iterating. + std::panic::set_hook(Box::new(|_| {})); + + let all_ops = (0..) + .map(FuzzOp::from_tag) + .take_while(|op| op.is_some()) + .map(|op| op.unwrap()); + + let op_to_exhaustive_cases = |op: FuzzOp<()>| { + let mut total_bit_width = 0; + op.map(|()| total_bit_width += F::BIT_WIDTH); + (0..usize::checked_shl(1, total_bit_width as u32).unwrap()).map(move |i| -> Self { + let mut combined_input_bits = i; + let op_with_inputs = op.map(|()| { + let x = combined_input_bits & ((1 << F::BIT_WIDTH) - 1); + combined_input_bits >>= F::BIT_WIDTH; + F::from_bits_u128(x.try_into().unwrap()) + }); + assert_eq!(combined_input_bits, 0); + op_with_inputs + }) + }; + + let num_total_cases = all_ops + .clone() + .map(|op| op_to_exhaustive_cases(op).len()) + .try_fold(0, usize::checked_add) + .unwrap(); + + let float_name = F::short_lowercase_name(); + println!("Exhaustively checking all {num_total_cases} cases for {float_name}:",); + + const NUM_DOTS: usize = 80; + let cases_per_dot = num_total_cases / NUM_DOTS; + let mut cases_in_this_dot = 0; + let mut mismatches_in_this_dot = false; + let mut num_mismatches = 0; + let mut select_mismatches = vec![]; + let mut all_panics = vec![]; + for op in all_ops { + let mut first_mismatch = None; + for op_with_inputs in op_to_exhaustive_cases(op) { + cases_in_this_dot += 1; + if cases_in_this_dot >= cases_per_dot { + cases_in_this_dot -= cases_per_dot; + if mismatches_in_this_dot { + mismatches_in_this_dot = false; + print!("X"); + } else { + print!(".") + } + // HACK(eddyb) get around `stdout` line buffering. + std::io::stdout().flush().unwrap(); + } + + // HACK(eddyb) there are still panics we need to account for, + // e.g. 
https://github.com/llvm/llvm-project/issues/63895, and + // even if the Rust code didn't panic, LLVM asserts would trip. + match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + op_with_inputs.eval(cli_args) + })) { + Ok(out) => { + if !out.all_match() { + num_mismatches += 1; + mismatches_in_this_dot = true; + if first_mismatch.is_none() { + first_mismatch = Some(op_with_inputs); + } + } + } + Err(_) => { + mismatches_in_this_dot = true; + all_panics.push(op_with_inputs); + } + } + } + select_mismatches.extend(first_mismatch); + } + println!(); + + // HACK(eddyb) undo what we did at the start of this function. + let _ = std::panic::take_hook(); + + if num_mismatches > 0 { + assert!(!select_mismatches.is_empty()); + println!(); + println!( + "!!! found {num_mismatches} ({:.1}%) mismatches for {float_name}, showing {} of them:", + (num_mismatches as f64) / (num_total_cases as f64) * 100.0, + select_mismatches.len(), + ); + for mismatch in select_mismatches { + mismatch.print_op_and_eval_outputs(cli_args); + } + println!(); + } else { + assert!(select_mismatches.is_empty()); + } + + if !all_panics.is_empty() { + // HACK(eddyb) there is a good chance C++ will also fail, so avoid + // triggering the (more fatal) C++ assertion failure. + let cli_args_plus_ignore_cxx = Args { + ignore_cxx: true, + ..cli_args.clone() + }; + + println!( + "!!! 
found {} panics for {float_name}, showing them (without trying C++):", + all_panics.len() + ); + for &panicking_case in &all_panics { + panicking_case.print_op_and_eval_outputs(&cli_args_plus_ignore_cxx); + } + println!(); + } + + if num_mismatches == 0 && all_panics.is_empty() { + println!("all {num_total_cases} cases match"); + println!(); + } + + NonZeroUsize::new(num_mismatches + all_panics.len()).map_or(Ok(()), Err) + } } fn main() { @@ -380,29 +624,45 @@ fn main() { data.split_first() .ok_or("empty file") - .and_then(|(&bit_width, data)| { - dispatch_all_reprs!(F => if bit_width as usize == F::BIT_WIDTH { - FuzzOp::::try_decode(data) - .ok() - .ok_or(std::any::type_name::>())? - .print_op_and_eval_outputs(&cli_args); - return Ok(()); + .and_then(|(&repr_tag, data)| { + dispatch_any_float_repr_by_repr_tag!(match repr_tag { + for => return Ok( + FuzzOp::::try_decode(data) + .ok() + .ok_or(std::any::type_name::>())? + .print_op_and_eval_outputs(&cli_args) + ) }); - Err("first byte not valid bit width") + Err("first byte not valid `FloatRepr::REPR_TAG`") }) .unwrap_or_else(|e| println!(" invalid data ({e})")); } } + Commands::BruteforceTiny => { + let mut any_mismatches = false; + for repr_tag in 0..=u8::MAX { + dispatch_any_float_repr_by_repr_tag!(match repr_tag { + for => { + any_mismatches |= FuzzOp::::bruteforce_tiny(&cli_args).is_err(); + } + }); + } + if any_mismatches { + // FIXME(eddyb) use `fn main() -> ExitStatus`. 
+ std::process::exit(1); + } + } } return; } #[cfg_attr(not(fuzzing), allow(unused))] let fuzz_one_op = |data: &[u8]| { - data.split_first().and_then(|(&bit_width, data)| { - dispatch_all_reprs!(F => if bit_width as usize == F::BIT_WIDTH { - FuzzOp::::try_decode(data).ok()?.eval(&cli_args).assert_all_match(); - return Some(()); + data.split_first().and_then(|(&repr_tag, data)| { + dispatch_any_float_repr_by_repr_tag!(match repr_tag { + for => return Some( + assert!(FuzzOp::::try_decode(data).ok()?.eval(&cli_args).all_match()) + ) }); None }); diff --git a/src/ieee.rs b/src/ieee.rs index 54ca219..d6719f5 100644 --- a/src/ieee.rs +++ b/src/ieee.rs @@ -17,10 +17,20 @@ pub struct IeeeFloat { exp: ExpInt, /// What kind of floating point number this is. - category: Category, + // + // HACK(eddyb) because mutating this without accounting for `exp`/`sig` + // can break some subtle edge cases, it should be only read through the + // `.category()` method, and only set during initialization, either for + // one of the special value constants, or for conversion from bits. + read_only_category_do_not_mutate: Category, /// Sign bit of the number. - sign: bool, + // + // HACK(eddyb) because mutating this without accounting for `category` + // can break some subtle edge cases, it should be only read through the + // `.is_negative()` method, and only set through negation (which can be + // more easily used through e.g. `copy_sign`/`negate_if`/`with_sign`). + read_only_sign_do_not_mutate: bool, marker: PhantomData, } @@ -62,62 +72,155 @@ enum Loss { MoreThanHalf, // 1xxxxx x's not all zero } +/// How the nonfinite values Inf and NaN are represented. +#[derive(Copy, Clone, PartialEq, Eq)] +pub enum NonfiniteBehavior { + /// Represents standard IEEE 754 behavior. A value is nonfinite if the + /// exponent field is all 1s. In such cases, a value is Inf if the + /// significand bits are all zero, and NaN otherwise + IEEE754, + + /// Only the Float8E4M3FN has this behavior. 
There is no Inf representation. A + /// value is NaN if the exponent field and the mantissa field are all 1s. + /// This behavior matches the FP8 E4M3 type described in + /// https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs + /// as non-signalling, although the paper does not state whether the NaN + /// values are signalling or not. + NanOnly, +} + +// HACK(eddyb) extension method flipping/changing the sign based on `bool`s. +trait NegExt: Neg + Sized { + fn negate_if(self, negate: bool) -> Self { + if negate { + -self + } else { + self + } + } + + fn with_sign(self, sign: bool) -> Self + where + Self: Float, + { + self.negate_if(self.is_negative() != sign) + } +} +impl> NegExt for T {} + /// Represents floating point arithmetic semantics. pub trait Semantics: Sized { - /// Total number of bits in the in-memory format. + /// Total number of bits in the interchange format. const BITS: usize; + /// Number of exponent bits in the interchange format. + const EXP_BITS: usize; + /// Number of bits in the significand. This includes the integer bit. - const PRECISION: usize; + const PRECISION: usize = (Self::BITS - 1 - Self::EXP_BITS) + 1; + + /// How the nonfinite values Inf and NaN are represented. + const NONFINITE_BEHAVIOR: NonfiniteBehavior = NonfiniteBehavior::IEEE754; /// The largest E such that 2^E is representable; this matches the /// definition of IEEE 754. - const MAX_EXP: ExpInt; + const MAX_EXP: ExpInt = { + let ieee_inf_and_nan_replaced_with_extra_normals = match Self::NONFINITE_BEHAVIOR { + NonfiniteBehavior::IEEE754 => false, + NonfiniteBehavior::NanOnly => true, + }; + Self::IEEE_MAX_EXP + + (Self::MIN_EXP - Self::IEEE_MIN_EXP) + + (ieee_inf_and_nan_replaced_with_extra_normals as ExpInt) + }; + const IEEE_MAX_EXP: ExpInt = -Self::IEEE_MIN_EXP + 1; /// The smallest E such that 2^E is a normalized number; this /// matches the definition of IEEE 754. 
- const MIN_EXP: ExpInt = -Self::MAX_EXP + 1; + const MIN_EXP: ExpInt = Self::IEEE_MIN_EXP; + const IEEE_MIN_EXP: ExpInt = -(1 << (Self::EXP_BITS - 1)) + 2; + + /// The base significand bitpattern of NaNs, i.e. the bits that must always + /// be set in all NaNs, with other significand bits being either used for + /// payload bits (if `NAN_PAYLOAD_MASK` covers them) or always unset. + const NAN_SIGNIFICAND_BASE: Limb = match Self::NONFINITE_BEHAVIOR { + NonfiniteBehavior::IEEE754 => 0, + NonfiniteBehavior::NanOnly => (1 << (Self::PRECISION - 1)) - 1, + }; - /// The significand bit that marks NaN as quiet. - const QNAN_BIT: usize = Self::PRECISION - 2; + /// The significand bitmask for the payload of a NaN (if supported), + /// including the "quiet bit" (telling QNaNs apart from SNaNs). + const NAN_PAYLOAD_MASK: Limb = match Self::NONFINITE_BEHAVIOR { + NonfiniteBehavior::IEEE754 => (1 << (Self::PRECISION - 1)) - 1, + NonfiniteBehavior::NanOnly => 0, + }; - /// The significand bitpattern to mark a NaN as quiet. - /// NOTE: for X87DoubleExtended we need to set two bits instead of 1. - const QNAN_SIGNIFICAND: Limb = 1 << Self::QNAN_BIT; + /// The significand bitpattern to mark a NaN as quiet (if supported). + /// + /// NOTE: for X87DoubleExtended we need to set two bits instead of one. + /// + /// NOTE: all NaNs are quiet if unsupported (see `NonfiniteBehavior::NanOnly`). + const QNAN_SIGNIFICAND: Limb = match Self::NONFINITE_BEHAVIOR { + NonfiniteBehavior::IEEE754 => 1 << (Self::PRECISION - 2), + NonfiniteBehavior::NanOnly => 0, + }; fn from_bits(bits: u128) -> IeeeFloat { assert!(Self::BITS > Self::PRECISION); let sign = bits & (1 << (Self::BITS - 1)); - let exponent = (bits & !sign) >> (Self::PRECISION - 1); + let exponent = ((bits & !sign) >> (Self::BITS - 1 - Self::EXP_BITS)) & ((1 << Self::EXP_BITS) - 1); let mut r = IeeeFloat { sig: [bits & ((1 << (Self::PRECISION - 1)) - 1)], // Convert the exponent from its bias representation to a signed integer. 
- exp: (exponent as ExpInt) - Self::MAX_EXP, - category: Category::Zero, - sign: sign != 0, + exp: (exponent as ExpInt) + (Self::MIN_EXP - 1), + read_only_category_do_not_mutate: Category::Zero, + read_only_sign_do_not_mutate: sign != 0, marker: PhantomData, }; - if r.exp == Self::MIN_EXP - 1 && r.sig == [0] { - // Exponent, significand meaningless. - r.category = Category::Zero; - } else if r.exp == Self::MAX_EXP + 1 && r.sig == [0] { - // Exponent, significand meaningless. - r.category = Category::Infinity; - } else if r.exp == Self::MAX_EXP + 1 && r.sig != [0] { - // Sign, exponent, significand meaningless. - r.category = Category::NaN; - } else { - r.category = Category::Normal; - if r.exp == Self::MIN_EXP - 1 { - // Denormal. - r.exp = Self::MIN_EXP; + // NOTE(eddyb) unlike the original C++ code, this doesn't check for + // specific exponent/significand combinations, but instead relies on + // being able to construct known-good special values to compare to. + let try_category_from_special = |mut special: IeeeFloat| { + special = special.copy_sign(r); + + // Ignore NaN payload to avoid needing a separate NaN check. + let sig_mask = if special.is_nan() { !Self::NAN_PAYLOAD_MASK } else { !0 }; + + if special.is_negative() == r.is_negative() + && special.exp == r.exp + && special.sig[0] & sig_mask == r.sig[0] & sig_mask + { + Some(special.category()) } else { - // Set integer bit. - sig::set_bit(&mut r.sig, Self::PRECISION - 1); + None } - } + }; + + // NOTE(eddyb) the order here matters, i.e. `NAN` needs to be last, as + // its relaxed check (see above) overlaps `INFINITY`, for IEEE NaNs. + let specials = [ + IeeeFloat::::ZERO, + IeeeFloat::::INFINITY, + IeeeFloat::::NAN, + ]; + + let category = specials + .into_iter() + .find_map(try_category_from_special) + .unwrap_or_else(|| { + if r.exp == Self::MIN_EXP - 1 { + // Denormal. + r.exp = Self::MIN_EXP; + } else { + // Set integer bit. 
+ sig::set_bit(&mut r.sig, Self::PRECISION - 1); + } + Category::Normal + }); + + r.read_only_category_do_not_mutate = category; r } @@ -128,32 +231,39 @@ pub trait Semantics: Sized { // Split integer bit from significand. let integer_bit = sig::get_bit(&x.sig, Self::PRECISION - 1); let mut significand = x.sig[0] & ((1 << (Self::PRECISION - 1)) - 1); - let exponent = match x.category { + let mut exponent = x.exp; + match x.category() { Category::Normal => { - if x.exp == Self::MIN_EXP && !integer_bit { + if exponent == Self::MIN_EXP && !integer_bit { // Denormal. - Self::MIN_EXP - 1 - } else { - x.exp + exponent -= 1; } } Category::Zero => { // FIXME(eddyb) Maybe we should guarantee an invariant instead? - significand = 0; - Self::MIN_EXP - 1 + IeeeFloat:: { + sig: [significand], + exp: exponent, + .. + } = Float::ZERO; } Category::Infinity => { // FIXME(eddyb) Maybe we should guarantee an invariant instead? - significand = 0; - Self::MAX_EXP + 1 + IeeeFloat:: { + sig: [significand], + exp: exponent, + .. + } = Float::INFINITY; } - Category::NaN => Self::MAX_EXP + 1, - }; + Category::NaN => { + IeeeFloat:: { exp: exponent, .. } = Float::NAN; + } + } // Convert the exponent from a signed integer to its bias representation. - let exponent = (exponent + Self::MAX_EXP) as u128; + let exponent = (exponent - (Self::MIN_EXP - 1)) as u128; - ((x.sign as u128) << (Self::BITS - 1)) | (exponent << (Self::PRECISION - 1)) | significand + ((x.is_negative() as u128) << (Self::BITS - 1)) | (exponent << (Self::PRECISION - 1)) | significand } } @@ -165,13 +275,14 @@ impl Clone for IeeeFloat { } macro_rules! ieee_semantics { - ($($name:ident = $sem:ident($bits:tt : $exp_bits:tt)),*) => { + ($($name:ident = $sem:ident($bits:tt : $exp_bits:tt) $({ $($extra:tt)* })?),* $(,)?) 
=> { $(pub struct $sem;)* $(pub type $name = IeeeFloat<$sem>;)* $(impl Semantics for $sem { const BITS: usize = $bits; - const PRECISION: usize = ($bits - 1 - $exp_bits) + 1; - const MAX_EXP: ExpInt = (1 << ($exp_bits - 1)) - 1; + const EXP_BITS: usize = $exp_bits; + + $($($extra)*)? })* } } @@ -180,56 +291,84 @@ ieee_semantics! { Half = HalfS(16:5), Single = SingleS(32:8), Double = DoubleS(64:11), - Quad = QuadS(128:15) + Quad = QuadS(128:15), + + // Non-standard IEEE-like semantics: + + // FIXME(eddyb) document this as "Brain Float 16" (C++ didn't have docs). + BFloat = BFloatS(16:8), + + // 8-bit floating point number following IEEE-754 conventions with bit + // layout S1E5M2 as described in https://arxiv.org/abs/2209.05433. + Float8E5M2 = Float8E5M2S(8:5), + + // 8-bit floating point number mostly following IEEE-754 conventions with + // bit layout S1E4M3 as described in https://arxiv.org/abs/2209.05433. + // Unlike IEEE-754 types, there are no infinity values, and NaN is + // represented with the exponent and mantissa bits set to all 1s. + Float8E4M3FN = Float8E4M3FNS(8:4) { + const NONFINITE_BEHAVIOR: NonfiniteBehavior = NonfiniteBehavior::NanOnly; + }, } +// FIXME(eddyb) consider moving X87-specific logic to a "has explicit integer bit" +// associated `const` on `Semantics` itself. pub struct X87DoubleExtendedS; pub type X87DoubleExtended = IeeeFloat; impl Semantics for X87DoubleExtendedS { const BITS: usize = 80; + const EXP_BITS: usize = 15; + + // HACK(eddyb) overwriting `PRECISION` because its default is incorrect. + // FIXME(eddyb) get rid of this by allowing `Semantics` to generically have + // the concept of "explicitly included integer bit", which is the main way + // in which the 80-bit X87 format differs from standard IEEE encodings. const PRECISION: usize = 64; - const MAX_EXP: ExpInt = (1 << (15 - 1)) - 1; /// For x87 extended precision, we want to make a NaN, not a /// pseudo-NaN. Maybe we should expose the ability to make /// pseudo-NaNs?
- const QNAN_SIGNIFICAND: Limb = 0b11 << Self::QNAN_BIT; + const QNAN_SIGNIFICAND: Limb = 0b11 << (Self::PRECISION - 2); /// Integer bit is explicit in this format. Intel hardware (387 and later) /// does not support these bit patterns: /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity") /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN") - /// exponent = 0, integer bit 1 ("pseudodenormal") /// exponent!=0 nor all 1's, integer bit 0 ("unnormal") - /// At the moment, the first two are treated as NaNs, the second two as Normal. + /// exponent = 0, integer bit 1 ("pseudodenormal") + /// At the moment, the first three are treated as NaNs, the last one as Normal. fn from_bits(bits: u128) -> IeeeFloat { let sign = bits & (1 << (Self::BITS - 1)); - let exponent = (bits & !sign) >> Self::PRECISION; + let exponent = ((bits & !sign) >> (Self::BITS - 1 - Self::EXP_BITS)) & ((1 << Self::EXP_BITS) - 1); let mut r = IeeeFloat { sig: [bits & ((1 << Self::PRECISION) - 1)], // Convert the exponent from its bias representation to a signed integer. - exp: (exponent as ExpInt) - Self::MAX_EXP, - category: Category::Zero, - sign: sign != 0, + exp: (exponent as ExpInt) + (Self::MIN_EXP - 1), + read_only_category_do_not_mutate: Category::Zero, + read_only_sign_do_not_mutate: sign != 0, marker: PhantomData, }; - if r.exp == Self::MIN_EXP - 1 && r.sig == [0] { - // Exponent, significand meaningless. - r.category = Category::Zero; + let integer_bit = r.sig[0] >> (Self::PRECISION - 1); + + let category = if r.exp == Self::MIN_EXP - 1 && r.sig == [0] { + Category::Zero } else if r.exp == Self::MAX_EXP + 1 && r.sig == [1 << (Self::PRECISION - 1)] { - // Exponent, significand meaningless. - r.category = Category::Infinity; - } else if r.exp == Self::MAX_EXP + 1 && r.sig != [1 << (Self::PRECISION - 1)] { - // Sign, exponent, significand meaningless. 
- r.category = Category::NaN; + Category::Infinity + } else if r.exp == Self::MAX_EXP + 1 && r.sig != [1 << (Self::PRECISION - 1)] + || r.exp != Self::MAX_EXP + 1 && r.exp != Self::MIN_EXP - 1 && integer_bit == 0 + { + r.exp = Self::MAX_EXP + 1; + Category::NaN } else { - r.category = Category::Normal; if r.exp == Self::MIN_EXP - 1 { // Denormal. r.exp = Self::MIN_EXP; } - } + Category::Normal + }; + + r.read_only_category_do_not_mutate = category; r } @@ -238,7 +377,7 @@ impl Semantics for X87DoubleExtendedS { // Get integer bit from significand. let integer_bit = sig::get_bit(&x.sig, Self::PRECISION - 1); let mut significand = x.sig[0] & ((1 << Self::PRECISION) - 1); - let exponent = match x.category { + let exponent = match x.category() { Category::Normal => { if x.exp == Self::MIN_EXP && !integer_bit { // Denormal. @@ -261,9 +400,9 @@ impl Semantics for X87DoubleExtendedS { }; // Convert the exponent from a signed integer to its bias representation. - let exponent = (exponent + Self::MAX_EXP) as u128; + let exponent = (exponent - (Self::MIN_EXP - 1)) as u128; - ((x.sign as u128) << (Self::BITS - 1)) | (exponent << Self::PRECISION) | significand + ((x.is_negative() as u128) << (Self::BITS - 1)) | (exponent << Self::PRECISION) | significand } } @@ -277,24 +416,28 @@ impl PartialEq for IeeeFloat { impl PartialOrd for IeeeFloat { fn partial_cmp(&self, rhs: &Self) -> Option { - match (self.category, rhs.category) { + match (self.category(), rhs.category()) { (Category::NaN, _) | (_, Category::NaN) => None, - (Category::Infinity, Category::Infinity) => Some((!self.sign).cmp(&(!rhs.sign))), + (Category::Infinity, Category::Infinity) => Some((!self.is_negative()).cmp(&(!rhs.is_negative()))), (Category::Zero, Category::Zero) => Some(Ordering::Equal), - (Category::Infinity, _) | (Category::Normal, Category::Zero) => Some((!self.sign).cmp(&self.sign)), + (Category::Infinity, _) | (Category::Normal, Category::Zero) => { + 
Some((!self.is_negative()).cmp(&self.is_negative())) + } - (_, Category::Infinity) | (Category::Zero, Category::Normal) => Some(rhs.sign.cmp(&(!rhs.sign))), + (_, Category::Infinity) | (Category::Zero, Category::Normal) => { + Some(rhs.is_negative().cmp(&(!rhs.is_negative()))) + } (Category::Normal, Category::Normal) => { // Two normal numbers. Do they have the same sign? - Some((!self.sign).cmp(&(!rhs.sign)).then_with(|| { + Some((!self.is_negative()).cmp(&(!rhs.is_negative())).then_with(|| { // Compare absolute values; invert result if negative. let result = self.cmp_abs_normal(*rhs); - if self.sign { + if self.is_negative() { result.reverse() } else { result @@ -305,10 +448,10 @@ impl PartialOrd for IeeeFloat { } } -impl Neg for IeeeFloat { +impl Neg for IeeeFloat { type Output = Self; fn neg(mut self) -> Self { - self.sign = !self.sign; + self.read_only_sign_do_not_mutate = !self.is_negative(); self } } @@ -344,9 +487,9 @@ impl fmt::Display for IeeeFloat { let width = f.width().unwrap_or(3); let alternate = f.alternate(); - match self.category { + match self.category() { Category::Infinity => { - if self.sign { + if self.is_negative() { return f.write_str("-Inf"); } else { return f.write_str("+Inf"); @@ -356,7 +499,7 @@ impl fmt::Display for IeeeFloat { Category::NaN => return f.write_str("NaN"), Category::Zero => { - if self.sign { + if self.is_negative() { f.write_char('-')?; } @@ -381,7 +524,7 @@ impl fmt::Display for IeeeFloat { Category::Normal => {} } - if self.sign { + if self.is_negative() { f.write_char('-')?; } @@ -629,14 +772,100 @@ impl fmt::Debug for IeeeFloat { f, "{}({:?} | {}{:?} * 2^{})", self, - self.category, - if self.sign { "-" } else { "+" }, + self.category(), + if self.is_negative() { "-" } else { "+" }, self.sig, self.exp ) } } +// HACK(eddyb) this logic is duplicated throughout the original C++ code, +// but it's a bit too long to keep repeating in the Rust port for all ops. 
+// FIXME(eddyb) find a better name/organization for all of this functionality +// (`IeeeDefaultExceptionHandling` doesn't have a counterpart in the C++ code). +struct IeeeDefaultExceptionHandling; +impl IeeeDefaultExceptionHandling { + fn result_from_nan(mut r: IeeeFloat) -> StatusAnd> { + assert!(r.is_nan()); + + let status = if r.is_signaling() { + // [IEEE Std 754-2008 6.2]: + // Under default exception handling, any operation signaling an invalid + // operation exception and for which a floating-point result is to be + // delivered shall deliver a quiet NaN. + let [sig] = &mut r.sig; + *sig |= if S::QNAN_SIGNIFICAND == X87DoubleExtendedS::QNAN_SIGNIFICAND { + // HACK(eddyb) remain bug-compatible with the original C++ code + // which doesn't appear to attempt avoiding creating pseudo-NaNs + // (see https://github.com/llvm/llvm-project/issues/63938). + S::QNAN_SIGNIFICAND & S::NAN_PAYLOAD_MASK + } else { + S::QNAN_SIGNIFICAND + }; + + // [IEEE Std 754-2008 6.2]: + // Signaling NaNs shall be reserved operands that, under default exception + // handling, signal the invalid operation exception(see 7.2) for every + // general-computational and signaling-computational operation except for + // the conversions described in 5.12. + Status::INVALID_OP + } else { + // [IEEE Std 754-2008 6.2]: + // For an operation with quiet NaN inputs, other than maximum and minimum + // operations, if a floating-point result is to be delivered the result + // shall be a quiet NaN which should be one of the input NaNs. + // ... + // Every general-computational and quiet-computational operation involving + // one or more input NaNs, none of them signaling, shall signal no + // exception, except fusedMultiplyAdd might signal the invalid operation + // exception(see 7.2). 
+ Status::OK + }; + status.and(r) + } + + fn binop_result_from_either_nan(a: IeeeFloat, b: IeeeFloat) -> StatusAnd> { + let r = match (a.category(), b.category()) { + (Category::NaN, _) => a, + (_, Category::NaN) => b, + _ => unreachable!(), + }; + let mut status_and_r = Self::result_from_nan(r); + if b.is_signaling() { + status_and_r.status |= Status::INVALID_OP; + } + status_and_r + } +} + +impl IeeeFloat { + // HACK(eddyb) allow `Self::qnan` to be used from `IeeeFloat::NAN`. + // FIXME(eddyb) move back to the trait impl when that can be `const fn`. + const fn qnan(payload: Option) -> Self { + let sig = [S::NAN_SIGNIFICAND_BASE + | S::QNAN_SIGNIFICAND + | match payload { + // Zero out the excess bits of the significand. + Some(payload) => payload & S::NAN_PAYLOAD_MASK, + None => 0, + }]; + + let exp = match S::NONFINITE_BEHAVIOR { + NonfiniteBehavior::IEEE754 => S::MAX_EXP + 1, + NonfiniteBehavior::NanOnly => S::MAX_EXP, + }; + + IeeeFloat { + sig, + exp, + read_only_category_do_not_mutate: Category::NaN, + read_only_sign_do_not_mutate: false, + marker: PhantomData, + } + } +} + impl Float for IeeeFloat { const BITS: usize = S::BITS; const PRECISION: usize = S::PRECISION; @@ -646,53 +875,45 @@ impl Float for IeeeFloat { const ZERO: Self = IeeeFloat { sig: [0], exp: S::MIN_EXP - 1, - category: Category::Zero, - sign: false, + read_only_category_do_not_mutate: Category::Zero, + read_only_sign_do_not_mutate: false, marker: PhantomData, }; - const INFINITY: Self = IeeeFloat { - sig: [0], - exp: S::MAX_EXP + 1, - category: Category::Infinity, - sign: false, - marker: PhantomData, - }; + const INFINITY: Self = match S::NONFINITE_BEHAVIOR { + NonfiniteBehavior::IEEE754 => IeeeFloat { + sig: [0], + exp: S::MAX_EXP + 1, + read_only_category_do_not_mutate: Category::Infinity, + read_only_sign_do_not_mutate: false, + marker: PhantomData, + }, - // FIXME(eddyb) remove when qnan becomes const fn. 
- const NAN: Self = IeeeFloat { - sig: [S::QNAN_SIGNIFICAND], - exp: S::MAX_EXP + 1, - category: Category::NaN, - sign: false, - marker: PhantomData, + // There is no Inf, so make NaN instead. + NonfiniteBehavior::NanOnly => Self::NAN, }; + const NAN: Self = Self::qnan(None); + fn qnan(payload: Option) -> Self { - IeeeFloat { - sig: [S::QNAN_SIGNIFICAND - | payload.map_or(0, |payload| { - // Zero out the excess bits of the significand. - payload & ((1 << S::QNAN_BIT) - 1) - })], - exp: S::MAX_EXP + 1, - category: Category::NaN, - sign: false, - marker: PhantomData, - } + // NOTE(eddyb) this is not a recursive self-call, but rather it calls + // the `const fn` inherent method (see above). + Self::qnan(payload) } fn snan(payload: Option) -> Self { let mut snan = Self::qnan(payload); + let [sig] = &mut snan.sig; + // We always have to clear the QNaN bit to make it an SNaN. - sig::clear_bit(&mut snan.sig, S::QNAN_BIT); + *sig &= !(S::QNAN_SIGNIFICAND & S::NAN_PAYLOAD_MASK); // If there are no bits set in the payload, we have to set // *something* to make it a NaN instead of an infinity; // conventionally, this is the next bit down from the QNaN bit. - if snan.sig[0] & !S::QNAN_SIGNIFICAND == 0 { - sig::set_bit(&mut snan.sig, S::QNAN_BIT - 1); + if *sig & S::NAN_PAYLOAD_MASK == 0 { + *sig |= (S::QNAN_SIGNIFICAND & S::NAN_PAYLOAD_MASK) >> 1; } snan @@ -703,10 +924,20 @@ impl Float for IeeeFloat { // exponent = 1..10 // significand = 1..1 IeeeFloat { - sig: [!0 & ((1 << S::PRECISION) - 1)], + sig: [((1 << S::PRECISION) - 1) + & match S::NONFINITE_BEHAVIOR { + // The largest number by magnitude in our format will be the floating point + // number with maximum exponent and with significand that is all ones. + NonfiniteBehavior::IEEE754 => !0, + + // The largest number by magnitude in our format will be the floating point + // number with maximum exponent and with significand that is all ones except + // the LSB.
+ NonfiniteBehavior::NanOnly => !1, + }], exp: S::MAX_EXP, - category: Category::Normal, - sign: false, + read_only_category_do_not_mutate: Category::Normal, + read_only_sign_do_not_mutate: false, marker: PhantomData, } } @@ -717,8 +948,8 @@ impl Float for IeeeFloat { const SMALLEST: Self = IeeeFloat { sig: [1], exp: S::MIN_EXP, - category: Category::Normal, - sign: false, + read_only_category_do_not_mutate: Category::Normal, + read_only_sign_do_not_mutate: false, marker: PhantomData, }; @@ -729,18 +960,22 @@ impl Float for IeeeFloat { IeeeFloat { sig: [1 << (S::PRECISION - 1)], exp: S::MIN_EXP, - category: Category::Normal, - sign: false, + read_only_category_do_not_mutate: Category::Normal, + read_only_sign_do_not_mutate: false, marker: PhantomData, } } fn add_r(mut self, rhs: Self, round: Round) -> StatusAnd { - let status = match (self.category, rhs.category) { + let status = match (self.category(), rhs.category()) { + (Category::NaN, _) | (_, Category::NaN) => { + return IeeeDefaultExceptionHandling::binop_result_from_either_nan(self, rhs); + } + (Category::Infinity, Category::Infinity) => { // Differently signed infinities can only be validly // subtracted. - if self.sign != rhs.sign { + if self.is_negative() != rhs.is_negative() { self = Self::NAN; Status::INVALID_OP } else { @@ -749,22 +984,30 @@ impl Float for IeeeFloat { } // Sign may depend on rounding mode; handled below. - (_, Category::Zero) | (Category::NaN, _) | (Category::Infinity, Category::Normal) => Status::OK, + (_, Category::Zero) | (Category::Infinity, Category::Normal) => Status::OK, - (Category::Zero, _) | (_, Category::NaN) | (_, Category::Infinity) => { + (Category::Zero, _) | (_, Category::Infinity) => { self = rhs; Status::OK } - // This return code means it was not a simple case. 
(Category::Normal, Category::Normal) => { - let loss = - sig::add_or_sub(&mut self.sig, &mut self.exp, &mut self.sign, &mut [rhs.sig[0]], rhs.exp, rhs.sign); + let mut sign = self.is_negative(); + let loss = sig::add_or_sub( + &mut self.sig, + &mut self.exp, + &mut sign, + &mut [rhs.sig[0]], + rhs.exp, + rhs.is_negative(), + ); + self = self.with_sign(sign); + let status; self = unpack!(status=, self.normalize(round, loss)); // Can only be zero if we lost no fraction. - assert!(self.category != Category::Zero || loss == Loss::ExactlyZero); + assert!(self.category() != Category::Zero || loss == Loss::ExactlyZero); status } @@ -773,42 +1016,44 @@ impl Float for IeeeFloat { // If two numbers add (exactly) to zero, IEEE 754 decrees it is a // positive zero unless rounding to minus infinity, except that // adding two like-signed zeroes gives that zero. - if self.category == Category::Zero && (rhs.category != Category::Zero || self.sign != rhs.sign) { - self.sign = round == Round::TowardNegative; + if self.category() == Category::Zero + && (rhs.category() != Category::Zero || self.is_negative() != rhs.is_negative()) + { + self = self.with_sign(round == Round::TowardNegative); } status.and(self) } + // NOTE(eddyb) we can't rely on the `sub_r` method default implementation + // because NaN handling needs the original `rhs` (i.e. without negation). 
+ fn sub_r(self, rhs: Self, round: Round) -> StatusAnd { + match (self.category(), rhs.category()) { + (Category::NaN, _) | (_, Category::NaN) => { + IeeeDefaultExceptionHandling::binop_result_from_either_nan(self, rhs) + } + + _ => self.add_r(-rhs, round), + } + } + fn mul_r(mut self, rhs: Self, round: Round) -> StatusAnd { - self.sign ^= rhs.sign; + self = self.negate_if(rhs.is_negative()); - match (self.category, rhs.category) { - (Category::NaN, _) => { - self.sign = false; - Status::OK.and(self) - } + match (self.category(), rhs.category()) { + (Category::NaN, _) | (_, Category::NaN) => { + self = self.negate_if(rhs.is_negative()); // restore the original sign - (_, Category::NaN) => { - self.sign = false; - self.category = Category::NaN; - self.sig = rhs.sig; - Status::OK.and(self) + IeeeDefaultExceptionHandling::binop_result_from_either_nan(self, rhs) } (Category::Zero, Category::Infinity) | (Category::Infinity, Category::Zero) => { Status::INVALID_OP.and(Self::NAN) } - (_, Category::Infinity) | (Category::Infinity, _) => { - self.category = Category::Infinity; - Status::OK.and(self) - } + (Category::Infinity, _) | (_, Category::Infinity) => Status::OK.and(Self::INFINITY.copy_sign(self)), - (Category::Zero, _) | (_, Category::Zero) => { - self.category = Category::Zero; - Status::OK.and(self) - } + (Category::Zero, _) | (_, Category::Zero) => Status::OK.and(Self::ZERO.copy_sign(self)), (Category::Normal, Category::Normal) => { self.exp += rhs.exp; @@ -857,7 +1102,7 @@ impl Float for IeeeFloat { } // Post-multiplication sign, before addition. - self.sign ^= multiplicand.sign; + self = self.negate_if(multiplicand.is_negative()); // Allocate space for twice as many bits as the original significand, plus one // extra bit for the addition to overflow into. @@ -896,14 +1141,17 @@ impl Float for IeeeFloat { // that the high bit of the significand is zero (same as wide_sig), // so the addition will overflow (if it does overflow at all) into the top bit. 
sig::shift_left(&mut ext_addend_sig, &mut 0, ext_precision - 1 - S::PRECISION); + + let mut sign = self.is_negative(); loss = sig::add_or_sub( &mut wide_sig, &mut self.exp, - &mut self.sign, + &mut sign, &mut ext_addend_sig, addend.exp + 1, - addend.sign, + addend.is_negative(), ); + self = self.with_sign(sign); omsb = sig::omsb(&wide_sig); } @@ -933,44 +1181,35 @@ impl Float for IeeeFloat { // If two numbers add (exactly) to zero, IEEE 754 decrees it is a // positive zero unless rounding to minus infinity, except that // adding two like-signed zeroes gives that zero. - if self.category == Category::Zero && !status.intersects(Status::UNDERFLOW) && self.sign != addend.sign { - self.sign = round == Round::TowardNegative; + if self.category() == Category::Zero + && !status.intersects(Status::UNDERFLOW) + && self.is_negative() != addend.is_negative() + { + self = self.with_sign(round == Round::TowardNegative); } status.and(self) } fn div_r(mut self, rhs: Self, round: Round) -> StatusAnd { - self.sign ^= rhs.sign; + self = self.negate_if(rhs.is_negative()); - match (self.category, rhs.category) { - (Category::NaN, _) => { - self.sign = false; - Status::OK.and(self) - } + match (self.category(), rhs.category()) { + (Category::NaN, _) | (_, Category::NaN) => { + self = self.negate_if(rhs.is_negative()); // restore the original sign - (_, Category::NaN) => { - self.category = Category::NaN; - self.sig = rhs.sig; - self.sign = false; - Status::OK.and(self) + IeeeDefaultExceptionHandling::binop_result_from_either_nan(self, rhs) } (Category::Infinity, Category::Infinity) | (Category::Zero, Category::Zero) => { Status::INVALID_OP.and(Self::NAN) } - (Category::Infinity, _) | (Category::Zero, _) => Status::OK.and(self), + (Category::Infinity | Category::Zero, _) => Status::OK.and(self), - (Category::Normal, Category::Infinity) => { - self.category = Category::Zero; - Status::OK.and(self) - } + (_, Category::Infinity) => Status::OK.and(Self::ZERO.copy_sign(self)), - 
(Category::Normal, Category::Zero) => { - self.category = Category::Infinity; - Status::DIV_BY_ZERO.and(self) - } + (_, Category::Zero) => Status::DIV_BY_ZERO.and(Self::INFINITY.copy_sign(self)), (Category::Normal, Category::Normal) => { self.exp -= rhs.exp; @@ -986,80 +1225,205 @@ impl Float for IeeeFloat { } } + fn ieee_rem(self, rhs: Self) -> StatusAnd { + match (self.category(), rhs.category()) { + (Category::NaN, _) | (_, Category::NaN) => { + IeeeDefaultExceptionHandling::binop_result_from_either_nan(self, rhs) + } + + (Category::Infinity, _) | (_, Category::Zero) => Status::INVALID_OP.and(Self::NAN), + + (Category::Zero, _) | (_, Category::Infinity) => Status::OK.and(self), + + (Category::Normal, Category::Normal) => { + let mut status; + + let mut x = self; + let mut p = rhs; + + // Make sure the current value is less than twice the denom. If the addition + // did not succeed (an overflow has happened), which means that the finite + // value we currently possess must be less than twice the denom (as we are + // using the same semantics). + let p2 = unpack!(status=, p + p); + if status == Status::OK { + x = unpack!(status=, x.c_fmod(p2)); + assert_eq!(status, Status::OK); + } + + // Let's work with absolute numbers. + p = p.abs(); + x = x.abs(); + + // + // To calculate the remainder we use the following scheme. + // + // The remainder is defined as follows: + // + // remainder = numer - rquot * denom = x - r * p + // + // Where r is the result of: x/p, rounded toward the nearest integral value + // (with halfway cases rounded toward the even number). + // + // Currently, (after x mod 2p): + // r is the number of 2p's present inside x, which is inherently, an even + // number of p's. + // + // We may split the remaining calculation into 4 options: + // - if x < 0.5p then we round to the nearest number which is 0, and are done. + // - if x == 0.5p then we round to the nearest even number which is 0, and we + // are done as well.
+ // - if 0.5p < x < p then we round to nearest number which is 1, and we have + // to subtract 1p at least once. + // - if x >= p then we must subtract p at least once, as x must be a + // remainder. + // + // By now, we were done, or we added 1 to r, which is in turn now an odd number. + // + // We can now split the remaining calculation to the following 3 options: + // - if x < 0.5p then we round to the nearest number which is 0, and are done. + // - if x == 0.5p then we round to the nearest even number. As r is odd, we + // must round up to the next even number, so we must subtract p once more. + // - if x > 0.5p (and inherently x < p) then we must round r up to the next + // integral, and subtract p once more. + // + + // Return `x * 2` at no loss of precision (i.e. no overflow). + // + // HACK(eddyb) this may seem a bit sketchy because it can return + // values that `normalize` would've replaced with `overflow_result` + // (e.g. overflowing to infinity), but the result is only used for + // comparisons, where both sides of such comparison can be seen + // as transiently having a larger *effective* exponent range. + let lossless_2x = |mut x: Self| { + x.exp += 1; + + if x.exp >= Self::MAX_EXP { + // HACK(eddyb) skip lossy `normalize` (see above).
+ } else { + let status; + x = unpack!(status=, x.normalize(Round::NearestTiesToEven, Loss::ExactlyZero)); + assert_eq!(status, Status::OK); + } + + x + }; + + if lossless_2x(x) > p { + x = unpack!(status=, x - p); + assert_eq!(status, Status::OK); + + if lossless_2x(x) >= p { + x = unpack!(status=, x - p); + assert_eq!(status, Status::OK); + } + } + + if x.is_zero() { + Status::OK.and(x.copy_sign(self)) // IEEE754 requires this + } else { + Status::OK.and(x.negate_if(self.is_negative())) + } + } + } + } + fn c_fmod(mut self, rhs: Self) -> StatusAnd { - match (self.category, rhs.category) { - (Category::NaN, _) - | (Category::Zero, Category::Infinity) - | (Category::Zero, Category::Normal) - | (Category::Normal, Category::Infinity) => Status::OK.and(self), - - (_, Category::NaN) => { - self.sign = false; - self.category = Category::NaN; - self.sig = rhs.sig; - Status::OK.and(self) + match (self.category(), rhs.category()) { + (Category::NaN, _) | (_, Category::NaN) => { + IeeeDefaultExceptionHandling::binop_result_from_either_nan(self, rhs) } (Category::Infinity, _) | (_, Category::Zero) => Status::INVALID_OP.and(Self::NAN), + (Category::Zero, _) | (_, Category::Infinity) => Status::OK.and(self), + (Category::Normal, Category::Normal) => { + let orig = self; + while self.is_finite_non_zero() && rhs.is_finite_non_zero() && self.cmp_abs_normal(rhs) != Ordering::Less { - let mut v = rhs.scalbn(self.ilogb() - rhs.ilogb()); - if self.cmp_abs_normal(v) == Ordering::Less { - v = v.scalbn(-1); + let exp = self.ilogb() - rhs.ilogb(); + let mut v = rhs.scalbn(exp); + // `v` can overflow to NaN with `NonfiniteBehavior::NanOnly`, so explicitly + // check for it. 
+ if v.is_nan() || self.cmp_abs_normal(v) == Ordering::Less { + v = rhs.scalbn(exp - 1); } - v.sign = self.sign; + v = v.copy_sign(self); let status; self = unpack!(status=, self - v); assert_eq!(status, Status::OK); } + if self.is_zero() { + self = self.copy_sign(orig); + } Status::OK.and(self) } } } fn round_to_integral(self, round: Round) -> StatusAnd { - // If the exponent is large enough, we know that this value is already - // integral, and the arithmetic below would potentially cause it to saturate - // to +/-Inf. Bail out early instead. - if self.is_finite_non_zero() && self.exp + 1 >= S::PRECISION as ExpInt { - return Status::OK.and(self); - } + match self.category() { + Category::NaN => IeeeDefaultExceptionHandling::result_from_nan(self), + + // [IEEE Std 754-2008 6.1]: + // The behavior of infinity in floating-point arithmetic is derived from the + // limiting cases of real arithmetic with operands of arbitrarily + // large magnitude, when such a limit exists. + // ... + // Operations on infinite operands are usually exact and therefore signal no + // exceptions ... + Category::Infinity => Status::OK.and(self), + + // [IEEE Std 754-2008 6.3]: + // ... the sign of the result of conversions, the quantize operation, the + // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is + // the sign of the first or only operand. + Category::Zero => Status::OK.and(self), - // The algorithm here is quite simple: we add 2^(p-1), where p is the - // precision of our format, and then subtract it back off again. The choice - // of rounding modes for the addition/subtraction determines the rounding mode - // for our integral rounding as well. - // NOTE: When the input value is negative, we do subtraction followed by - // addition instead. 
- assert!(S::PRECISION <= 128); - let mut status; - let magic_const = unpack!(status=, Self::from_u128(1 << (S::PRECISION - 1))); - let magic_const = magic_const.copy_sign(self); + Category::Normal => { + // If the exponent is large enough, we know that this value is already + // integral, and the arithmetic below would potentially cause it to saturate + // to +/-Inf. Bail out early instead. + if self.exp + 1 >= S::PRECISION as ExpInt { + return Status::OK.and(self); + } - if status != Status::OK { - return status.and(self); - } + // The algorithm here is quite simple: we add 2^(p-1), where p is the + // precision of our format, and then subtract it back off again. The choice + // of rounding modes for the addition/subtraction determines the rounding mode + // for our integral rounding as well. + // NOTE: When the input value is negative, we do subtraction followed by + // addition instead. + assert!(S::PRECISION <= 128); + let mut status; + let magic_const = unpack!(status=, Self::from_u128(1 << (S::PRECISION - 1))); + assert_eq!(status, Status::OK); + let magic_const = magic_const.copy_sign(self); - let mut r = self; - r = unpack!(status=, r.add_r(magic_const, round)); - if status != Status::OK && status != Status::INEXACT { - return status.and(self); - } + let mut r = self; + r = unpack!(status=, r.add_r(magic_const, round)); - // Restore the input sign to handle 0.0/-0.0 cases correctly. - r.sub_r(magic_const, round).map(|r| r.copy_sign(self)) + // Current value and 'MagicConstant' are both integers, so the result of the + // subtraction is always exact according to Sterbenz' lemma. + r = r.sub_r(magic_const, round).value; + + // Restore the input sign to handle the case of zero result + // correctly. + status.and(r.copy_sign(self)) + } + } } fn next_up(mut self) -> StatusAnd { // Compute nextUp(x), handling each float category separately. 
- match self.category { + match self.category() { Category::Infinity => { - if self.sign { + if self.is_negative() { // nextUp(-inf) = -largest Status::OK.and(-Self::largest()) } else { @@ -1084,12 +1448,12 @@ impl Float for IeeeFloat { } Category::Normal => { // nextUp(-smallest) = -0 - if self.is_smallest() && self.sign { + if self.is_smallest() && self.is_negative() { return Status::OK.and(-Self::ZERO); } // nextUp(largest) == INFINITY - if self.is_largest() && !self.sign { + if self.is_largest() && !self.is_negative() { return Status::OK.and(Self::INFINITY); } @@ -1097,7 +1461,7 @@ impl Float for IeeeFloat { let sig_mask = (1 << (S::PRECISION - 1)) - 1; // nextUp(normal) == normal + inc. - if self.sign { + if self.is_negative() { // If we are negative, we need to decrement the significand. // We only cross a binade boundary that requires adjusting the exponent @@ -1168,43 +1532,92 @@ impl Float for IeeeFloat { IeeeFloat { sig: [input], exp: S::PRECISION as ExpInt - 1, - category: Category::Normal, - sign: false, + read_only_category_do_not_mutate: Category::Normal, + read_only_sign_do_not_mutate: false, marker: PhantomData, } .normalize(round, Loss::ExactlyZero) } - fn from_str_r(mut s: &str, mut round: Round) -> Result, ParseError> { + fn from_str_r(s: &str, mut round: Round) -> Result, ParseError> { if s.is_empty() { return Err(ParseError("Invalid string length")); } + // Handle a leading minus sign. + let (minus, s) = s.strip_prefix("-").map(|s| (true, s)).unwrap_or((false, s)); + let from_abs = |r: Self| r.negate_if(minus); + + // Handle a leading plus sign (mutually exclusive with minus). + let (explicit_plus, s) = s + .strip_prefix("+") + .filter(|_| !minus) + .map(|s| (true, s)) + .unwrap_or((false, s)); + // Handle special cases. 
- match s { - "inf" | "INFINITY" => return Ok(Status::OK.and(Self::INFINITY)), - "-inf" | "-INFINITY" => return Ok(Status::OK.and(-Self::INFINITY)), - "nan" | "NaN" => return Ok(Status::OK.and(Self::NAN)), - "-nan" | "-NaN" => return Ok(Status::OK.and(-Self::NAN)), - _ => {} + let special = match s { + "Inf" if minus || explicit_plus => Some(Self::INFINITY), + + "inf" | "INFINITY" if !explicit_plus => Some(Self::INFINITY), + + _ if !explicit_plus => { + // If we have a 's' (or 'S') prefix, then this is a Signaling NaN. + let (is_signaling, s) = s.strip_prefix(['s', 'S']).map_or((false, s), |s| (true, s)); + + s.strip_prefix("nan").or_else(|| s.strip_prefix("NaN")).and_then(|s| { + // Allow the payload to be inside parentheses. + let s = s + .strip_prefix("(") + .and_then(|s| { + // Parentheses should be balanced (and not empty). + s.strip_suffix(")").filter(|s| !s.is_empty()) + }) + .unwrap_or(s); + + let payload = if s.is_empty() { + // A NaN without payload. + None + } else { + // Determine the payload number's radix. + let (radix, s) = s + .strip_prefix("0") + .filter(|s| !s.is_empty()) + .map(|s| s.strip_prefix(['x', 'X']).map(|s| (16, s)).unwrap_or((8, s))) + .unwrap_or((10, s)); + + // Parse the payload and make the NaN. + Some(u128::from_str_radix(s, radix).ok()?) + }; + + Some(if is_signaling { + Self::snan(payload) + } else { + Self::qnan(payload) + }) + }) + } + + _ => None, + }; + if let Some(r) = special { + return Ok(Status::OK.and(from_abs(r))); } - // Handle a leading minus sign. - let minus = s.starts_with("-"); - if minus || s.starts_with("+") { - s = &s[1..]; - if s.is_empty() { - return Err(ParseError("String has no digits")); - } + if s.is_empty() { + return Err(ParseError("String has no digits")); } // Adjust the rounding mode for the absolute value below. 
- if minus { - round = -round; - } + round = round.negate_if(minus); - let r = if s.starts_with("0x") || s.starts_with("0X") { - s = &s[2..]; + let (is_hex, s) = s + .strip_prefix("0") + .and_then(|s| s.strip_prefix(['x', 'X'])) + .map(|s| (true, s)) + .unwrap_or((false, s)); + + let r = if is_hex { if s.is_empty() { return Err(ParseError("Invalid string")); } @@ -1213,7 +1626,7 @@ impl Float for IeeeFloat { Self::from_decimal_string(s, round)? }; - Ok(r.map(|r| if minus { -r } else { r })) + Ok(r.map(from_abs)) } fn to_bits(self) -> u128 { @@ -1223,7 +1636,7 @@ impl Float for IeeeFloat { fn to_u128_r(self, width: usize, round: Round, is_exact: &mut bool) -> StatusAnd { // The result of trying to convert a number too large. - let overflow = if self.sign { + let overflow = if self.is_negative() { // Negative numbers cannot be represented as unsigned. 0 } else { @@ -1233,14 +1646,14 @@ impl Float for IeeeFloat { *is_exact = false; - match self.category { + match self.category() { Category::NaN => Status::INVALID_OP.and(0), Category::Infinity => Status::INVALID_OP.and(overflow), Category::Zero => { // Negative zero can't be represented as an int. 
- *is_exact = !self.sign; + *is_exact = !self.is_negative(); Status::OK.and(0) } @@ -1312,11 +1725,11 @@ impl Float for IeeeFloat { } fn bitwise_eq(self, rhs: Self) -> bool { - if self.category != rhs.category || self.sign != rhs.sign { + if self.category() != rhs.category() || self.is_negative() != rhs.is_negative() { return false; } - if self.category == Category::Zero || self.category == Category::Infinity { + if self.category() == Category::Zero || self.category() == Category::Infinity { return true; } @@ -1328,7 +1741,7 @@ impl Float for IeeeFloat { } fn is_negative(self) -> bool { - self.sign + self.read_only_sign_do_not_mutate } fn is_denormal(self) -> bool { @@ -1338,11 +1751,11 @@ impl Float for IeeeFloat { fn is_signaling(self) -> bool { // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the // first bit of the trailing significand being 0. - self.is_nan() && !sig::get_bit(&self.sig, S::QNAN_BIT) + self.is_nan() && self.sig[0] & S::QNAN_SIGNIFICAND != S::QNAN_SIGNIFICAND } fn category(self) -> Category { - self.category + self.read_only_category_do_not_mutate } fn get_exact_inverse(self) -> Option { @@ -1412,7 +1825,7 @@ impl Float for IeeeFloat { self.exp = self.exp.saturating_add(exp_change as ExpInt); self = self.normalize(round, Loss::ExactlyZero).value; if self.is_nan() { - sig::set_bit(&mut self.sig, S::QNAN_BIT); + self = IeeeDefaultExceptionHandling::result_from_nan(self).value; } self } @@ -1422,7 +1835,7 @@ impl Float for IeeeFloat { // Quiet signalling nans. 
if *exp == IEK_NAN { - sig::set_bit(&mut self.sig, S::QNAN_BIT); + self = IeeeDefaultExceptionHandling::result_from_nan(self).value; return self; } @@ -1442,79 +1855,103 @@ impl Float for IeeeFloat { } impl FloatConvert> for IeeeFloat { - fn convert_r(self, round: Round, loses_info: &mut bool) -> StatusAnd> { - let mut r = IeeeFloat { - sig: self.sig, - exp: self.exp, - category: self.category, - sign: self.sign, - marker: PhantomData, - }; + fn convert_r(mut self, round: Round, loses_info: &mut bool) -> StatusAnd> { + // FIXME(eddyb) move this into the return result. + *loses_info = false; // x86 has some unusual NaNs which cannot be represented in any other // format; note them here. fn is_x87_double_extended() -> bool { S::QNAN_SIGNIFICAND == X87DoubleExtendedS::QNAN_SIGNIFICAND } - let x87_special_nan = is_x87_double_extended::() + let loses_x87_pseudo_nan = is_x87_double_extended::() && !is_x87_double_extended::() - && r.category == Category::NaN - && (r.sig[0] & S::QNAN_SIGNIFICAND) != S::QNAN_SIGNIFICAND; + && self.category() == Category::NaN + && (self.sig[0] & S::QNAN_SIGNIFICAND) != S::QNAN_SIGNIFICAND; + + // NOTE(eddyb) this is done early because the target semantics may not + // actually support encoding the distinction between SNaN and QNaN. + // + // Convert of sNaN creates qNaN and raises an exception (invalid op). + // This also guarantees that a sNaN does not become Inf on a truncation + // that loses all payload bits. + let mut status = Status::OK; + if self.is_nan() { + self = unpack!(status|=, IeeeDefaultExceptionHandling::result_from_nan(self)); + } + + let Self { mut sig, mut exp, .. } = self; // If this is a truncation of a denormal number, and the target semantics // has larger exponent range than the source semantics (this can happen // when truncating from PowerPC double-double to double format), the // right shift could lose result mantissa bits. Adjust exponent instead // of performing excessive shift. 
+ // Also do a similar trick in case shifting denormal would produce zero + // significand as this case isn't handled correctly by normalize. let mut shift = T::PRECISION as ExpInt - S::PRECISION as ExpInt; - if shift < 0 && r.is_finite_non_zero() { - let mut exp_change = sig::omsb(&r.sig) as ExpInt - S::PRECISION as ExpInt; - if r.exp + exp_change < T::MIN_EXP { - exp_change = T::MIN_EXP - r.exp; + if shift < 0 && self.is_finite_non_zero() { + let omsb = sig::omsb(&sig) as ExpInt; + let mut exp_change = omsb - S::PRECISION as ExpInt; + if exp + exp_change < T::MIN_EXP { + exp_change = T::MIN_EXP - exp; } if exp_change < shift { exp_change = shift; } if exp_change < 0 { shift -= exp_change; - r.exp += exp_change; + exp += exp_change; + } else if omsb <= -shift { + exp_change = omsb + shift - 1; // leave at least one bit set + shift -= exp_change; + exp += exp_change; } } // If this is a truncation, perform the shift. let mut loss = Loss::ExactlyZero; - if shift < 0 && (r.is_finite_non_zero() || r.category == Category::NaN) { - loss = sig::shift_right(&mut r.sig, &mut 0, -shift as usize); + if shift < 0 && (self.is_finite_non_zero() || self.category() == Category::NaN && S::NAN_PAYLOAD_MASK != 0) { + loss = sig::shift_right(&mut sig, &mut 0, -shift as usize); } // If this is an extension, perform the shift. - if shift > 0 && (r.is_finite_non_zero() || r.category == Category::NaN) { - sig::shift_left(&mut r.sig, &mut 0, shift as usize); + if shift > 0 && (self.is_finite_non_zero() || self.category() == Category::NaN) { + sig::shift_left(&mut sig, &mut 0, shift as usize); } - let status; - if r.is_finite_non_zero() { - r = unpack!(status=, r.normalize(round, loss)); - *loses_info = status != Status::OK; - } else if r.category == Category::NaN { - *loses_info = loss != Loss::ExactlyZero || x87_special_nan; - - // For x87 extended precision, we want to make a NaN, not a special NaN if - // the input wasn't special either. 
- if !x87_special_nan && is_x87_double_extended::() { - sig::set_bit(&mut r.sig, T::PRECISION - 1); - } - - // gcc forces the Quiet bit on, which means (float)(double)(float_sNan) - // does not give you back the same bits. This is dubious, and we - // don't currently do it. You're really supposed to get - // an invalid operation signal at runtime, but nobody does that. - status = Status::OK; - } else { - *loses_info = false; - status = Status::OK; + let r = match self.category() { + Category::Normal => { + let r = IeeeFloat:: { + sig, + exp, + read_only_category_do_not_mutate: self.category(), + read_only_sign_do_not_mutate: self.is_negative(), + marker: PhantomData, + }; + unpack!(status|=, r.normalize(round, loss)) + } + Category::NaN => { + *loses_info = loss != Loss::ExactlyZero + || loses_x87_pseudo_nan + || S::NAN_PAYLOAD_MASK != 0 && T::NAN_PAYLOAD_MASK == 0; + + IeeeFloat::::qnan(Some(sig[0])).with_sign(self.is_negative()) + } + Category::Infinity => IeeeFloat::::INFINITY.with_sign(self.is_negative()), + Category::Zero => IeeeFloat::::ZERO.with_sign(self.is_negative()), + }; + + // NOTE(eddyb) this catches all cases of e.g. ±Inf turning into NaN, + // because of `T::NONFINITE_BEHAVIOR` not being `IEEE754`. + if matches!(self.category(), Category::Infinity | Category::Zero) + && (r.category() != self.category() || r.is_negative() != self.is_negative()) + { + status |= Status::INEXACT; } + *loses_info |= (status - Status::INVALID_OP) != Status::OK; + status.and(r) } } @@ -1554,15 +1991,15 @@ impl IeeeFloat { } // Our zeros don't have a significand to test. 
- if loss == Loss::ExactlyHalf && self.category != Category::Zero { + if loss == Loss::ExactlyHalf && self.category() != Category::Zero { return sig::get_bit(&self.sig, bit); } false } Round::TowardZero => false, - Round::TowardPositive => !self.sign, - Round::TowardNegative => self.sign, + Round::TowardPositive => !self.is_negative(), + Round::TowardNegative => self.is_negative(), } } @@ -1583,7 +2020,7 @@ impl IeeeFloat { // If the resulting exponent is too high, overflow according to // the rounding mode. if final_exp > S::MAX_EXP { - let round = if self.sign { -round } else { round }; + let round = round.negate_if(self.is_negative()); return Self::overflow_result(round).map(|r| r.copy_sign(self)); } @@ -1613,6 +2050,16 @@ impl IeeeFloat { } } + // NOTE(eddyb) for `NonfiniteBehavior::NanOnly`, the unique `NAN` takes up + // the largest significand of `MAX_EXP` (which also has normals), though + // comparing significands needs to ignore the integer bit `NAN` lacks. + if S::NONFINITE_BEHAVIOR == NonfiniteBehavior::NanOnly + && self.exp == Self::NAN.exp + && [self.sig[0] & S::NAN_SIGNIFICAND_BASE] == Self::NAN.sig + { + return Self::overflow_result(round).map(|r| r.copy_sign(self)); + } + // Now round the number according to round given the lost // fraction. @@ -1621,7 +2068,7 @@ impl IeeeFloat { if loss == Loss::ExactlyZero { // Canonicalize zeros. if omsb == 0 { - self.category = Category::Zero; + self = Self::ZERO.copy_sign(self); } return Status::OK.and(self); @@ -1643,15 +2090,23 @@ impl IeeeFloat { // significand right one. However if we already have the // maximum exponent we overflow to infinity. 
if self.exp == S::MAX_EXP { - self.category = Category::Infinity; - - return (Status::OVERFLOW | Status::INEXACT).and(self); + return (Status::OVERFLOW | Status::INEXACT).and(Self::INFINITY.copy_sign(self)); } let _: Loss = sig::shift_right(&mut self.sig, &mut self.exp, 1); return Status::INEXACT.and(self); } + + // NOTE(eddyb) for `NonfiniteBehavior::NanOnly`, the unique `NAN` takes up + // the largest significand of `MAX_EXP` (which also has normals), though + // comparing significands needs to ignore the integer bit `NAN` lacks. + if S::NONFINITE_BEHAVIOR == NonfiniteBehavior::NanOnly + && self.exp == Self::NAN.exp + && [self.sig[0] & S::NAN_SIGNIFICAND_BASE] == Self::NAN.sig + { + return Self::overflow_result(round).map(|r| r.copy_sign(self)); + } } // The normal case - we were and are not denormal, and any @@ -1665,7 +2120,7 @@ impl IeeeFloat { // Canonicalize zeros. if omsb == 0 { - self.category = Category::Zero; + self = Self::ZERO.copy_sign(self); } // The Category::Zero case is a denormal that underflowed to zero. 
@@ -1676,8 +2131,8 @@ impl IeeeFloat { let mut r = IeeeFloat { sig: [0], exp: 0, - category: Category::Normal, - sign: false, + read_only_category_do_not_mutate: Category::Normal, + read_only_sign_do_not_mutate: false, marker: PhantomData, }; @@ -1762,9 +2217,7 @@ impl IeeeFloat { return Err(ParseError("Exponent has no digits")); } - if exp_minus { - r.exp = -r.exp; - } + r.exp = r.exp.negate_if(exp_minus); break; } else { @@ -1857,22 +2310,21 @@ impl IeeeFloat { chars.next(); } - any_digits = false; + let mut any_exp_digits = false; for c in chars { if let Some(value) = c.to_digit(10) { - any_digits = true; + any_exp_digits = true; dec_exp = dec_exp.saturating_mul(10).saturating_add(value as i32); } else { return Err(ParseError("Invalid character in exponent")); } } - if !any_digits { - return Err(ParseError("Exponent has no digits")); + // Treat no exponent as 0 to match binutils + if !any_exp_digits { + assert_eq!(dec_exp, 0); } - if exp_minus { - dec_exp = -dec_exp; - } + dec_exp = dec_exp.negate_if(exp_minus); break; } else { @@ -2187,8 +2639,8 @@ impl IeeeFloat { let mut r = IeeeFloat { sig: [0], exp, - category: Category::Normal, - sign: false, + read_only_category_do_not_mutate: Category::Normal, + read_only_sign_do_not_mutate: false, marker: PhantomData, }; sig::extract(&mut r.sig, &sig_calc, used_bits, calc_precision - used_bits); @@ -2300,11 +2752,6 @@ mod sig { limbs[bit / LIMB_BITS] |= 1 << (bit % LIMB_BITS); } - /// Clear the given bit. - pub(super) fn clear_bit(limbs: &mut [Limb], bit: usize) { - limbs[bit / LIMB_BITS] &= !(1 << (bit % LIMB_BITS)); - } - /// Shift `dst` left `bits` bits, subtract `bits` from its exponent. pub(super) fn shift_left(dst: &mut [Limb], exp: &mut ExpInt, bits: usize) { if bits > 0 { @@ -2503,23 +2950,22 @@ mod sig { // an addition or subtraction. // Subtraction is more subtle than one might naively expect. 
if *a_sign ^ b_sign { - let (reverse, loss); + let loss; if bits == 0 { - reverse = cmp(a_sig, b_sig) == Ordering::Less; loss = Loss::ExactlyZero; } else if bits > 0 { loss = shift_right(b_sig, &mut 0, (bits - 1) as usize); shift_left(a_sig, a_exp, 1); - reverse = false; } else { loss = shift_right(a_sig, a_exp, (-bits - 1) as usize); shift_left(b_sig, &mut 0, 1); - reverse = true; } let borrow = (loss != Loss::ExactlyZero) as Limb; - if reverse { + + // Should we reverse the subtraction. + if cmp(a_sig, b_sig) == Ordering::Less { // The code above is intended to ensure that no borrow is necessary. assert_eq!(sub(b_sig, a_sig, borrow), 0); a_sig.copy_from_slice(b_sig); diff --git a/src/lib.rs b/src/lib.rs index 4e2dc13..de925f1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,6 @@ //! Port of LLVM's APFloat software floating-point implementation from the //! following C++ sources (please update commit hash when backporting): -//! https://github.com/llvm/llvm-project/commit/f3598e8fca83ccfb11f58ec7957c229e349765e3 +//! https://github.com/llvm/llvm-project/commit/462a31f5a5abb905869ea93cc49b096079b11aa4 //! * `llvm/include/llvm/ADT/APFloat.h` -> `Float` and `FloatConvert` traits //! * `llvm/lib/Support/APFloat.cpp` -> `ieee` and `ppc` modules //! * `llvm/unittests/ADT/APFloatTest.cpp` -> `tests` directory @@ -48,6 +48,11 @@ bitflags! { /// IEEE-754R 7: Default exception handling. /// /// UNDERFLOW or OVERFLOW are always returned or-ed with INEXACT. + /// + /// APFloat models this behavior specified by IEEE-754: + /// "For operations producing results in floating-point format, the default + /// result of an operation that signals the invalid operation exception + /// shall be a quiet NaN." #[must_use] pub struct Status: u8 { const OK = 0x00; @@ -132,7 +137,7 @@ impl Neg for Round { } /// A signed type to represent a floating point number's unbiased exponent. -pub type ExpInt = i16; +pub type ExpInt = i32; // \c ilogb error results. 
pub const IEK_INF: ExpInt = ExpInt::max_value(); @@ -290,42 +295,7 @@ pub trait Float: } fn div_r(self, rhs: Self, round: Round) -> StatusAnd; /// IEEE remainder. - // This is not currently correct in all cases. - fn ieee_rem(self, rhs: Self) -> StatusAnd { - let mut v = self; - - let status; - v = unpack!(status=, v / rhs); - if status == Status::DIV_BY_ZERO { - return status.and(self); - } - - assert!(Self::PRECISION < 128); - - let status; - let x = unpack!(status=, v.to_i128_r(128, Round::NearestTiesToEven, &mut false)); - if status == Status::INVALID_OP { - return status.and(self); - } - - let status; - let mut v = unpack!(status=, Self::from_i128(x)); - assert_eq!(status, Status::OK); // should always work - - let status; - v = unpack!(status=, v * rhs); - assert_eq!(status - Status::INEXACT, Status::OK); // should not overflow or underflow - - let status; - v = unpack!(status=, self - v); - assert_eq!(status - Status::INEXACT, Status::OK); // likewise - - if v.is_zero() { - status.and(v.copy_sign(self)) // IEEE754 requires this - } else { - status.and(v) - } - } + fn ieee_rem(self, rhs: Self) -> StatusAnd; /// C fmod, or llvm frem. fn c_fmod(self, rhs: Self) -> StatusAnd; fn round_to_integral(self, round: Round) -> StatusAnd; @@ -435,7 +405,7 @@ pub trait Float: other } else if other.is_nan() { self - } else if other.partial_cmp(&self) == Some(Ordering::Less) { + } else if other < self { other } else { self @@ -449,7 +419,47 @@ pub trait Float: other } else if other.is_nan() { self - } else if self.partial_cmp(&other) == Some(Ordering::Less) { + } else if self < other { + other + } else { + self + } + } + + /// Implements IEEE 754-2018 minimum semantics. Returns the smaller of 2 + /// arguments, propagating NaNs and treating -0 as less than +0. 
+ fn minimum(self, other: Self) -> Self { + if self.is_nan() { + self + } else if other.is_nan() { + other + } else if self.is_zero() && other.is_zero() && self.is_negative() != other.is_negative() { + if self.is_negative() { + self + } else { + other + } + } else if other < self { + other + } else { + self + } + } + + /// Implements IEEE 754-2018 maximum semantics. Returns the larger of 2 + /// arguments, propagating NaNs and treating -0 as less than +0. + fn maximum(self, other: Self) -> Self { + if self.is_nan() { + self + } else if other.is_nan() { + other + } else if self.is_zero() && other.is_zero() && self.is_negative() != other.is_negative() { + if self.is_negative() { + other + } else { + self + } + } else if self < other { other } else { self @@ -515,6 +525,12 @@ pub trait Float: fn is_neg_zero(self) -> bool { self.is_zero() && self.is_negative() } + fn is_pos_infinity(self) -> bool { + self.is_infinite() && !self.is_negative() + } + fn is_neg_infinity(self) -> bool { + self.is_infinite() && self.is_negative() + } /// Returns true if and only if the number has the smallest possible non-zero /// magnitude in the current semantics. @@ -522,6 +538,12 @@ pub trait Float: Self::SMALLEST.copy_sign(self).bitwise_eq(self) } + /// Returns true if this is the smallest (by magnitude) normalized finite + /// number in the given semantics. + fn is_smallest_normalized(self) -> bool { + Self::smallest_normalized().copy_sign(self).bitwise_eq(self) + } + /// Returns true if and only if the number has the largest possible finite /// magnitude in the current semantics. fn is_largest(self) -> bool { diff --git a/src/ppc.rs b/src/ppc.rs index 0f80ca9..b03efff 100644 --- a/src/ppc.rs +++ b/src/ppc.rs @@ -35,6 +35,8 @@ type Fallback = ieee::IeeeFloat>; impl ieee::Semantics for FallbackS { // Forbid any conversion to/from bits. 
const BITS: usize = 0; + const EXP_BITS: usize = 0; + const PRECISION: usize = F::PRECISION * 2; const MAX_EXP: ExpInt = F::MAX_EXP as ExpInt; const MIN_EXP: ExpInt = F::MIN_EXP as ExpInt + F::PRECISION as ExpInt; @@ -50,8 +52,11 @@ type FallbackExtended = ieee::IeeeFloat>; impl ieee::Semantics for FallbackExtendedS { // Forbid any conversion to/from bits. const BITS: usize = 0; + const EXP_BITS: usize = 0; + const PRECISION: usize = Fallback::::PRECISION; const MAX_EXP: ExpInt = F::MAX_EXP as ExpInt; + const MIN_EXP: ExpInt = F::MIN_EXP as ExpInt; } impl From> for DoubleFloat @@ -336,6 +341,10 @@ where Fallback::from(self).div_r(Fallback::from(rhs), round).map(Self::from) } + fn ieee_rem(self, rhs: Self) -> StatusAnd { + Fallback::from(self).ieee_rem(Fallback::from(rhs)).map(Self::from) + } + fn c_fmod(self, rhs: Self) -> StatusAnd { Fallback::from(self).c_fmod(Fallback::from(rhs)).map(Self::from) } @@ -396,7 +405,7 @@ where self.category() == Category::Normal && (self.0.is_denormal() || self.0.is_denormal() || // (double)(Hi + Lo) == Hi defines a normal number. - !(self.0 + self.1).value.bitwise_eq(self.0)) + self.0 != (self.0 + self.1).value) } fn is_signaling(self) -> bool { @@ -407,6 +416,10 @@ where self.0.category() } + fn is_integer(self) -> bool { + self.0.is_integer() && self.1.is_integer() + } + fn get_exact_inverse(self) -> Option { Fallback::from(self).get_exact_inverse().map(Self::from) } @@ -428,3 +441,13 @@ where DoubleFloat(a, b) } } + +// HACK(eddyb) this is here instead of in `tests/ppc.rs` because `DoubleFloat` +// has private fields, and it's not worth it to make them public just for testing. 
+#[test] +fn is_integer() { + let double_from_f64 = |f: f64| ieee::Double::from_bits(f.to_bits().into()); + assert!(DoubleFloat(double_from_f64(-0.0), double_from_f64(-0.0)).is_integer()); + assert!(!DoubleFloat(double_from_f64(3.14159), double_from_f64(-0.0)).is_integer()); + assert!(!DoubleFloat(double_from_f64(-0.0), double_from_f64(3.14159)).is_integer()); +} diff --git a/tests/downstream.rs b/tests/downstream.rs index f02e171..fc60a7c 100644 --- a/tests/downstream.rs +++ b/tests/downstream.rs @@ -46,7 +46,7 @@ fn fuzz_roundtrip_through_i128() { // `f32` FMA bit-patterns which used to produce the wrong output (found by fuzzing). pub const FUZZ_IEEE32_FMA_CASES_WITH_EXPECTED_OUTPUTS: &[((u32, u32, u32), u32)] = &[ ((0x00001000 /* 5.74e-42 */, 0x0000001a /* 3.6e-44 */, 0xffff1a00 /* NaN */), 0xffff1a00 /* NaN */), - ((0x000080aa /* 4.6156e-41 */, 0xaaff0000 /* -4.52971e-13 */, 0xff9e007f /* NaN */), 0xff9e007f /* NaN */), + ((0x000080aa /* 4.6156e-41 */, 0xaaff0000 /* -4.52971e-13 */, 0xff9e007f /* NaN */), 0xffde007f /* NaN */), ((0x0000843f /* 4.7441e-41 */, 0x0084ff80 /* 1.2213942e-38 */, 0xffff8000 /* NaN */), 0xffff8000 /* NaN */), ((0x00009eaa /* 5.6918e-41 */, 0x201d7f1e /* 1.3340477e-19 */, 0xffff0001 /* NaN */), 0xffff0001 /* NaN */), ((0x020400ff /* 9.698114e-38 */, 0x7f7f2200 /* 3.3912968e+38 */, 0xffffffff /* NaN */), 0xffffffff /* NaN */), @@ -63,22 +63,22 @@ pub const FUZZ_IEEE32_FMA_CASES_WITH_EXPECTED_OUTPUTS: &[((u32, u32, u32), u32)] ((0x200004aa /* 1.0843565e-19 */, 0x00202020 /* 2.95026e-39 */, 0x7fff00ff /* NaN */), 0x7fff00ff /* NaN */), ( (0x20005eaa /* 1.0873343e-19 */, 0x9e9e9e3a /* -1.6794342e-20 */, 0xff9e009e /* NaN */), - 0xff9e009e, /* NaN */ + 0xffde009e, /* NaN */ ), - ((0x20007faa /* 1.0884262e-19 */, 0x9e00611e /* -6.796347e-21 */, 0x7faa0600 /* NaN */), 0x7faa0600 /* NaN */), + ((0x20007faa /* 1.0884262e-19 */, 0x9e00611e /* -6.796347e-21 */, 0x7faa0600 /* NaN */), 0x7fea0600 /* NaN */), ( (0x20007faa /* 1.0884262e-19 */, 
0xaa069e1e /* -1.1956449e-13 */, 0xffffecff /* NaN */), 0xffffecff, /* NaN */ ), - ((0x20025eaa /* 1.104275e-19 */, 0x9e01033a /* -6.82987e-21 */, 0xff9e009e /* NaN */), 0xff9e009e /* NaN */), + ((0x20025eaa /* 1.104275e-19 */, 0x9e01033a /* -6.82987e-21 */, 0xff9e009e /* NaN */), 0xffde009e /* NaN */), ((0x3314f400 /* 3.4680852e-8 */, 0x00ff7903 /* 2.3461462e-38 */, 0xffffffdb /* NaN */), 0xffffffdb /* NaN */), ((0x3314f400 /* 3.4680852e-8 */, 0x00ff7903 /* 2.3461462e-38 */, 0xfffffff6 /* NaN */), 0xfffffff6 /* NaN */), - ((0x3a218275 /* 0.0006161102 */, 0x3a3a3a3a /* 0.00071040133 */, 0x7f8a063a /* NaN */), 0x7f8a063a /* NaN */), + ((0x3a218275 /* 0.0006161102 */, 0x3a3a3a3a /* 0.00071040133 */, 0x7f8a063a /* NaN */), 0x7fca063a /* NaN */), ((0x40000001 /* 2.0000002 */, 0xfefffffe /* -1.7014116e+38 */, 0xfffe40ff /* NaN */), 0xfffe40ff /* NaN */), ((0x50007faa /* 8623401000 */, 0x000011fb /* 6.45e-42 */, 0xff800000 /* -inf */), 0xff800000 /* -inf */), - ((0x64007f8b /* 9.481495e+21 */, 0xfa9a8702 /* -4.01176e+35 */, 0xff820000 /* NaN */), 0xff820000 /* NaN */), - ((0x6a017faa /* 3.9138577e+25 */, 0x00000070 /* 1.57e-43 */, 0xff80db03 /* NaN */), 0xff80db03 /* NaN */), - ((0x6a017faa /* 3.9138577e+25 */, 0x00000070 /* 1.57e-43 */, 0xff80db23 /* NaN */), 0xff80db23 /* NaN */), + ((0x64007f8b /* 9.481495e+21 */, 0xfa9a8702 /* -4.01176e+35 */, 0xff820000 /* NaN */), 0xffc20000 /* NaN */), + ((0x6a017faa /* 3.9138577e+25 */, 0x00000070 /* 1.57e-43 */, 0xff80db03 /* NaN */), 0xffc0db03 /* NaN */), + ((0x6a017faa /* 3.9138577e+25 */, 0x00000070 /* 1.57e-43 */, 0xff80db23 /* NaN */), 0xffc0db23 /* NaN */), ( (0x6e000000 /* 9.9035203e+27 */, 0xdf008000 /* -9259401000000000000 */, 0x7f800000 /* inf */), 0x7f800000, /* inf */ @@ -90,7 +90,7 @@ pub const FUZZ_IEEE32_FMA_CASES_WITH_EXPECTED_OUTPUTS: &[((u32, u32, u32), u32)] ), ( (0xdf0603ff /* -9656842000000000000 */, 0x808000ff /* -1.1755301e-38 */, 0xff9b0000 /* NaN */), - 0xff9b0000, /* NaN */ + 0xffdb0000, /* NaN */ ), ( 
( @@ -232,7 +232,7 @@ pub const FUZZ_IEEE64_FMA_CASES_WITH_EXPECTED_OUTPUTS: &[((u64, u64, u64), u64)] 0xffd8000000000000, /* -6.741349255733685e+307 */ 0xfff0001000000000, /* NaN */ ), - 0xfff0001000000000, /* NaN */ + 0xfff8001000000000, /* NaN */ ), ( ( @@ -240,7 +240,7 @@ pub const FUZZ_IEEE64_FMA_CASES_WITH_EXPECTED_OUTPUTS: &[((u64, u64, u64), u64)] 0xfbd8000000000000, /* -3.6544927542749997e+288 */ 0xfff0ff1000000000, /* NaN */ ), - 0xfff0ff1000000000, /* NaN */ + 0xfff8ff1000000000, /* NaN */ ), ( ( @@ -328,7 +328,7 @@ pub const FUZZ_IEEE64_FMA_CASES_WITH_EXPECTED_OUTPUTS: &[((u64, u64, u64), u64)] 0x00bc000000004000, /* 3.987332354453194e-305 */ 0xfff0000000e20000, /* NaN */ ), - 0xfff0000000e20000, /* NaN */ + 0xfff8000000e20000, /* NaN */ ), ( ( @@ -391,10 +391,7 @@ fn fuzz_fma_with_expected_outputs() { // found many examples in all ops, as the root issue was the handling of the // bit-level encoding itself, but negation was the easiest op to test here). pub const FUZZ_X87_F80_NEG_CASES_WITH_EXPECTED_OUTPUTS: &[(u128, u128)] = &[ - ( - 0x01010101010100000000, /* 3.05337213397376214408E-4857 */ - 0x81010101010100000000, /* -3.05337213397376214408E-4857 */ - ), + (0x01010101010100000000 /* NaN */, 0xffff0101010100000000 /* NaN */), ( 0x0000ff7f2300ff000000, /* 6.71098449692300485303E-4932 */ 0x8001ff7f2300ff000000, /* -6.71098449692300485303E-4932 */ diff --git a/tests/ieee.rs b/tests/ieee.rs index 52e0915..8af64dc 100644 --- a/tests/ieee.rs +++ b/tests/ieee.rs @@ -1,9 +1,39 @@ #[macro_use] extern crate rustc_apfloat; -use rustc_apfloat::ieee::{Double, Half, Quad, Single, X87DoubleExtended}; +use core::cmp::Ordering; +use rustc_apfloat::ieee::{BFloat, Double, Float8E4M3FN, Float8E5M2, Half, Quad, Single, X87DoubleExtended}; use rustc_apfloat::{Category, ExpInt, IEK_INF, IEK_NAN, IEK_ZERO}; -use rustc_apfloat::{Float, FloatConvert, ParseError, Round, Status}; +use rustc_apfloat::{Float, FloatConvert, Round, Status}; + +// FIXME(eddyb) maybe include this in 
`rustc_apfloat` itself? +macro_rules! define_for_each_float_type { + ($($ty:ty),+ $(,)?) => { + macro_rules! for_each_float_type { + // FIXME(eddyb) use generic closures if they're ever added to Rust. + (for<$ty_var:ident: Float> $e:expr) => {{ + $({ + type $ty_var = $ty; + $e; + })+ + }} + } + } +} +define_for_each_float_type! { + Half, + Single, + Double, + Quad, + + BFloat, + Float8E5M2, + Float8E4M3FN, + X87DoubleExtended, + + // NOTE(eddyb) tests for this are usually in `ppc.rs` but this works too. + rustc_apfloat::ppc::DoubleDouble, +} trait SingleExt { fn from_f32(input: f32) -> Self; @@ -35,6 +65,37 @@ impl DoubleExt for Double { } } +// NOTE(eddyb) these match the C++ `convertToFloat`/`convertToDouble` methods, +// after their generalization to allow an optional lossless conversion to their +// expected semantics (from e.g. `IEEEhalf`/`BFloat`, for `convertToSingle`). +// FIXME(eddyb) should the methods have e.g. `_lossless_via_convert` in their names? +fn assert_lossless_conversion, T: Float>(src: S) -> T { + let mut loses_info = false; + let status; + let r = unpack!(status=, src.convert(&mut loses_info)); + assert!(!status.intersects(Status::INEXACT) && !loses_info, "Unexpected imprecision"); + r +} + +trait ToF32LosslessViaConvertToSingle: FloatConvert { + fn to_f32(self) -> f32 { + assert_lossless_conversion(self).to_f32() + } +} +impl ToF32LosslessViaConvertToSingle for Half {} +impl ToF32LosslessViaConvertToSingle for BFloat {} +impl ToF32LosslessViaConvertToSingle for Float8E5M2 {} +impl ToF32LosslessViaConvertToSingle for Float8E4M3FN {} + +trait ToF64LosslessViaConvertToDouble: FloatConvert { + fn to_f64(self) -> f64 { + assert_lossless_conversion(self).to_f64() + } +} +impl ToF64LosslessViaConvertToDouble for Single {} +// HACK(eddyb) take advantage of the transitivity of "are conversions lossless". +impl> ToF64LosslessViaConvertToDouble for T {} + #[test] fn is_signaling() { // We test qNaN, -qNaN, +sNaN, -sNaN with and without payloads. 
@@ -564,6 +625,23 @@ fn fma() { assert!(!loses_info); assert_eq!(4.0, r.to_f32()); } + + // Regression test that failed an assertion. + { + let mut f1 = Single::from_f32(-8.85242279E-41); + let f2 = Single::from_f32(2.0); + let f3 = Single::from_f32(8.85242279E-41); + f1 = f1.mul_add(f2, f3).value; + assert_eq!(-8.85242279E-41, f1.to_f32()); + } + + // Test using only a single instance of APFloat. + { + let mut f = Double::from_f64(1.5); + + f = f.mul_add(f, f).value; + assert_eq!(3.75, f.to_f64()); + } } #[test] @@ -590,29 +668,59 @@ fn max_num() { assert_eq!(1.0, nan.max(f1).to_f64()); } +#[test] +fn minimum() { + let f1 = Double::from_f64(1.0); + let f2 = Double::from_f64(2.0); + let zp = Double::from_f64(0.0); + let zn = Double::from_f64(-0.0); + let nan = Double::NAN; + + assert_eq!(1.0, f1.minimum(f2).to_f64()); + assert_eq!(1.0, f2.minimum(f1).to_f64()); + assert_eq!(-0.0, zp.minimum(zn).to_f64()); + assert_eq!(-0.0, zn.minimum(zp).to_f64()); + assert!(f1.minimum(nan).to_f64().is_nan()); + assert!(nan.minimum(f1).to_f64().is_nan()); +} + +#[test] +fn maximum() { + let f1 = Double::from_f64(1.0); + let f2 = Double::from_f64(2.0); + let zp = Double::from_f64(0.0); + let zn = Double::from_f64(-0.0); + let nan = Double::NAN; + + assert_eq!(2.0, f1.maximum(f2).to_f64()); + assert_eq!(2.0, f2.maximum(f1).to_f64()); + assert_eq!(0.0, zp.maximum(zn).to_f64()); + assert_eq!(0.0, zn.maximum(zp).to_f64()); + assert!(f1.maximum(nan).to_f64().is_nan()); + assert!(nan.maximum(f1).to_f64().is_nan()); +} + #[test] fn denormal() { // Test single precision { - assert!(!Single::from_f32(0.0).is_denormal()); + assert!(!Single::from_u128(0).value.is_denormal()); let mut t = "1.17549435082228750797e-38".parse::().unwrap(); assert!(!t.is_denormal()); - let val2 = Single::from_f32(2.0e0); - t /= val2; + t /= Single::from_u128(2).value; assert!(t.is_denormal()); } // Test double precision { - assert!(!Double::from_f64(0.0).is_denormal()); + 
assert!(!Double::from_u128(0).value.is_denormal()); let mut t = "2.22507385850720138309e-308".parse::().unwrap(); assert!(!t.is_denormal()); - let val2 = Double::from_f64(2.0e0); - t /= val2; + t /= Double::from_u128(2).value; assert!(t.is_denormal()); } @@ -639,22 +747,69 @@ fn denormal() { } } +#[test] +fn is_smallest_normalized() { + for_each_float_type!(for test::()); + fn test() { + assert!(!F::ZERO.is_smallest_normalized()); + assert!(!(-F::ZERO).is_smallest_normalized()); + + assert!(!F::INFINITY.is_smallest_normalized()); + assert!(!(-F::INFINITY).is_smallest_normalized()); + + assert!(!F::qnan(None).is_smallest_normalized()); + assert!(!F::snan(None).is_smallest_normalized()); + + assert!(!F::largest().is_smallest_normalized()); + assert!(!(-F::largest()).is_smallest_normalized()); + + assert!(!F::SMALLEST.is_smallest_normalized()); + assert!(!(-F::SMALLEST).is_smallest_normalized()); + + assert!(!F::from_bits(!0u128 >> (128 - F::BITS)).is_smallest_normalized()); + + let pos_smallest_normalized = F::smallest_normalized(); + let neg_smallest_normalized = -F::smallest_normalized(); + assert!(pos_smallest_normalized.is_smallest_normalized()); + assert!(neg_smallest_normalized.is_smallest_normalized()); + + for mut val in [pos_smallest_normalized, neg_smallest_normalized] { + let old_sign = val.is_negative(); + + let mut status; + + // Step down, make sure it's still not smallest normalized. + val = unpack!(status=, val.next_down()); + assert_eq!(Status::OK, status); + assert_eq!(old_sign, val.is_negative()); + assert!(!val.is_smallest_normalized()); + assert_eq!(old_sign, val.is_negative()); + + // Step back up should restore it to being smallest normalized. + val = unpack!(status=, val.next_up()); + assert_eq!(Status::OK, status); + assert!(val.is_smallest_normalized()); + assert_eq!(old_sign, val.is_negative()); + + // Step beyond should no longer be smallest normalized. 
+ val = unpack!(status=, val.next_up()); + assert_eq!(Status::OK, status); + assert!(!val.is_smallest_normalized()); + assert_eq!(old_sign, val.is_negative()); + } + } +} + #[test] fn decimal_strings_without_null_terminators() { // Make sure that we can parse strings without null terminators. // rdar://14323230. - let val = "0.00"[..3].parse::().unwrap(); - assert_eq!(val.to_f64(), 0.0); - let val = "0.01"[..3].parse::().unwrap(); - assert_eq!(val.to_f64(), 0.0); - let val = "0.09"[..3].parse::().unwrap(); - assert_eq!(val.to_f64(), 0.0); - let val = "0.095"[..4].parse::().unwrap(); - assert_eq!(val.to_f64(), 0.09); - let val = "0.00e+3"[..7].parse::().unwrap(); - assert_eq!(val.to_f64(), 0.00); - let val = "0e+3"[..4].parse::().unwrap(); - assert_eq!(val.to_f64(), 0.00); + assert_eq!("0.00"[..3].parse::().unwrap().to_f64(), 0.0); + assert_eq!("0.01"[..3].parse::().unwrap().to_f64(), 0.0); + assert_eq!("0.09"[..3].parse::().unwrap().to_f64(), 0.0); + assert_eq!("0.095"[..4].parse::().unwrap().to_f64(), 0.09); + assert_eq!("0.00e+3"[..7].parse::().unwrap().to_f64(), 0.00); + assert_eq!("0e+3"[..4].parse::().unwrap().to_f64(), 0.00); } #[test] @@ -850,6 +1005,33 @@ fn from_decimal_string() { assert_eq!(2.05e+12, "002.05000e+12".parse::().unwrap().to_f64()); assert_eq!(2.05e-12, "002.05000e-12".parse::().unwrap().to_f64()); + assert_eq!(1.0, "1e".parse::().unwrap().to_f64()); + assert_eq!(1.0, "+1e".parse::().unwrap().to_f64()); + assert_eq!(-1.0, "-1e".parse::().unwrap().to_f64()); + + assert_eq!(1.0, "1.e".parse::().unwrap().to_f64()); + assert_eq!(1.0, "+1.e".parse::().unwrap().to_f64()); + assert_eq!(-1.0, "-1.e".parse::().unwrap().to_f64()); + + assert_eq!(0.1, ".1e".parse::().unwrap().to_f64()); + assert_eq!(0.1, "+.1e".parse::().unwrap().to_f64()); + assert_eq!(-0.1, "-.1e".parse::().unwrap().to_f64()); + + assert_eq!(1.1, "1.1e".parse::().unwrap().to_f64()); + assert_eq!(1.1, "+1.1e".parse::().unwrap().to_f64()); + assert_eq!(-1.1, 
"-1.1e".parse::().unwrap().to_f64()); + + assert_eq!(1.0, "1e+".parse::().unwrap().to_f64()); + assert_eq!(1.0, "1e-".parse::().unwrap().to_f64()); + + assert_eq!(0.1, ".1e".parse::().unwrap().to_f64()); + assert_eq!(0.1, ".1e+".parse::().unwrap().to_f64()); + assert_eq!(0.1, ".1e-".parse::().unwrap().to_f64()); + + assert_eq!(1.0, "1.0e".parse::().unwrap().to_f64()); + assert_eq!(1.0, "1.0e+".parse::().unwrap().to_f64()); + assert_eq!(1.0, "1.0e-".parse::().unwrap().to_f64()); + // These are "carefully selected" to overflow the fast log-base // calculations in the implementation. assert!("99e99999".parse::().unwrap().is_infinite()); @@ -860,6 +1042,117 @@ fn from_decimal_string() { assert_eq!(2.71828, "2.71828".parse::().unwrap().to_f64()); } +#[test] +fn from_string_specials() { + let precision = 53; + let payload_bits = precision - 2; + let payload_mask = (1 << payload_bits) - 1; + + let mut nan_payloads = [ + 0, + 1, + 123, + 0xDEADBEEF, + -2i32 as u128, + 1 << payload_bits, // overflow bit + 1 << (payload_bits - 1), // signaling bit + 1 << (payload_bits - 2), // highest possible bit + ]; + + // Convert payload integer to decimal string representation. + let nan_payload_dec_strings: Vec<_> = nan_payloads.iter().map(|payload| format!("{payload}")).collect(); + + // Convert payload integer to hexadecimal string representation. + let nan_payload_hex_strings: Vec<_> = nan_payloads.iter().map(|payload| format!("{payload:#x}")).collect(); + + // Fix payloads to expected result. + for payload in &mut nan_payloads { + *payload &= payload_mask; + } + + // Signaling NaN must have a non-zero payload. In case a zero payload is + // requested, a default arbitrary payload is set instead. Save this payload + // for testing. + let snan_default_payload = Double::snan(None).to_bits() & payload_mask; + + // Negative sign prefix (or none - for positive). + let signs = ["", "-"]; + + // "Signaling" prefix (or none - for "Quiet"). 
+ let nan_types = ["", "s", "S"]; + + let nan_strings = ["nan", "NaN"]; + for nan_str in nan_strings { + for type_str in nan_types { + let signaling = matches!(type_str, "s" | "S"); + + for j in 0..nan_payloads.len() { + let payload = if signaling && nan_payloads[j] == 0 { + snan_default_payload + } else { + nan_payloads[j] + }; + let payload_dec = &nan_payload_dec_strings[j]; + let payload_hex = &nan_payload_hex_strings[j]; + + for sign_str in signs { + let negative = sign_str == "-"; + + let prefix = format!("{sign_str}{type_str}{nan_str}"); + + let test_strings = [ + // Test without any payload. + (payload == 0).then(|| prefix.clone()), + // Test with the payload as a suffix. + Some(format!("{prefix}{payload_dec}")), + Some(format!("{prefix}{payload_hex}")), + // Test with the payload inside parentheses. + Some(format!("{prefix}({payload_dec})")), + Some(format!("{prefix}({payload_hex})")), + ] + .into_iter() + .flatten(); + + for test_str in test_strings { + let f = test_str + .parse::() + .map_err(|e| format!("{test_str:?}: {e:?}")) + .unwrap(); + assert!(f.is_nan()); + assert_eq!(signaling, f.is_signaling()); + assert_eq!(negative, f.is_negative()); + assert_eq!(payload, f.to_bits() & payload_mask); + } + } + } + } + } + + let inf_strings = ["inf", "INFINITY", "+Inf", "-inf", "-INFINITY", "-Inf"]; + for &inf_str in &inf_strings { + let negative = inf_str.starts_with('-'); + + let f = inf_str.parse::().unwrap(); + assert!(f.is_infinite()); + assert_eq!(negative, f.is_negative()); + assert_eq!(0, f.to_bits() & payload_mask); + } +} + +#[test] +fn from_to_string_specials() { + assert_eq!("+Inf", "+Inf".parse::().unwrap().to_string()); + assert_eq!("+Inf", "INFINITY".parse::().unwrap().to_string()); + assert_eq!("+Inf", "inf".parse::().unwrap().to_string()); + assert_eq!("-Inf", "-Inf".parse::().unwrap().to_string()); + assert_eq!("-Inf", "-INFINITY".parse::().unwrap().to_string()); + assert_eq!("-Inf", "-inf".parse::().unwrap().to_string()); + assert_eq!("NaN", 
"NaN".parse::().unwrap().to_string()); + assert_eq!("NaN", "nan".parse::().unwrap().to_string()); + assert_eq!("NaN", "-NaN".parse::().unwrap().to_string()); + assert_eq!("NaN", "-nan".parse::().unwrap().to_string()); +} + #[test] fn from_hexadecimal_string() { assert_eq!(1.0, "0x1p0".parse::().unwrap().to_f64()); @@ -989,6 +1282,7 @@ fn to_string() { assert_eq!("873.18340000000001", to_string(873.1834, 0, 1)); assert_eq!("8.73183400000000010e+02", to_string(873.1834, 0, 0)); assert_eq!("1.79769313486231570e+308", to_string(1.7976931348623157E+308, 0, 0)); + assert_eq!("NaN", X87DoubleExtended::from_bits(1 << 64).to_string()); } #[test] @@ -1058,11 +1352,11 @@ fn to_integer() { #[test] fn nan() { - fn nanbits(signaling: bool, negative: bool, fill: u128) -> u128 { + fn nanbits_from_u128(signaling: bool, negative: bool, payload: u128) -> u128 { let x = if signaling { - T::snan(Some(fill)) + F::snan(Some(payload)) } else { - T::qnan(Some(fill)) + F::qnan(Some(payload)) }; if negative { (-x).to_bits() @@ -1071,216 +1365,201 @@ fn nan() { } } - assert_eq!(0x7fc00000, nanbits::(false, false, 0)); - assert_eq!(0xffc00000, nanbits::(false, true, 0)); - assert_eq!(0x7fc0ae72, nanbits::(false, false, 0xae72)); - assert_eq!(0x7fffae72, nanbits::(false, false, 0xffffae72)); - assert_eq!(0x7fa00000, nanbits::(true, false, 0)); - assert_eq!(0xffa00000, nanbits::(true, true, 0)); - assert_eq!(0x7f80ae72, nanbits::(true, false, 0xae72)); - assert_eq!(0x7fbfae72, nanbits::(true, false, 0xffffae72)); - - assert_eq!(0x7ff8000000000000, nanbits::(false, false, 0)); - assert_eq!(0xfff8000000000000, nanbits::(false, true, 0)); - assert_eq!(0x7ff800000000ae72, nanbits::(false, false, 0xae72)); - assert_eq!(0x7fffffffffffae72, nanbits::(false, false, 0xffffffffffffae72)); - assert_eq!(0x7ff4000000000000, nanbits::(true, false, 0)); - assert_eq!(0xfff4000000000000, nanbits::(true, true, 0)); - assert_eq!(0x7ff000000000ae72, nanbits::(true, false, 0xae72)); - assert_eq!(0x7ff7ffffffffae72, 
nanbits::(true, false, 0xffffffffffffae72)); -} - -#[test] -fn string_decimal_death() { - assert_eq!("".parse::(), Err(ParseError("Invalid string length"))); - assert_eq!("+".parse::(), Err(ParseError("String has no digits"))); - assert_eq!("-".parse::(), Err(ParseError("String has no digits"))); - - assert_eq!("\0".parse::(), Err(ParseError("Invalid character in significand"))); - assert_eq!("1\0".parse::(), Err(ParseError("Invalid character in significand"))); - assert_eq!("1\02".parse::(), Err(ParseError("Invalid character in significand"))); - assert_eq!("1\02e1".parse::(), Err(ParseError("Invalid character in significand"))); - assert_eq!("1e\0".parse::(), Err(ParseError("Invalid character in exponent"))); - assert_eq!("1e1\0".parse::(), Err(ParseError("Invalid character in exponent"))); - assert_eq!("1e1\02".parse::(), Err(ParseError("Invalid character in exponent"))); - - assert_eq!("1.0f".parse::(), Err(ParseError("Invalid character in significand"))); - - assert_eq!("..".parse::(), Err(ParseError("String contains multiple dots"))); - assert_eq!("..0".parse::(), Err(ParseError("String contains multiple dots"))); - assert_eq!("1.0.0".parse::(), Err(ParseError("String contains multiple dots"))); + let tests_single = [ + // expected SNaN Neg payload + (0x7fc00000, false, false, 0x00000000), + (0xffc00000, false, true, 0x00000000), + (0x7fc0ae72, false, false, 0x0000ae72), + (0x7fffae72, false, false, 0xffffae72), + (0x7fdaae72, false, false, 0x00daae72), + (0x7fa00000, true, false, 0x00000000), + (0xffa00000, true, true, 0x00000000), + (0x7f80ae72, true, false, 0x0000ae72), + (0x7fbfae72, true, false, 0xffffae72), + (0x7f9aae72, true, false, 0x001aae72), + ]; + let tests_double = [ + // expected SNaN Neg payload + (0x7ff8000000000000, false, false, 0x0000000000000000), + (0xfff8000000000000, false, true, 0x0000000000000000), + (0x7ff800000000ae72, false, false, 0x000000000000ae72), + (0x7fffffffffffae72, false, false, 0xffffffffffffae72), + 
(0x7ffdaaaaaaaaae72, false, false, 0x000daaaaaaaaae72), + (0x7ff4000000000000, true, false, 0x0000000000000000), + (0xfff4000000000000, true, true, 0x0000000000000000), + (0x7ff000000000ae72, true, false, 0x000000000000ae72), + (0x7ff7ffffffffae72, true, false, 0xffffffffffffae72), + (0x7ff1aaaaaaaaae72, true, false, 0x0001aaaaaaaaae72), + ]; + for (expected, signaling, negative, payload) in tests_single { + assert_eq!(expected, nanbits_from_u128::(signaling, negative, payload)); + } + for (expected, signaling, negative, payload) in tests_double { + assert_eq!(expected, nanbits_from_u128::(signaling, negative, payload)); + } } #[test] -fn string_decimal_significand_death() { - assert_eq!(".".parse::(), Err(ParseError("Significand has no digits"))); - assert_eq!("+.".parse::(), Err(ParseError("Significand has no digits"))); - assert_eq!("-.".parse::(), Err(ParseError("Significand has no digits"))); - - assert_eq!("e".parse::(), Err(ParseError("Significand has no digits"))); - assert_eq!("+e".parse::(), Err(ParseError("Significand has no digits"))); - assert_eq!("-e".parse::(), Err(ParseError("Significand has no digits"))); - - assert_eq!("e1".parse::(), Err(ParseError("Significand has no digits"))); - assert_eq!("+e1".parse::(), Err(ParseError("Significand has no digits"))); - assert_eq!("-e1".parse::(), Err(ParseError("Significand has no digits"))); - - assert_eq!(".e1".parse::(), Err(ParseError("Significand has no digits"))); - assert_eq!("+.e1".parse::(), Err(ParseError("Significand has no digits"))); - assert_eq!("-.e1".parse::(), Err(ParseError("Significand has no digits"))); - - assert_eq!(".e".parse::(), Err(ParseError("Significand has no digits"))); - assert_eq!("+.e".parse::(), Err(ParseError("Significand has no digits"))); - assert_eq!("-.e".parse::(), Err(ParseError("Significand has no digits"))); +fn string_decimal_error() { + assert_eq!("Invalid string length", "".parse::().unwrap_err().0); + assert_eq!("String has no digits", 
"+".parse::().unwrap_err().0); + assert_eq!("String has no digits", "-".parse::().unwrap_err().0); + + assert_eq!("Invalid character in significand", "\0".parse::().unwrap_err().0); + assert_eq!("Invalid character in significand", "1\0".parse::().unwrap_err().0); + assert_eq!("Invalid character in significand", "1\02".parse::().unwrap_err().0); + assert_eq!("Invalid character in significand", "1\02e1".parse::().unwrap_err().0); + assert_eq!("Invalid character in exponent", "1e\0".parse::().unwrap_err().0); + assert_eq!("Invalid character in exponent", "1e1\0".parse::().unwrap_err().0); + assert_eq!("Invalid character in exponent", "1e1\02".parse::().unwrap_err().0); + + assert_eq!("Invalid character in significand", "1.0f".parse::().unwrap_err().0); + + assert_eq!("String contains multiple dots", "..".parse::().unwrap_err().0); + assert_eq!("String contains multiple dots", "..0".parse::().unwrap_err().0); + assert_eq!("String contains multiple dots", "1.0.0".parse::().unwrap_err().0); } #[test] -fn string_decimal_exponent_death() { - assert_eq!("1e".parse::(), Err(ParseError("Exponent has no digits"))); - assert_eq!("+1e".parse::(), Err(ParseError("Exponent has no digits"))); - assert_eq!("-1e".parse::(), Err(ParseError("Exponent has no digits"))); - - assert_eq!("1.e".parse::(), Err(ParseError("Exponent has no digits"))); - assert_eq!("+1.e".parse::(), Err(ParseError("Exponent has no digits"))); - assert_eq!("-1.e".parse::(), Err(ParseError("Exponent has no digits"))); - - assert_eq!(".1e".parse::(), Err(ParseError("Exponent has no digits"))); - assert_eq!("+.1e".parse::(), Err(ParseError("Exponent has no digits"))); - assert_eq!("-.1e".parse::(), Err(ParseError("Exponent has no digits"))); - - assert_eq!("1.1e".parse::(), Err(ParseError("Exponent has no digits"))); - assert_eq!("+1.1e".parse::(), Err(ParseError("Exponent has no digits"))); - assert_eq!("-1.1e".parse::(), Err(ParseError("Exponent has no digits"))); - - assert_eq!("1e+".parse::(), 
Err(ParseError("Exponent has no digits"))); - assert_eq!("1e-".parse::(), Err(ParseError("Exponent has no digits"))); - - assert_eq!(".1e".parse::(), Err(ParseError("Exponent has no digits"))); - assert_eq!(".1e+".parse::(), Err(ParseError("Exponent has no digits"))); - assert_eq!(".1e-".parse::(), Err(ParseError("Exponent has no digits"))); - - assert_eq!("1.0e".parse::(), Err(ParseError("Exponent has no digits"))); - assert_eq!("1.0e+".parse::(), Err(ParseError("Exponent has no digits"))); - assert_eq!("1.0e-".parse::(), Err(ParseError("Exponent has no digits"))); +fn string_decimal_significand_error() { + assert_eq!("Significand has no digits", ".".parse::().unwrap_err().0); + assert_eq!("Significand has no digits", "+.".parse::().unwrap_err().0); + assert_eq!("Significand has no digits", "-.".parse::().unwrap_err().0); + + assert_eq!("Significand has no digits", "e".parse::().unwrap_err().0); + assert_eq!("Significand has no digits", "+e".parse::().unwrap_err().0); + assert_eq!("Significand has no digits", "-e".parse::().unwrap_err().0); + + assert_eq!("Significand has no digits", "e1".parse::().unwrap_err().0); + assert_eq!("Significand has no digits", "+e1".parse::().unwrap_err().0); + assert_eq!("Significand has no digits", "-e1".parse::().unwrap_err().0); + + assert_eq!("Significand has no digits", ".e1".parse::().unwrap_err().0); + assert_eq!("Significand has no digits", "+.e1".parse::().unwrap_err().0); + assert_eq!("Significand has no digits", "-.e1".parse::().unwrap_err().0); + + assert_eq!("Significand has no digits", ".e".parse::().unwrap_err().0); + assert_eq!("Significand has no digits", "+.e".parse::().unwrap_err().0); + assert_eq!("Significand has no digits", "-.e".parse::().unwrap_err().0); } #[test] -fn string_hexadecimal_death() { - assert_eq!("0x".parse::(), Err(ParseError("Invalid string"))); - assert_eq!("+0x".parse::(), Err(ParseError("Invalid string"))); - assert_eq!("-0x".parse::(), Err(ParseError("Invalid string"))); - - 
assert_eq!("0x0".parse::(), Err(ParseError("Hex strings require an exponent"))); - assert_eq!("+0x0".parse::(), Err(ParseError("Hex strings require an exponent"))); - assert_eq!("-0x0".parse::(), Err(ParseError("Hex strings require an exponent"))); - - assert_eq!("0x0.".parse::(), Err(ParseError("Hex strings require an exponent"))); - assert_eq!("+0x0.".parse::(), Err(ParseError("Hex strings require an exponent"))); - assert_eq!("-0x0.".parse::(), Err(ParseError("Hex strings require an exponent"))); - - assert_eq!("0x.0".parse::(), Err(ParseError("Hex strings require an exponent"))); - assert_eq!("+0x.0".parse::(), Err(ParseError("Hex strings require an exponent"))); - assert_eq!("-0x.0".parse::(), Err(ParseError("Hex strings require an exponent"))); - - assert_eq!("0x0.0".parse::(), Err(ParseError("Hex strings require an exponent"))); - assert_eq!("+0x0.0".parse::(), Err(ParseError("Hex strings require an exponent"))); - assert_eq!("-0x0.0".parse::(), Err(ParseError("Hex strings require an exponent"))); - - assert_eq!("0x\0".parse::(), Err(ParseError("Invalid character in significand"))); - assert_eq!("0x1\0".parse::(), Err(ParseError("Invalid character in significand"))); - assert_eq!("0x1\02".parse::(), Err(ParseError("Invalid character in significand"))); - assert_eq!("0x1\02p1".parse::(), Err(ParseError("Invalid character in significand"))); - assert_eq!("0x1p\0".parse::(), Err(ParseError("Invalid character in exponent"))); - assert_eq!("0x1p1\0".parse::(), Err(ParseError("Invalid character in exponent"))); - assert_eq!("0x1p1\02".parse::(), Err(ParseError("Invalid character in exponent"))); - - assert_eq!("0x1p0f".parse::(), Err(ParseError("Invalid character in exponent"))); - - assert_eq!("0x..p1".parse::(), Err(ParseError("String contains multiple dots"))); - assert_eq!("0x..0p1".parse::(), Err(ParseError("String contains multiple dots"))); - assert_eq!("0x1.0.0p1".parse::(), Err(ParseError("String contains multiple dots"))); +fn string_hexadecimal_error() 
{ + assert_eq!("Invalid string", "0x".parse::().unwrap_err().0); + assert_eq!("Invalid string", "+0x".parse::().unwrap_err().0); + assert_eq!("Invalid string", "-0x".parse::().unwrap_err().0); + + assert_eq!("Hex strings require an exponent", "0x0".parse::().unwrap_err().0); + assert_eq!("Hex strings require an exponent", "+0x0".parse::().unwrap_err().0); + assert_eq!("Hex strings require an exponent", "-0x0".parse::().unwrap_err().0); + + assert_eq!("Hex strings require an exponent", "0x0.".parse::().unwrap_err().0); + assert_eq!("Hex strings require an exponent", "+0x0.".parse::().unwrap_err().0); + assert_eq!("Hex strings require an exponent", "-0x0.".parse::().unwrap_err().0); + + assert_eq!("Hex strings require an exponent", "0x.0".parse::().unwrap_err().0); + assert_eq!("Hex strings require an exponent", "+0x.0".parse::().unwrap_err().0); + assert_eq!("Hex strings require an exponent", "-0x.0".parse::().unwrap_err().0); + + assert_eq!("Hex strings require an exponent", "0x0.0".parse::().unwrap_err().0); + assert_eq!("Hex strings require an exponent", "+0x0.0".parse::().unwrap_err().0); + assert_eq!("Hex strings require an exponent", "-0x0.0".parse::().unwrap_err().0); + + assert_eq!("Invalid character in significand", "0x\0".parse::().unwrap_err().0); + assert_eq!("Invalid character in significand", "0x1\0".parse::().unwrap_err().0); + assert_eq!("Invalid character in significand", "0x1\02".parse::().unwrap_err().0); + assert_eq!("Invalid character in significand", "0x1\02p1".parse::().unwrap_err().0); + assert_eq!("Invalid character in exponent", "0x1p\0".parse::().unwrap_err().0); + assert_eq!("Invalid character in exponent", "0x1p1\0".parse::().unwrap_err().0); + assert_eq!("Invalid character in exponent", "0x1p1\02".parse::().unwrap_err().0); + + assert_eq!("Invalid character in exponent", "0x1p0f".parse::().unwrap_err().0); + + assert_eq!("String contains multiple dots", "0x..p1".parse::().unwrap_err().0); + assert_eq!("String contains multiple dots", 
"0x..0p1".parse::().unwrap_err().0); + assert_eq!("String contains multiple dots", "0x1.0.0p1".parse::().unwrap_err().0); } #[test] -fn string_hexadecimal_significand_death() { - assert_eq!("0x.".parse::(), Err(ParseError("Significand has no digits"))); - assert_eq!("+0x.".parse::(), Err(ParseError("Significand has no digits"))); - assert_eq!("-0x.".parse::(), Err(ParseError("Significand has no digits"))); - - assert_eq!("0xp".parse::(), Err(ParseError("Significand has no digits"))); - assert_eq!("+0xp".parse::(), Err(ParseError("Significand has no digits"))); - assert_eq!("-0xp".parse::(), Err(ParseError("Significand has no digits"))); - - assert_eq!("0xp+".parse::(), Err(ParseError("Significand has no digits"))); - assert_eq!("+0xp+".parse::(), Err(ParseError("Significand has no digits"))); - assert_eq!("-0xp+".parse::(), Err(ParseError("Significand has no digits"))); - - assert_eq!("0xp-".parse::(), Err(ParseError("Significand has no digits"))); - assert_eq!("+0xp-".parse::(), Err(ParseError("Significand has no digits"))); - assert_eq!("-0xp-".parse::(), Err(ParseError("Significand has no digits"))); - - assert_eq!("0x.p".parse::(), Err(ParseError("Significand has no digits"))); - assert_eq!("+0x.p".parse::(), Err(ParseError("Significand has no digits"))); - assert_eq!("-0x.p".parse::(), Err(ParseError("Significand has no digits"))); - - assert_eq!("0x.p+".parse::(), Err(ParseError("Significand has no digits"))); - assert_eq!("+0x.p+".parse::(), Err(ParseError("Significand has no digits"))); - assert_eq!("-0x.p+".parse::(), Err(ParseError("Significand has no digits"))); - - assert_eq!("0x.p-".parse::(), Err(ParseError("Significand has no digits"))); - assert_eq!("+0x.p-".parse::(), Err(ParseError("Significand has no digits"))); - assert_eq!("-0x.p-".parse::(), Err(ParseError("Significand has no digits"))); +fn string_hexadecimal_significand_error() { + assert_eq!("Significand has no digits", "0x.".parse::().unwrap_err().0); + assert_eq!("Significand has no 
digits", "+0x.".parse::().unwrap_err().0); + assert_eq!("Significand has no digits", "-0x.".parse::().unwrap_err().0); + + assert_eq!("Significand has no digits", "0xp".parse::().unwrap_err().0); + assert_eq!("Significand has no digits", "+0xp".parse::().unwrap_err().0); + assert_eq!("Significand has no digits", "-0xp".parse::().unwrap_err().0); + + assert_eq!("Significand has no digits", "0xp+".parse::().unwrap_err().0); + assert_eq!("Significand has no digits", "+0xp+".parse::().unwrap_err().0); + assert_eq!("Significand has no digits", "-0xp+".parse::().unwrap_err().0); + + assert_eq!("Significand has no digits", "0xp-".parse::().unwrap_err().0); + assert_eq!("Significand has no digits", "+0xp-".parse::().unwrap_err().0); + assert_eq!("Significand has no digits", "-0xp-".parse::().unwrap_err().0); + + assert_eq!("Significand has no digits", "0x.p".parse::().unwrap_err().0); + assert_eq!("Significand has no digits", "+0x.p".parse::().unwrap_err().0); + assert_eq!("Significand has no digits", "-0x.p".parse::().unwrap_err().0); + + assert_eq!("Significand has no digits", "0x.p+".parse::().unwrap_err().0); + assert_eq!("Significand has no digits", "+0x.p+".parse::().unwrap_err().0); + assert_eq!("Significand has no digits", "-0x.p+".parse::().unwrap_err().0); + + assert_eq!("Significand has no digits", "0x.p-".parse::().unwrap_err().0); + assert_eq!("Significand has no digits", "+0x.p-".parse::().unwrap_err().0); + assert_eq!("Significand has no digits", "-0x.p-".parse::().unwrap_err().0); } #[test] -fn string_hexadecimal_exponent_death() { - assert_eq!("0x1p".parse::(), Err(ParseError("Exponent has no digits"))); - assert_eq!("+0x1p".parse::(), Err(ParseError("Exponent has no digits"))); - assert_eq!("-0x1p".parse::(), Err(ParseError("Exponent has no digits"))); - - assert_eq!("0x1p+".parse::(), Err(ParseError("Exponent has no digits"))); - assert_eq!("+0x1p+".parse::(), Err(ParseError("Exponent has no digits"))); - assert_eq!("-0x1p+".parse::(), 
Err(ParseError("Exponent has no digits"))); - - assert_eq!("0x1p-".parse::(), Err(ParseError("Exponent has no digits"))); - assert_eq!("+0x1p-".parse::(), Err(ParseError("Exponent has no digits"))); - assert_eq!("-0x1p-".parse::(), Err(ParseError("Exponent has no digits"))); - - assert_eq!("0x1.p".parse::(), Err(ParseError("Exponent has no digits"))); - assert_eq!("+0x1.p".parse::(), Err(ParseError("Exponent has no digits"))); - assert_eq!("-0x1.p".parse::(), Err(ParseError("Exponent has no digits"))); - - assert_eq!("0x1.p+".parse::(), Err(ParseError("Exponent has no digits"))); - assert_eq!("+0x1.p+".parse::(), Err(ParseError("Exponent has no digits"))); - assert_eq!("-0x1.p+".parse::(), Err(ParseError("Exponent has no digits"))); - - assert_eq!("0x1.p-".parse::(), Err(ParseError("Exponent has no digits"))); - assert_eq!("+0x1.p-".parse::(), Err(ParseError("Exponent has no digits"))); - assert_eq!("-0x1.p-".parse::(), Err(ParseError("Exponent has no digits"))); - - assert_eq!("0x.1p".parse::(), Err(ParseError("Exponent has no digits"))); - assert_eq!("+0x.1p".parse::(), Err(ParseError("Exponent has no digits"))); - assert_eq!("-0x.1p".parse::(), Err(ParseError("Exponent has no digits"))); - - assert_eq!("0x.1p+".parse::(), Err(ParseError("Exponent has no digits"))); - assert_eq!("+0x.1p+".parse::(), Err(ParseError("Exponent has no digits"))); - assert_eq!("-0x.1p+".parse::(), Err(ParseError("Exponent has no digits"))); - - assert_eq!("0x.1p-".parse::(), Err(ParseError("Exponent has no digits"))); - assert_eq!("+0x.1p-".parse::(), Err(ParseError("Exponent has no digits"))); - assert_eq!("-0x.1p-".parse::(), Err(ParseError("Exponent has no digits"))); - - assert_eq!("0x1.1p".parse::(), Err(ParseError("Exponent has no digits"))); - assert_eq!("+0x1.1p".parse::(), Err(ParseError("Exponent has no digits"))); - assert_eq!("-0x1.1p".parse::(), Err(ParseError("Exponent has no digits"))); - - assert_eq!("0x1.1p+".parse::(), Err(ParseError("Exponent has no digits"))); - 
assert_eq!("+0x1.1p+".parse::(), Err(ParseError("Exponent has no digits"))); - assert_eq!("-0x1.1p+".parse::(), Err(ParseError("Exponent has no digits"))); - - assert_eq!("0x1.1p-".parse::(), Err(ParseError("Exponent has no digits"))); - assert_eq!("+0x1.1p-".parse::(), Err(ParseError("Exponent has no digits"))); - assert_eq!("-0x1.1p-".parse::(), Err(ParseError("Exponent has no digits"))); +fn string_hexadecimal_exponent_error() { + assert_eq!("Exponent has no digits", "0x1p".parse::().unwrap_err().0); + assert_eq!("Exponent has no digits", "+0x1p".parse::().unwrap_err().0); + assert_eq!("Exponent has no digits", "-0x1p".parse::().unwrap_err().0); + + assert_eq!("Exponent has no digits", "0x1p+".parse::().unwrap_err().0); + assert_eq!("Exponent has no digits", "+0x1p+".parse::().unwrap_err().0); + assert_eq!("Exponent has no digits", "-0x1p+".parse::().unwrap_err().0); + + assert_eq!("Exponent has no digits", "0x1p-".parse::().unwrap_err().0); + assert_eq!("Exponent has no digits", "+0x1p-".parse::().unwrap_err().0); + assert_eq!("Exponent has no digits", "-0x1p-".parse::().unwrap_err().0); + + assert_eq!("Exponent has no digits", "0x1.p".parse::().unwrap_err().0); + assert_eq!("Exponent has no digits", "+0x1.p".parse::().unwrap_err().0); + assert_eq!("Exponent has no digits", "-0x1.p".parse::().unwrap_err().0); + + assert_eq!("Exponent has no digits", "0x1.p+".parse::().unwrap_err().0); + assert_eq!("Exponent has no digits", "+0x1.p+".parse::().unwrap_err().0); + assert_eq!("Exponent has no digits", "-0x1.p+".parse::().unwrap_err().0); + + assert_eq!("Exponent has no digits", "0x1.p-".parse::().unwrap_err().0); + assert_eq!("Exponent has no digits", "+0x1.p-".parse::().unwrap_err().0); + assert_eq!("Exponent has no digits", "-0x1.p-".parse::().unwrap_err().0); + + assert_eq!("Exponent has no digits", "0x.1p".parse::().unwrap_err().0); + assert_eq!("Exponent has no digits", "+0x.1p".parse::().unwrap_err().0); + assert_eq!("Exponent has no digits", 
"-0x.1p".parse::().unwrap_err().0); + + assert_eq!("Exponent has no digits", "0x.1p+".parse::().unwrap_err().0); + assert_eq!("Exponent has no digits", "+0x.1p+".parse::().unwrap_err().0); + assert_eq!("Exponent has no digits", "-0x.1p+".parse::().unwrap_err().0); + + assert_eq!("Exponent has no digits", "0x.1p-".parse::().unwrap_err().0); + assert_eq!("Exponent has no digits", "+0x.1p-".parse::().unwrap_err().0); + assert_eq!("Exponent has no digits", "-0x.1p-".parse::().unwrap_err().0); + + assert_eq!("Exponent has no digits", "0x1.1p".parse::().unwrap_err().0); + assert_eq!("Exponent has no digits", "+0x1.1p".parse::().unwrap_err().0); + assert_eq!("Exponent has no digits", "-0x1.1p".parse::().unwrap_err().0); + + assert_eq!("Exponent has no digits", "0x1.1p+".parse::().unwrap_err().0); + assert_eq!("Exponent has no digits", "+0x1.1p+".parse::().unwrap_err().0); + assert_eq!("Exponent has no digits", "-0x1.1p+".parse::().unwrap_err().0); + + assert_eq!("Exponent has no digits", "0x1.1p-".parse::().unwrap_err().0); + assert_eq!("Exponent has no digits", "+0x1.1p-".parse::().unwrap_err().0); + assert_eq!("Exponent has no digits", "-0x1.1p-".parse::().unwrap_err().0); } #[test] @@ -1351,6 +1630,104 @@ fn round_to_integral() { assert!(p.to_f64().is_infinite() && p.to_f64() > 0.0); let p = (-Double::INFINITY).round_to_integral(Round::TowardZero).value; assert!(p.to_f64().is_infinite() && p.to_f64() < 0.0); + + let mut status; + + let p = unpack!(status=, Double::NAN.round_to_integral(Round::TowardZero)); + assert!(p.is_nan()); + assert!(!p.is_negative()); + assert_eq!(Status::OK, status); + + let p = unpack!(status=, (-Double::NAN).round_to_integral(Round::TowardZero)); + assert!(p.is_nan()); + assert!(p.is_negative()); + assert_eq!(Status::OK, status); + + let p = unpack!(status=, Double::snan(None).round_to_integral(Round::TowardZero)); + assert!(p.is_nan()); + assert!(!p.is_signaling()); + assert!(!p.is_negative()); + assert_eq!(Status::INVALID_OP, status); + + 
let p = unpack!(status=, (-Double::snan(None)).round_to_integral(Round::TowardZero)); + assert!(p.is_nan()); + assert!(!p.is_signaling()); + assert!(p.is_negative()); + assert_eq!(Status::INVALID_OP, status); + + let p = unpack!(status=, Double::INFINITY.round_to_integral(Round::TowardZero)); + assert!(p.is_infinite()); + assert!(!p.is_negative()); + assert_eq!(Status::OK, status); + + let p = unpack!(status=, (-Double::INFINITY).round_to_integral(Round::TowardZero)); + assert!(p.is_infinite()); + assert!(p.is_negative()); + assert_eq!(Status::OK, status); + + let p = unpack!(status=, Double::ZERO.round_to_integral(Round::TowardZero)); + assert!(p.is_zero()); + assert!(!p.is_negative()); + assert_eq!(Status::OK, status); + + let p = unpack!(status=, Double::ZERO.round_to_integral(Round::TowardNegative)); + assert!(p.is_zero()); + assert!(!p.is_negative()); + assert_eq!(Status::OK, status); + + let p = unpack!(status=, (-Double::ZERO).round_to_integral(Round::TowardZero)); + assert!(p.is_zero()); + assert!(p.is_negative()); + assert_eq!(Status::OK, status); + + let p = unpack!(status=, (-Double::ZERO).round_to_integral(Round::TowardNegative)); + assert!(p.is_zero()); + assert!(p.is_negative()); + assert_eq!(Status::OK, status); + + let p = unpack!(status=, Double::from_f64(1E-100).round_to_integral(Round::TowardNegative)); + assert!(p.is_zero()); + assert!(!p.is_negative()); + assert_eq!(Status::INEXACT, status); + + let p = unpack!(status=, Double::from_f64(1E-100).round_to_integral(Round::TowardPositive)); + assert_eq!(1.0, p.to_f64()); + assert!(!p.is_negative()); + assert_eq!(Status::INEXACT, status); + + let p = unpack!(status=, Double::from_f64(-1E-100).round_to_integral(Round::TowardNegative)); + assert!(p.is_negative()); + assert_eq!(-1.0, p.to_f64()); + assert_eq!(Status::INEXACT, status); + + let p = unpack!(status=, Double::from_f64(-1E-100).round_to_integral(Round::TowardPositive)); + assert!(p.is_zero()); + assert!(p.is_negative()); + 
assert_eq!(Status::INEXACT, status); + + let p = unpack!(status=, Double::from_f64(10.0).round_to_integral(Round::TowardZero)); + assert_eq!(10.0, p.to_f64()); + assert_eq!(Status::OK, status); + + let p = unpack!(status=, Double::from_f64(10.5).round_to_integral(Round::TowardZero)); + assert_eq!(10.0, p.to_f64()); + assert_eq!(Status::INEXACT, status); + + let p = unpack!(status=, Double::from_f64(10.5).round_to_integral(Round::TowardPositive)); + assert_eq!(11.0, p.to_f64()); + assert_eq!(Status::INEXACT, status); + + let p = unpack!(status=, Double::from_f64(10.5).round_to_integral(Round::TowardNegative)); + assert_eq!(10.0, p.to_f64()); + assert_eq!(Status::INEXACT, status); + + let p = unpack!(status=, Double::from_f64(10.5).round_to_integral(Round::NearestTiesToAway)); + assert_eq!(11.0, p.to_f64()); + assert_eq!(Status::INEXACT, status); + + let p = unpack!(status=, Double::from_f64(10.5).round_to_integral(Round::NearestTiesToEven)); + assert_eq!(10.0, p.to_f64()); + assert_eq!(Status::INEXACT, status); } #[test] @@ -1373,6 +1750,7 @@ fn is_integer() { fn largest() { assert_eq!(3.402823466e+38, Single::largest().to_f32()); assert_eq!(1.7976931348623158e+308, Double::largest().to_f64()); + assert_eq!(448.0, Float8E4M3FN::largest().to_f64()); } #[test] @@ -1414,6 +1792,7 @@ fn smallest_normalized() { assert!(test.is_finite_non_zero()); assert!(!test.is_denormal()); assert!(test.bitwise_eq(expected)); + assert!(test.is_smallest_normalized()); let test = -Single::smallest_normalized(); let expected = "-0x1p-126".parse::().unwrap(); @@ -1421,6 +1800,23 @@ fn smallest_normalized() { assert!(test.is_finite_non_zero()); assert!(!test.is_denormal()); assert!(test.bitwise_eq(expected)); + assert!(test.is_smallest_normalized()); + + let test = Double::smallest_normalized(); + let expected = "0x1p-1022".parse::().unwrap(); + assert!(!test.is_negative()); + assert!(test.is_finite_non_zero()); + assert!(!test.is_denormal()); + assert!(test.bitwise_eq(expected)); + 
assert!(test.is_smallest_normalized()); + + let test = -Double::smallest_normalized(); + let expected = "-0x1p-1022".parse::().unwrap(); + assert!(test.is_negative()); + assert!(test.is_finite_non_zero()); + assert!(!test.is_denormal()); + assert!(test.bitwise_eq(expected)); + assert!(test.is_smallest_normalized()); let test = Quad::smallest_normalized(); let expected = "0x1p-16382".parse::().unwrap(); @@ -1428,6 +1824,7 @@ fn smallest_normalized() { assert!(test.is_finite_non_zero()); assert!(!test.is_denormal()); assert!(test.bitwise_eq(expected)); + assert!(test.is_smallest_normalized()); let test = -Quad::smallest_normalized(); let expected = "-0x1p-16382".parse::().unwrap(); @@ -1435,6 +1832,7 @@ fn smallest_normalized() { assert!(test.is_finite_non_zero()); assert!(!test.is_denormal()); assert!(test.bitwise_eq(expected)); + assert!(test.is_smallest_normalized()); } #[test] @@ -1447,10 +1845,10 @@ fn zero() { assert_eq!(-0.0, Double::from_f64(-0.0).to_f64()); assert!(Double::from_f64(-0.0).is_negative()); - fn test(sign: bool, bits: u128) { - let test = if sign { -T::ZERO } else { T::ZERO }; + fn test(sign: bool, bits: u128) { + let test = if sign { -F::ZERO } else { F::ZERO }; let pattern = if sign { "-0x0p+0" } else { "0x0p+0" }; - let expected = pattern.parse::().unwrap(); + let expected = pattern.parse::().unwrap(); assert!(test.is_zero()); assert_eq!(sign, test.is_negative()); assert!(test.bitwise_eq(expected)); @@ -1466,6 +1864,10 @@ fn zero() { test::(true, 0x8000000000000000_0000000000000000); test::(false, 0); test::(true, 0x8000_0000000000000000); + test::(false, 0); + test::(true, 0x80); + test::(false, 0); + test::(true, 0x80); } #[test] @@ -1479,6 +1881,8 @@ fn copy_sign() { #[test] fn convert() { let mut loses_info = false; + let mut status; + let test = "1.0".parse::().unwrap(); let test: Single = test.convert(&mut loses_info).value; assert_eq!(1.0, test.to_f32()); @@ -1504,10 +1908,11 @@ fn convert() { assert!(!loses_info); let test = 
Single::snan(None); - let x87_snan = X87DoubleExtended::snan(None); - let test: X87DoubleExtended = test.convert(&mut loses_info).value; - assert!(test.bitwise_eq(x87_snan)); + let test: X87DoubleExtended = unpack!(status=, test.convert(&mut loses_info)); + // Conversion quiets the SNAN, so now 2 bits of the 64-bit significand should be set. + assert!(test.bitwise_eq(X87DoubleExtended::qnan(Some(0x6000000000000000)))); assert!(!loses_info); + assert_eq!(status, Status::INVALID_OP); let test = Single::qnan(None); let x87_qnan = X87DoubleExtended::qnan(None); @@ -1515,15 +1920,75 @@ fn convert() { assert!(test.bitwise_eq(x87_qnan)); assert!(!loses_info); - let test = X87DoubleExtended::snan(None); + // NOTE(eddyb) these were mistakenly noops upstream, here they're already + // fixed (by instead converting from `Double` to `X87DoubleExtended`), + // see also upstream issue https://github.com/llvm/llvm-project/issues/63842. + let test = Double::snan(None); let test: X87DoubleExtended = test.convert(&mut loses_info).value; - assert!(test.bitwise_eq(x87_snan)); + // Conversion quiets the SNAN, so now 2 bits of the 64-bit significand should be set. + assert!(test.bitwise_eq(X87DoubleExtended::qnan(Some(0x6000000000000000)))); assert!(!loses_info); - let test = X87DoubleExtended::qnan(None); + let test = Double::qnan(None); let test: X87DoubleExtended = test.convert(&mut loses_info).value; assert!(test.bitwise_eq(x87_qnan)); assert!(!loses_info); + + // The payload is lost in truncation, but we retain NaN by setting the quiet bit. + let test = Double::snan(Some(1)); + let test: Single = unpack!(status=, test.convert(&mut loses_info)); + assert_eq!(0x7fc00000, test.to_bits()); + assert!(loses_info); + assert_eq!(status, Status::INVALID_OP); + + // The payload is lost in truncation. QNaN remains QNaN. 
+ let test = Double::qnan(Some(1)); + let test: Single = unpack!(status=, test.convert(&mut loses_info)); + assert_eq!(0x7fc00000, test.to_bits()); + assert!(loses_info); + assert_eq!(status, Status::OK); + + // Test that subnormals are handled correctly in double to float conversion + let test = "0x0.0000010000000p-1022".parse::().unwrap(); + let test: Single = unpack!(status=, test.convert(&mut loses_info)); + assert_eq!(0.0, test.to_f32()); + assert!(loses_info); + + let test = "0x0.0000010000001p-1022".parse::().unwrap(); + let test: Single = unpack!(status=, test.convert(&mut loses_info)); + assert_eq!(0.0, test.to_f32()); + assert!(loses_info); + + let test = "-0x0.0000010000001p-1022".parse::().unwrap(); + let test: Single = unpack!(status=, test.convert(&mut loses_info)); + assert_eq!(0.0, test.to_f32()); + assert!(loses_info); + + let test = "0x0.0000020000000p-1022".parse::().unwrap(); + let test: Single = unpack!(status=, test.convert(&mut loses_info)); + assert_eq!(0.0, test.to_f32()); + assert!(loses_info); + + let test = "0x0.0000020000001p-1022".parse::().unwrap(); + let test: Single = unpack!(status=, test.convert(&mut loses_info)); + assert_eq!(0.0, test.to_f32()); + assert!(loses_info); + + // Test subnormal conversion to bfloat + let test = "0x0.01p-126".parse::().unwrap(); + let test: BFloat = unpack!(status=, test.convert(&mut loses_info)); + assert_eq!(0.0, test.to_f32()); + assert!(loses_info); + + let test = "0x0.02p-126".parse::().unwrap(); + let test: BFloat = unpack!(status=, test.convert(&mut loses_info)); + assert_eq!(0x01, test.to_bits()); + assert!(!loses_info); + + let test = "0x0.01p-126".parse::().unwrap(); + let test: BFloat = unpack!(status=, test.convert_r(Round::NearestTiesToAway, &mut loses_info)); + assert_eq!(0x01, test.to_bits()); + assert!(loses_info); } #[test] @@ -1573,7 +2038,17 @@ fn is_finite() { fn is_infinite() { let t = "0x1p+0".parse::().unwrap(); assert!(!t.is_infinite()); - 
assert!(Single::INFINITY.is_infinite()); + + let pos_inf = Single::INFINITY; + let neg_inf = -Single::INFINITY; + + assert!(pos_inf.is_infinite()); + assert!(pos_inf.is_pos_infinity()); + assert!(!pos_inf.is_neg_infinity()); + assert!(neg_inf.is_infinite()); + assert!(!neg_inf.is_pos_infinity()); + assert!(neg_inf.is_neg_infinity()); + assert!(!Single::ZERO.is_infinite()); assert!(!Single::NAN.is_infinite()); assert!(!Single::snan(None).is_infinite()); @@ -1624,16 +2099,12 @@ fn is_finite_non_zero() { fn add() { // Test Special Cases against each other and normal values. - // FIXMES/NOTES: - // 1. Since we perform only default exception handling all operations with - // signaling NaNs should have a result that is a quiet NaN. Currently they - // return sNaN. - let p_inf = Single::INFINITY; let m_inf = -Single::INFINITY; let p_zero = Single::ZERO; let m_zero = -Single::ZERO; let qnan = Single::NAN; + let snan = "snan123".parse::().unwrap(); let p_normal_value = "0x1p+0".parse::().unwrap(); let m_normal_value = "-0x1p+0".parse::().unwrap(); let p_largest_value = Single::largest(); @@ -1651,10 +2122,7 @@ fn add() { (p_inf, p_zero, "inf", Status::OK, Category::Infinity), (p_inf, m_zero, "inf", Status::OK, Category::Infinity), (p_inf, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. - (p_inf, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (p_inf, snan, "nan123", Status::INVALID_OP, Category::NaN), (p_inf, p_normal_value, "inf", Status::OK, Category::Infinity), (p_inf, m_normal_value, "inf", Status::OK, Category::Infinity), (p_inf, p_largest_value, "inf", Status::OK, Category::Infinity), @@ -1668,10 +2136,7 @@ fn add() { (m_inf, p_zero, "-inf", Status::OK, Category::Infinity), (m_inf, m_zero, "-inf", Status::OK, Category::Infinity), (m_inf, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. 
- (m_inf, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (m_inf, snan, "nan123", Status::INVALID_OP, Category::NaN), (m_inf, p_normal_value, "-inf", Status::OK, Category::Infinity), (m_inf, m_normal_value, "-inf", Status::OK, Category::Infinity), (m_inf, p_largest_value, "-inf", Status::OK, Category::Infinity), @@ -1685,10 +2150,7 @@ fn add() { (p_zero, p_zero, "0x0p+0", Status::OK, Category::Zero), (p_zero, m_zero, "0x0p+0", Status::OK, Category::Zero), (p_zero, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. - (p_zero, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (p_zero, snan, "nan123", Status::INVALID_OP, Category::NaN), (p_zero, p_normal_value, "0x1p+0", Status::OK, Category::Normal), (p_zero, m_normal_value, "-0x1p+0", Status::OK, Category::Normal), (p_zero, p_largest_value, "0x1.fffffep+127", Status::OK, Category::Normal), @@ -1702,10 +2164,7 @@ fn add() { (m_zero, p_zero, "0x0p+0", Status::OK, Category::Zero), (m_zero, m_zero, "-0x0p+0", Status::OK, Category::Zero), (m_zero, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. - (m_zero, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (m_zero, snan, "nan123", Status::INVALID_OP, Category::NaN), (m_zero, p_normal_value, "0x1p+0", Status::OK, Category::Normal), (m_zero, m_normal_value, "-0x1p+0", Status::OK, Category::Normal), (m_zero, p_largest_value, "0x1.fffffep+127", Status::OK, Category::Normal), @@ -1719,10 +2178,7 @@ fn add() { (qnan, p_zero, "nan", Status::OK, Category::NaN), (qnan, m_zero, "nan", Status::OK, Category::NaN), (qnan, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. 
(qnan, snan, "nan", Status::INVALID_OP, Category::NaN), - */ (qnan, p_normal_value, "nan", Status::OK, Category::NaN), (qnan, m_normal_value, "nan", Status::OK, Category::NaN), (qnan, p_largest_value, "nan", Status::OK, Category::NaN), @@ -1731,32 +2187,26 @@ fn add() { (qnan, m_smallest_value, "nan", Status::OK, Category::NaN), (qnan, p_smallest_normalized, "nan", Status::OK, Category::NaN), (qnan, m_smallest_normalized, "nan", Status::OK, Category::NaN), - /* - // See Note 1. - (snan, p_inf, "nan", Status::INVALID_OP, Category::NaN), - (snan, m_inf, "nan", Status::INVALID_OP, Category::NaN), - (snan, p_zero, "nan", Status::INVALID_OP, Category::NaN), - (snan, m_zero, "nan", Status::INVALID_OP, Category::NaN), - (snan, qnan, "nan", Status::INVALID_OP, Category::NaN), - (snan, snan, "nan", Status::INVALID_OP, Category::NaN), - (snan, p_normal_value, "nan", Status::INVALID_OP, Category::NaN), - (snan, m_normal_value, "nan", Status::INVALID_OP, Category::NaN), - (snan, p_largest_value, "nan", Status::INVALID_OP, Category::NaN), - (snan, m_largest_value, "nan", Status::INVALID_OP, Category::NaN), - (snan, p_smallest_value, "nan", Status::INVALID_OP, Category::NaN), - (snan, m_smallest_value, "nan", Status::INVALID_OP, Category::NaN), - (snan, p_smallest_normalized, "nan", Status::INVALID_OP, Category::NaN), - (snan, m_smallest_normalized, "nan", Status::INVALID_OP, Category::NaN), - */ + (snan, p_inf, "nan123", Status::INVALID_OP, Category::NaN), + (snan, m_inf, "nan123", Status::INVALID_OP, Category::NaN), + (snan, p_zero, "nan123", Status::INVALID_OP, Category::NaN), + (snan, m_zero, "nan123", Status::INVALID_OP, Category::NaN), + (snan, qnan, "nan123", Status::INVALID_OP, Category::NaN), + (snan, snan, "nan123", Status::INVALID_OP, Category::NaN), + (snan, p_normal_value, "nan123", Status::INVALID_OP, Category::NaN), + (snan, m_normal_value, "nan123", Status::INVALID_OP, Category::NaN), + (snan, p_largest_value, "nan123", Status::INVALID_OP, Category::NaN), + 
(snan, m_largest_value, "nan123", Status::INVALID_OP, Category::NaN), + (snan, p_smallest_value, "nan123", Status::INVALID_OP, Category::NaN), + (snan, m_smallest_value, "nan123", Status::INVALID_OP, Category::NaN), + (snan, p_smallest_normalized, "nan123", Status::INVALID_OP, Category::NaN), + (snan, m_smallest_normalized, "nan123", Status::INVALID_OP, Category::NaN), (p_normal_value, p_inf, "inf", Status::OK, Category::Infinity), (p_normal_value, m_inf, "-inf", Status::OK, Category::Infinity), (p_normal_value, p_zero, "0x1p+0", Status::OK, Category::Normal), (p_normal_value, m_zero, "0x1p+0", Status::OK, Category::Normal), (p_normal_value, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. - (p_normal_value, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (p_normal_value, snan, "nan123", Status::INVALID_OP, Category::NaN), (p_normal_value, p_normal_value, "0x1p+1", Status::OK, Category::Normal), (p_normal_value, m_normal_value, "0x0p+0", Status::OK, Category::Zero), (p_normal_value, p_largest_value, "0x1.fffffep+127", Status::INEXACT, Category::Normal), @@ -1770,10 +2220,7 @@ fn add() { (m_normal_value, p_zero, "-0x1p+0", Status::OK, Category::Normal), (m_normal_value, m_zero, "-0x1p+0", Status::OK, Category::Normal), (m_normal_value, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. - (m_normal_value, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (m_normal_value, snan, "nan123", Status::INVALID_OP, Category::NaN), (m_normal_value, p_normal_value, "0x0p+0", Status::OK, Category::Zero), (m_normal_value, m_normal_value, "-0x1p+1", Status::OK, Category::Normal), (m_normal_value, p_largest_value, "0x1.fffffep+127", Status::INEXACT, Category::Normal), @@ -1787,10 +2234,7 @@ fn add() { (p_largest_value, p_zero, "0x1.fffffep+127", Status::OK, Category::Normal), (p_largest_value, m_zero, "0x1.fffffep+127", Status::OK, Category::Normal), (p_largest_value, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. 
- (p_largest_value, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (p_largest_value, snan, "nan123", Status::INVALID_OP, Category::NaN), (p_largest_value, p_normal_value, "0x1.fffffep+127", Status::INEXACT, Category::Normal), (p_largest_value, m_normal_value, "0x1.fffffep+127", Status::INEXACT, Category::Normal), (p_largest_value, p_largest_value, "inf", overflow_status, Category::Infinity), @@ -1804,10 +2248,7 @@ fn add() { (m_largest_value, p_zero, "-0x1.fffffep+127", Status::OK, Category::Normal), (m_largest_value, m_zero, "-0x1.fffffep+127", Status::OK, Category::Normal), (m_largest_value, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. - (m_largest_value, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (m_largest_value, snan, "nan123", Status::INVALID_OP, Category::NaN), (m_largest_value, p_normal_value, "-0x1.fffffep+127", Status::INEXACT, Category::Normal), (m_largest_value, m_normal_value, "-0x1.fffffep+127", Status::INEXACT, Category::Normal), (m_largest_value, p_largest_value, "0x0p+0", Status::OK, Category::Zero), @@ -1821,10 +2262,7 @@ fn add() { (p_smallest_value, p_zero, "0x1p-149", Status::OK, Category::Normal), (p_smallest_value, m_zero, "0x1p-149", Status::OK, Category::Normal), (p_smallest_value, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. - (p_smallest_value, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (p_smallest_value, snan, "nan123", Status::INVALID_OP, Category::NaN), (p_smallest_value, p_normal_value, "0x1p+0", Status::INEXACT, Category::Normal), (p_smallest_value, m_normal_value, "-0x1p+0", Status::INEXACT, Category::Normal), (p_smallest_value, p_largest_value, "0x1.fffffep+127", Status::INEXACT, Category::Normal), @@ -1838,10 +2276,7 @@ fn add() { (m_smallest_value, p_zero, "-0x1p-149", Status::OK, Category::Normal), (m_smallest_value, m_zero, "-0x1p-149", Status::OK, Category::Normal), (m_smallest_value, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. 
- (m_smallest_value, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (m_smallest_value, snan, "nan123", Status::INVALID_OP, Category::NaN), (m_smallest_value, p_normal_value, "0x1p+0", Status::INEXACT, Category::Normal), (m_smallest_value, m_normal_value, "-0x1p+0", Status::INEXACT, Category::Normal), (m_smallest_value, p_largest_value, "0x1.fffffep+127", Status::INEXACT, Category::Normal), @@ -1855,10 +2290,7 @@ fn add() { (p_smallest_normalized, p_zero, "0x1p-126", Status::OK, Category::Normal), (p_smallest_normalized, m_zero, "0x1p-126", Status::OK, Category::Normal), (p_smallest_normalized, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. - (p_smallest_normalized, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (p_smallest_normalized, snan, "nan123", Status::INVALID_OP, Category::NaN), (p_smallest_normalized, p_normal_value, "0x1p+0", Status::INEXACT, Category::Normal), (p_smallest_normalized, m_normal_value, "-0x1p+0", Status::INEXACT, Category::Normal), (p_smallest_normalized, p_largest_value, "0x1.fffffep+127", Status::INEXACT, Category::Normal), @@ -1872,10 +2304,7 @@ fn add() { (m_smallest_normalized, p_zero, "-0x1p-126", Status::OK, Category::Normal), (m_smallest_normalized, m_zero, "-0x1p-126", Status::OK, Category::Normal), (m_smallest_normalized, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. - (m_smallest_normalized, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (m_smallest_normalized, snan, "nan123", Status::INVALID_OP, Category::NaN), (m_smallest_normalized, p_normal_value, "0x1p+0", Status::INEXACT, Category::Normal), (m_smallest_normalized, m_normal_value, "-0x1p+0", Status::INEXACT, Category::Normal), (m_smallest_normalized, p_largest_value, "0x1.fffffep+127", Status::INEXACT, Category::Normal), @@ -1886,12 +2315,12 @@ fn add() { (m_smallest_normalized, m_smallest_normalized, "-0x1p-125", Status::OK, Category::Normal), ]; - for &(x, y, e_result, e_status, e_category) in &special_cases[..] 
{ + for case @ &(x, y, e_result, e_status, e_category) in &special_cases[..] { let status; let result = unpack!(status=, x + y); - assert_eq!(status, e_status); - assert_eq!(result.category(), e_category); - assert!(result.bitwise_eq(e_result.parse::().unwrap())); + assert_eq!(e_status, status); + assert_eq!(e_category, result.category()); + assert!(result.bitwise_eq(e_result.parse::().unwrap()), "result = {result:?}, case = {case:?}"); } } @@ -1899,16 +2328,12 @@ fn add() { fn subtract() { // Test Special Cases against each other and normal values. - // FIXMES/NOTES: - // 1. Since we perform only default exception handling all operations with - // signaling NaNs should have a result that is a quiet NaN. Currently they - // return sNaN. - let p_inf = Single::INFINITY; let m_inf = -Single::INFINITY; let p_zero = Single::ZERO; let m_zero = -Single::ZERO; let qnan = Single::NAN; + let snan = "snan123".parse::().unwrap(); let p_normal_value = "0x1p+0".parse::().unwrap(); let m_normal_value = "-0x1p+0".parse::().unwrap(); let p_largest_value = Single::largest(); @@ -1925,11 +2350,8 @@ fn subtract() { (p_inf, m_inf, "inf", Status::OK, Category::Infinity), (p_inf, p_zero, "inf", Status::OK, Category::Infinity), (p_inf, m_zero, "inf", Status::OK, Category::Infinity), - (p_inf, qnan, "-nan", Status::OK, Category::NaN), - /* - // See Note 1. 
- (p_inf, snan, "-nan", Status::INVALID_OP, Category::NaN), - */ + (p_inf, qnan, "nan", Status::OK, Category::NaN), + (p_inf, snan, "nan123", Status::INVALID_OP, Category::NaN), (p_inf, p_normal_value, "inf", Status::OK, Category::Infinity), (p_inf, m_normal_value, "inf", Status::OK, Category::Infinity), (p_inf, p_largest_value, "inf", Status::OK, Category::Infinity), @@ -1942,11 +2364,8 @@ fn subtract() { (m_inf, m_inf, "nan", Status::INVALID_OP, Category::NaN), (m_inf, p_zero, "-inf", Status::OK, Category::Infinity), (m_inf, m_zero, "-inf", Status::OK, Category::Infinity), - (m_inf, qnan, "-nan", Status::OK, Category::NaN), - /* - // See Note 1. - (m_inf, snan, "-nan", Status::INVALID_OP, Category::NaN), - */ + (m_inf, qnan, "nan", Status::OK, Category::NaN), + (m_inf, snan, "nan123", Status::INVALID_OP, Category::NaN), (m_inf, p_normal_value, "-inf", Status::OK, Category::Infinity), (m_inf, m_normal_value, "-inf", Status::OK, Category::Infinity), (m_inf, p_largest_value, "-inf", Status::OK, Category::Infinity), @@ -1959,11 +2378,8 @@ fn subtract() { (p_zero, m_inf, "inf", Status::OK, Category::Infinity), (p_zero, p_zero, "0x0p+0", Status::OK, Category::Zero), (p_zero, m_zero, "0x0p+0", Status::OK, Category::Zero), - (p_zero, qnan, "-nan", Status::OK, Category::NaN), - /* - // See Note 1. - (p_zero, snan, "-nan", Status::INVALID_OP, Category::NaN), - */ + (p_zero, qnan, "nan", Status::OK, Category::NaN), + (p_zero, snan, "nan123", Status::INVALID_OP, Category::NaN), (p_zero, p_normal_value, "-0x1p+0", Status::OK, Category::Normal), (p_zero, m_normal_value, "0x1p+0", Status::OK, Category::Normal), (p_zero, p_largest_value, "-0x1.fffffep+127", Status::OK, Category::Normal), @@ -1976,11 +2392,8 @@ fn subtract() { (m_zero, m_inf, "inf", Status::OK, Category::Infinity), (m_zero, p_zero, "-0x0p+0", Status::OK, Category::Zero), (m_zero, m_zero, "0x0p+0", Status::OK, Category::Zero), - (m_zero, qnan, "-nan", Status::OK, Category::NaN), - /* - // See Note 1. 
- (m_zero, snan, "-nan", Status::INVALID_OP, Category::NaN), - */ + (m_zero, qnan, "nan", Status::OK, Category::NaN), + (m_zero, snan, "nan123", Status::INVALID_OP, Category::NaN), (m_zero, p_normal_value, "-0x1p+0", Status::OK, Category::Normal), (m_zero, m_normal_value, "0x1p+0", Status::OK, Category::Normal), (m_zero, p_largest_value, "-0x1.fffffep+127", Status::OK, Category::Normal), @@ -1994,10 +2407,7 @@ fn subtract() { (qnan, p_zero, "nan", Status::OK, Category::NaN), (qnan, m_zero, "nan", Status::OK, Category::NaN), (qnan, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. (qnan, snan, "nan", Status::INVALID_OP, Category::NaN), - */ (qnan, p_normal_value, "nan", Status::OK, Category::NaN), (qnan, m_normal_value, "nan", Status::OK, Category::NaN), (qnan, p_largest_value, "nan", Status::OK, Category::NaN), @@ -2006,32 +2416,26 @@ fn subtract() { (qnan, m_smallest_value, "nan", Status::OK, Category::NaN), (qnan, p_smallest_normalized, "nan", Status::OK, Category::NaN), (qnan, m_smallest_normalized, "nan", Status::OK, Category::NaN), - /* - // See Note 1. 
- (snan, p_inf, "nan", Status::INVALID_OP, Category::NaN), - (snan, m_inf, "nan", Status::INVALID_OP, Category::NaN), - (snan, p_zero, "nan", Status::INVALID_OP, Category::NaN), - (snan, m_zero, "nan", Status::INVALID_OP, Category::NaN), - (snan, qnan, "nan", Status::INVALID_OP, Category::NaN), - (snan, snan, "nan", Status::INVALID_OP, Category::NaN), - (snan, p_normal_value, "nan", Status::INVALID_OP, Category::NaN), - (snan, m_normal_value, "nan", Status::INVALID_OP, Category::NaN), - (snan, p_largest_value, "nan", Status::INVALID_OP, Category::NaN), - (snan, m_largest_value, "nan", Status::INVALID_OP, Category::NaN), - (snan, p_smallest_value, "nan", Status::INVALID_OP, Category::NaN), - (snan, m_smallest_value, "nan", Status::INVALID_OP, Category::NaN), - (snan, p_smallest_normalized, "nan", Status::INVALID_OP, Category::NaN), - (snan, m_smallest_normalized, "nan", Status::INVALID_OP, Category::NaN), - */ + (snan, p_inf, "nan123", Status::INVALID_OP, Category::NaN), + (snan, m_inf, "nan123", Status::INVALID_OP, Category::NaN), + (snan, p_zero, "nan123", Status::INVALID_OP, Category::NaN), + (snan, m_zero, "nan123", Status::INVALID_OP, Category::NaN), + (snan, qnan, "nan123", Status::INVALID_OP, Category::NaN), + (snan, snan, "nan123", Status::INVALID_OP, Category::NaN), + (snan, p_normal_value, "nan123", Status::INVALID_OP, Category::NaN), + (snan, m_normal_value, "nan123", Status::INVALID_OP, Category::NaN), + (snan, p_largest_value, "nan123", Status::INVALID_OP, Category::NaN), + (snan, m_largest_value, "nan123", Status::INVALID_OP, Category::NaN), + (snan, p_smallest_value, "nan123", Status::INVALID_OP, Category::NaN), + (snan, m_smallest_value, "nan123", Status::INVALID_OP, Category::NaN), + (snan, p_smallest_normalized, "nan123", Status::INVALID_OP, Category::NaN), + (snan, m_smallest_normalized, "nan123", Status::INVALID_OP, Category::NaN), (p_normal_value, p_inf, "-inf", Status::OK, Category::Infinity), (p_normal_value, m_inf, "inf", Status::OK, 
Category::Infinity), (p_normal_value, p_zero, "0x1p+0", Status::OK, Category::Normal), (p_normal_value, m_zero, "0x1p+0", Status::OK, Category::Normal), - (p_normal_value, qnan, "-nan", Status::OK, Category::NaN), - /* - // See Note 1. - (p_normal_value, snan, "-nan", Status::INVALID_OP, Category::NaN), - */ + (p_normal_value, qnan, "nan", Status::OK, Category::NaN), + (p_normal_value, snan, "nan123", Status::INVALID_OP, Category::NaN), (p_normal_value, p_normal_value, "0x0p+0", Status::OK, Category::Zero), (p_normal_value, m_normal_value, "0x1p+1", Status::OK, Category::Normal), (p_normal_value, p_largest_value, "-0x1.fffffep+127", Status::INEXACT, Category::Normal), @@ -2044,11 +2448,8 @@ fn subtract() { (m_normal_value, m_inf, "inf", Status::OK, Category::Infinity), (m_normal_value, p_zero, "-0x1p+0", Status::OK, Category::Normal), (m_normal_value, m_zero, "-0x1p+0", Status::OK, Category::Normal), - (m_normal_value, qnan, "-nan", Status::OK, Category::NaN), - /* - // See Note 1. - (m_normal_value, snan, "-nan", Status::INVALID_OP, Category::NaN), - */ + (m_normal_value, qnan, "nan", Status::OK, Category::NaN), + (m_normal_value, snan, "nan123", Status::INVALID_OP, Category::NaN), (m_normal_value, p_normal_value, "-0x1p+1", Status::OK, Category::Normal), (m_normal_value, m_normal_value, "0x0p+0", Status::OK, Category::Zero), (m_normal_value, p_largest_value, "-0x1.fffffep+127", Status::INEXACT, Category::Normal), @@ -2061,11 +2462,8 @@ fn subtract() { (p_largest_value, m_inf, "inf", Status::OK, Category::Infinity), (p_largest_value, p_zero, "0x1.fffffep+127", Status::OK, Category::Normal), (p_largest_value, m_zero, "0x1.fffffep+127", Status::OK, Category::Normal), - (p_largest_value, qnan, "-nan", Status::OK, Category::NaN), - /* - // See Note 1. 
- (p_largest_value, snan, "-nan", Status::INVALID_OP, Category::NaN), - */ + (p_largest_value, qnan, "nan", Status::OK, Category::NaN), + (p_largest_value, snan, "nan123", Status::INVALID_OP, Category::NaN), (p_largest_value, p_normal_value, "0x1.fffffep+127", Status::INEXACT, Category::Normal), (p_largest_value, m_normal_value, "0x1.fffffep+127", Status::INEXACT, Category::Normal), (p_largest_value, p_largest_value, "0x0p+0", Status::OK, Category::Zero), @@ -2078,11 +2476,8 @@ fn subtract() { (m_largest_value, m_inf, "inf", Status::OK, Category::Infinity), (m_largest_value, p_zero, "-0x1.fffffep+127", Status::OK, Category::Normal), (m_largest_value, m_zero, "-0x1.fffffep+127", Status::OK, Category::Normal), - (m_largest_value, qnan, "-nan", Status::OK, Category::NaN), - /* - // See Note 1. - (m_largest_value, snan, "-nan", Status::INVALID_OP, Category::NaN), - */ + (m_largest_value, qnan, "nan", Status::OK, Category::NaN), + (m_largest_value, snan, "nan123", Status::INVALID_OP, Category::NaN), (m_largest_value, p_normal_value, "-0x1.fffffep+127", Status::INEXACT, Category::Normal), (m_largest_value, m_normal_value, "-0x1.fffffep+127", Status::INEXACT, Category::Normal), (m_largest_value, p_largest_value, "-inf", overflow_status, Category::Infinity), @@ -2095,11 +2490,8 @@ fn subtract() { (p_smallest_value, m_inf, "inf", Status::OK, Category::Infinity), (p_smallest_value, p_zero, "0x1p-149", Status::OK, Category::Normal), (p_smallest_value, m_zero, "0x1p-149", Status::OK, Category::Normal), - (p_smallest_value, qnan, "-nan", Status::OK, Category::NaN), - /* - // See Note 1. 
- (p_smallest_value, snan, "-nan", Status::INVALID_OP, Category::NaN), - */ + (p_smallest_value, qnan, "nan", Status::OK, Category::NaN), + (p_smallest_value, snan, "nan123", Status::INVALID_OP, Category::NaN), (p_smallest_value, p_normal_value, "-0x1p+0", Status::INEXACT, Category::Normal), (p_smallest_value, m_normal_value, "0x1p+0", Status::INEXACT, Category::Normal), (p_smallest_value, p_largest_value, "-0x1.fffffep+127", Status::INEXACT, Category::Normal), @@ -2112,11 +2504,8 @@ fn subtract() { (m_smallest_value, m_inf, "inf", Status::OK, Category::Infinity), (m_smallest_value, p_zero, "-0x1p-149", Status::OK, Category::Normal), (m_smallest_value, m_zero, "-0x1p-149", Status::OK, Category::Normal), - (m_smallest_value, qnan, "-nan", Status::OK, Category::NaN), - /* - // See Note 1. - (m_smallest_value, snan, "-nan", Status::INVALID_OP, Category::NaN), - */ + (m_smallest_value, qnan, "nan", Status::OK, Category::NaN), + (m_smallest_value, snan, "nan123", Status::INVALID_OP, Category::NaN), (m_smallest_value, p_normal_value, "-0x1p+0", Status::INEXACT, Category::Normal), (m_smallest_value, m_normal_value, "0x1p+0", Status::INEXACT, Category::Normal), (m_smallest_value, p_largest_value, "-0x1.fffffep+127", Status::INEXACT, Category::Normal), @@ -2129,11 +2518,8 @@ fn subtract() { (p_smallest_normalized, m_inf, "inf", Status::OK, Category::Infinity), (p_smallest_normalized, p_zero, "0x1p-126", Status::OK, Category::Normal), (p_smallest_normalized, m_zero, "0x1p-126", Status::OK, Category::Normal), - (p_smallest_normalized, qnan, "-nan", Status::OK, Category::NaN), - /* - // See Note 1. 
- (p_smallest_normalized, snan, "-nan", Status::INVALID_OP, Category::NaN), - */ + (p_smallest_normalized, qnan, "nan", Status::OK, Category::NaN), + (p_smallest_normalized, snan, "nan123", Status::INVALID_OP, Category::NaN), (p_smallest_normalized, p_normal_value, "-0x1p+0", Status::INEXACT, Category::Normal), (p_smallest_normalized, m_normal_value, "0x1p+0", Status::INEXACT, Category::Normal), (p_smallest_normalized, p_largest_value, "-0x1.fffffep+127", Status::INEXACT, Category::Normal), @@ -2146,11 +2532,8 @@ fn subtract() { (m_smallest_normalized, m_inf, "inf", Status::OK, Category::Infinity), (m_smallest_normalized, p_zero, "-0x1p-126", Status::OK, Category::Normal), (m_smallest_normalized, m_zero, "-0x1p-126", Status::OK, Category::Normal), - (m_smallest_normalized, qnan, "-nan", Status::OK, Category::NaN), - /* - // See Note 1. - (m_smallest_normalized, snan, "-nan", Status::INVALID_OP, Category::NaN), - */ + (m_smallest_normalized, qnan, "nan", Status::OK, Category::NaN), + (m_smallest_normalized, snan, "nan123", Status::INVALID_OP, Category::NaN), (m_smallest_normalized, p_normal_value, "-0x1p+0", Status::INEXACT, Category::Normal), (m_smallest_normalized, m_normal_value, "0x1p+0", Status::INEXACT, Category::Normal), (m_smallest_normalized, p_largest_value, "-0x1.fffffep+127", Status::INEXACT, Category::Normal), @@ -2161,12 +2544,12 @@ fn subtract() { (m_smallest_normalized, m_smallest_normalized, "0x0p+0", Status::OK, Category::Zero), ]; - for &(x, y, e_result, e_status, e_category) in &special_cases[..] { + for case @ &(x, y, e_result, e_status, e_category) in &special_cases[..] 
{ let status; let result = unpack!(status=, x - y); - assert_eq!(status, e_status); - assert_eq!(result.category(), e_category); - assert!(result.bitwise_eq(e_result.parse::().unwrap())); + assert_eq!(e_status, status); + assert_eq!(e_category, result.category()); + assert!(result.bitwise_eq(e_result.parse::().unwrap()), "result = {result:?}, case = {case:?}"); } } @@ -2174,16 +2557,12 @@ fn subtract() { fn multiply() { // Test Special Cases against each other and normal values. - // FIXMES/NOTES: - // 1. Since we perform only default exception handling all operations with - // signaling NaNs should have a result that is a quiet NaN. Currently they - // return sNaN. - let p_inf = Single::INFINITY; let m_inf = -Single::INFINITY; let p_zero = Single::ZERO; let m_zero = -Single::ZERO; let qnan = Single::NAN; + let snan = "snan123".parse::().unwrap(); let p_normal_value = "0x1p+0".parse::().unwrap(); let m_normal_value = "-0x1p+0".parse::().unwrap(); let p_largest_value = Single::largest(); @@ -2193,19 +2572,20 @@ fn multiply() { let p_smallest_normalized = Single::smallest_normalized(); let m_smallest_normalized = -Single::smallest_normalized(); + let max_quad = "0x1.ffffffffffffffffffffffffffffp+16383".parse::().unwrap(); + let min_quad = "0x0.0000000000000000000000000001p-16382".parse::().unwrap(); + let n_min_quad = "-0x0.0000000000000000000000000001p-16382".parse::().unwrap(); + let overflow_status = Status::OVERFLOW | Status::INEXACT; let underflow_status = Status::UNDERFLOW | Status::INEXACT; - let special_cases = [ + let single_special_cases = [ (p_inf, p_inf, "inf", Status::OK, Category::Infinity), (p_inf, m_inf, "-inf", Status::OK, Category::Infinity), (p_inf, p_zero, "nan", Status::INVALID_OP, Category::NaN), (p_inf, m_zero, "nan", Status::INVALID_OP, Category::NaN), (p_inf, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. 
- (p_inf, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (p_inf, snan, "nan123", Status::INVALID_OP, Category::NaN), (p_inf, p_normal_value, "inf", Status::OK, Category::Infinity), (p_inf, m_normal_value, "-inf", Status::OK, Category::Infinity), (p_inf, p_largest_value, "inf", Status::OK, Category::Infinity), @@ -2219,10 +2599,7 @@ fn multiply() { (m_inf, p_zero, "nan", Status::INVALID_OP, Category::NaN), (m_inf, m_zero, "nan", Status::INVALID_OP, Category::NaN), (m_inf, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. - (m_inf, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (m_inf, snan, "nan123", Status::INVALID_OP, Category::NaN), (m_inf, p_normal_value, "-inf", Status::OK, Category::Infinity), (m_inf, m_normal_value, "inf", Status::OK, Category::Infinity), (m_inf, p_largest_value, "-inf", Status::OK, Category::Infinity), @@ -2236,10 +2613,7 @@ fn multiply() { (p_zero, p_zero, "0x0p+0", Status::OK, Category::Zero), (p_zero, m_zero, "-0x0p+0", Status::OK, Category::Zero), (p_zero, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. - (p_zero, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (p_zero, snan, "nan123", Status::INVALID_OP, Category::NaN), (p_zero, p_normal_value, "0x0p+0", Status::OK, Category::Zero), (p_zero, m_normal_value, "-0x0p+0", Status::OK, Category::Zero), (p_zero, p_largest_value, "0x0p+0", Status::OK, Category::Zero), @@ -2253,10 +2627,7 @@ fn multiply() { (m_zero, p_zero, "-0x0p+0", Status::OK, Category::Zero), (m_zero, m_zero, "0x0p+0", Status::OK, Category::Zero), (m_zero, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. 
- (m_zero, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (m_zero, snan, "nan123", Status::INVALID_OP, Category::NaN), (m_zero, p_normal_value, "-0x0p+0", Status::OK, Category::Zero), (m_zero, m_normal_value, "0x0p+0", Status::OK, Category::Zero), (m_zero, p_largest_value, "-0x0p+0", Status::OK, Category::Zero), @@ -2270,10 +2641,7 @@ fn multiply() { (qnan, p_zero, "nan", Status::OK, Category::NaN), (qnan, m_zero, "nan", Status::OK, Category::NaN), (qnan, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. (qnan, snan, "nan", Status::INVALID_OP, Category::NaN), - */ (qnan, p_normal_value, "nan", Status::OK, Category::NaN), (qnan, m_normal_value, "nan", Status::OK, Category::NaN), (qnan, p_largest_value, "nan", Status::OK, Category::NaN), @@ -2282,32 +2650,26 @@ fn multiply() { (qnan, m_smallest_value, "nan", Status::OK, Category::NaN), (qnan, p_smallest_normalized, "nan", Status::OK, Category::NaN), (qnan, m_smallest_normalized, "nan", Status::OK, Category::NaN), - /* - // See Note 1. 
- (snan, p_inf, "nan", Status::INVALID_OP, Category::NaN), - (snan, m_inf, "nan", Status::INVALID_OP, Category::NaN), - (snan, p_zero, "nan", Status::INVALID_OP, Category::NaN), - (snan, m_zero, "nan", Status::INVALID_OP, Category::NaN), - (snan, qnan, "nan", Status::INVALID_OP, Category::NaN), - (snan, snan, "nan", Status::INVALID_OP, Category::NaN), - (snan, p_normal_value, "nan", Status::INVALID_OP, Category::NaN), - (snan, m_normal_value, "nan", Status::INVALID_OP, Category::NaN), - (snan, p_largest_value, "nan", Status::INVALID_OP, Category::NaN), - (snan, m_largest_value, "nan", Status::INVALID_OP, Category::NaN), - (snan, p_smallest_value, "nan", Status::INVALID_OP, Category::NaN), - (snan, m_smallest_value, "nan", Status::INVALID_OP, Category::NaN), - (snan, p_smallest_normalized, "nan", Status::INVALID_OP, Category::NaN), - (snan, m_smallest_normalized, "nan", Status::INVALID_OP, Category::NaN), - */ + (snan, p_inf, "nan123", Status::INVALID_OP, Category::NaN), + (snan, m_inf, "nan123", Status::INVALID_OP, Category::NaN), + (snan, p_zero, "nan123", Status::INVALID_OP, Category::NaN), + (snan, m_zero, "nan123", Status::INVALID_OP, Category::NaN), + (snan, qnan, "nan123", Status::INVALID_OP, Category::NaN), + (snan, snan, "nan123", Status::INVALID_OP, Category::NaN), + (snan, p_normal_value, "nan123", Status::INVALID_OP, Category::NaN), + (snan, m_normal_value, "nan123", Status::INVALID_OP, Category::NaN), + (snan, p_largest_value, "nan123", Status::INVALID_OP, Category::NaN), + (snan, m_largest_value, "nan123", Status::INVALID_OP, Category::NaN), + (snan, p_smallest_value, "nan123", Status::INVALID_OP, Category::NaN), + (snan, m_smallest_value, "nan123", Status::INVALID_OP, Category::NaN), + (snan, p_smallest_normalized, "nan123", Status::INVALID_OP, Category::NaN), + (snan, m_smallest_normalized, "nan123", Status::INVALID_OP, Category::NaN), (p_normal_value, p_inf, "inf", Status::OK, Category::Infinity), (p_normal_value, m_inf, "-inf", Status::OK, 
Category::Infinity), (p_normal_value, p_zero, "0x0p+0", Status::OK, Category::Zero), (p_normal_value, m_zero, "-0x0p+0", Status::OK, Category::Zero), (p_normal_value, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. - (p_normal_value, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (p_normal_value, snan, "nan123", Status::INVALID_OP, Category::NaN), (p_normal_value, p_normal_value, "0x1p+0", Status::OK, Category::Normal), (p_normal_value, m_normal_value, "-0x1p+0", Status::OK, Category::Normal), (p_normal_value, p_largest_value, "0x1.fffffep+127", Status::OK, Category::Normal), @@ -2321,10 +2683,7 @@ fn multiply() { (m_normal_value, p_zero, "-0x0p+0", Status::OK, Category::Zero), (m_normal_value, m_zero, "0x0p+0", Status::OK, Category::Zero), (m_normal_value, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. - (m_normal_value, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (m_normal_value, snan, "nan123", Status::INVALID_OP, Category::NaN), (m_normal_value, p_normal_value, "-0x1p+0", Status::OK, Category::Normal), (m_normal_value, m_normal_value, "0x1p+0", Status::OK, Category::Normal), (m_normal_value, p_largest_value, "-0x1.fffffep+127", Status::OK, Category::Normal), @@ -2338,10 +2697,7 @@ fn multiply() { (p_largest_value, p_zero, "0x0p+0", Status::OK, Category::Zero), (p_largest_value, m_zero, "-0x0p+0", Status::OK, Category::Zero), (p_largest_value, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. 
- (p_largest_value, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (p_largest_value, snan, "nan123", Status::INVALID_OP, Category::NaN), (p_largest_value, p_normal_value, "0x1.fffffep+127", Status::OK, Category::Normal), (p_largest_value, m_normal_value, "-0x1.fffffep+127", Status::OK, Category::Normal), (p_largest_value, p_largest_value, "inf", overflow_status, Category::Infinity), @@ -2355,10 +2711,7 @@ fn multiply() { (m_largest_value, p_zero, "-0x0p+0", Status::OK, Category::Zero), (m_largest_value, m_zero, "0x0p+0", Status::OK, Category::Zero), (m_largest_value, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. - (m_largest_value, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (m_largest_value, snan, "nan123", Status::INVALID_OP, Category::NaN), (m_largest_value, p_normal_value, "-0x1.fffffep+127", Status::OK, Category::Normal), (m_largest_value, m_normal_value, "0x1.fffffep+127", Status::OK, Category::Normal), (m_largest_value, p_largest_value, "-inf", overflow_status, Category::Infinity), @@ -2372,10 +2725,7 @@ fn multiply() { (p_smallest_value, p_zero, "0x0p+0", Status::OK, Category::Zero), (p_smallest_value, m_zero, "-0x0p+0", Status::OK, Category::Zero), (p_smallest_value, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. - (p_smallest_value, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (p_smallest_value, snan, "nan123", Status::INVALID_OP, Category::NaN), (p_smallest_value, p_normal_value, "0x1p-149", Status::OK, Category::Normal), (p_smallest_value, m_normal_value, "-0x1p-149", Status::OK, Category::Normal), (p_smallest_value, p_largest_value, "0x1.fffffep-22", Status::OK, Category::Normal), @@ -2389,10 +2739,7 @@ fn multiply() { (m_smallest_value, p_zero, "-0x0p+0", Status::OK, Category::Zero), (m_smallest_value, m_zero, "0x0p+0", Status::OK, Category::Zero), (m_smallest_value, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. 
- (m_smallest_value, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (m_smallest_value, snan, "nan123", Status::INVALID_OP, Category::NaN), (m_smallest_value, p_normal_value, "-0x1p-149", Status::OK, Category::Normal), (m_smallest_value, m_normal_value, "0x1p-149", Status::OK, Category::Normal), (m_smallest_value, p_largest_value, "-0x1.fffffep-22", Status::OK, Category::Normal), @@ -2406,10 +2753,7 @@ fn multiply() { (p_smallest_normalized, p_zero, "0x0p+0", Status::OK, Category::Zero), (p_smallest_normalized, m_zero, "-0x0p+0", Status::OK, Category::Zero), (p_smallest_normalized, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. - (p_smallest_normalized, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (p_smallest_normalized, snan, "nan123", Status::INVALID_OP, Category::NaN), (p_smallest_normalized, p_normal_value, "0x1p-126", Status::OK, Category::Normal), (p_smallest_normalized, m_normal_value, "-0x1p-126", Status::OK, Category::Normal), (p_smallest_normalized, p_largest_value, "0x1.fffffep+1", Status::OK, Category::Normal), @@ -2423,10 +2767,7 @@ fn multiply() { (m_smallest_normalized, p_zero, "-0x0p+0", Status::OK, Category::Zero), (m_smallest_normalized, m_zero, "0x0p+0", Status::OK, Category::Zero), (m_smallest_normalized, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. 
- (m_smallest_normalized, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (m_smallest_normalized, snan, "nan123", Status::INVALID_OP, Category::NaN), (m_smallest_normalized, p_normal_value, "-0x1p-126", Status::OK, Category::Normal), (m_smallest_normalized, m_normal_value, "0x1p-126", Status::OK, Category::Normal), (m_smallest_normalized, p_largest_value, "-0x1.fffffep+1", Status::OK, Category::Normal), @@ -2436,13 +2777,138 @@ fn multiply() { (m_smallest_normalized, p_smallest_normalized, "-0x0p+0", underflow_status, Category::Zero), (m_smallest_normalized, m_smallest_normalized, "0x0p+0", underflow_status, Category::Zero), ]; + let quad_special_cases = [ + ( + max_quad, + min_quad, + "0x1.ffffffffffffffffffffffffffffp-111", + Status::OK, + Category::Normal, + Round::NearestTiesToEven, + ), + ( + max_quad, + min_quad, + "0x1.ffffffffffffffffffffffffffffp-111", + Status::OK, + Category::Normal, + Round::TowardPositive, + ), + ( + max_quad, + min_quad, + "0x1.ffffffffffffffffffffffffffffp-111", + Status::OK, + Category::Normal, + Round::TowardNegative, + ), + (max_quad, min_quad, "0x1.ffffffffffffffffffffffffffffp-111", Status::OK, Category::Normal, Round::TowardZero), + ( + max_quad, + min_quad, + "0x1.ffffffffffffffffffffffffffffp-111", + Status::OK, + Category::Normal, + Round::NearestTiesToAway, + ), + ( + max_quad, + n_min_quad, + "-0x1.ffffffffffffffffffffffffffffp-111", + Status::OK, + Category::Normal, + Round::NearestTiesToEven, + ), + ( + max_quad, + n_min_quad, + "-0x1.ffffffffffffffffffffffffffffp-111", + Status::OK, + Category::Normal, + Round::TowardPositive, + ), + ( + max_quad, + n_min_quad, + "-0x1.ffffffffffffffffffffffffffffp-111", + Status::OK, + Category::Normal, + Round::TowardNegative, + ), + ( + max_quad, + n_min_quad, + "-0x1.ffffffffffffffffffffffffffffp-111", + Status::OK, + Category::Normal, + Round::TowardZero, + ), + ( + max_quad, + n_min_quad, + "-0x1.ffffffffffffffffffffffffffffp-111", + Status::OK, + Category::Normal, + 
Round::NearestTiesToAway, + ), + (max_quad, max_quad, "inf", overflow_status, Category::Infinity, Round::NearestTiesToEven), + (max_quad, max_quad, "inf", overflow_status, Category::Infinity, Round::TowardPositive), + ( + max_quad, + max_quad, + "0x1.ffffffffffffffffffffffffffffp+16383", + Status::INEXACT, + Category::Normal, + Round::TowardNegative, + ), + ( + max_quad, + max_quad, + "0x1.ffffffffffffffffffffffffffffp+16383", + Status::INEXACT, + Category::Normal, + Round::TowardZero, + ), + (max_quad, max_quad, "inf", overflow_status, Category::Infinity, Round::NearestTiesToAway), + (min_quad, min_quad, "0", underflow_status, Category::Zero, Round::NearestTiesToEven), + ( + min_quad, + min_quad, + "0x0.0000000000000000000000000001p-16382", + underflow_status, + Category::Normal, + Round::TowardPositive, + ), + (min_quad, min_quad, "0", underflow_status, Category::Zero, Round::TowardNegative), + (min_quad, min_quad, "0", underflow_status, Category::Zero, Round::TowardZero), + (min_quad, min_quad, "0", underflow_status, Category::Zero, Round::NearestTiesToAway), + (min_quad, n_min_quad, "-0", underflow_status, Category::Zero, Round::NearestTiesToEven), + (min_quad, n_min_quad, "-0", underflow_status, Category::Zero, Round::TowardPositive), + ( + min_quad, + n_min_quad, + "-0x0.0000000000000000000000000001p-16382", + underflow_status, + Category::Normal, + Round::TowardNegative, + ), + (min_quad, n_min_quad, "-0", underflow_status, Category::Zero, Round::TowardZero), + (min_quad, n_min_quad, "-0", underflow_status, Category::Zero, Round::NearestTiesToAway), + ]; - for &(x, y, e_result, e_status, e_category) in &special_cases[..] 
{ + for case @ &(x, y, e_result, e_status, e_category) in &single_special_cases { let status; let result = unpack!(status=, x * y); - assert_eq!(status, e_status); - assert_eq!(result.category(), e_category); - assert!(result.bitwise_eq(e_result.parse::().unwrap())); + assert_eq!(e_status, status); + assert_eq!(e_category, result.category()); + assert!(result.bitwise_eq(e_result.parse::().unwrap()), "result = {result:?}, case = {case:?}"); + } + for case @ &(x, y, e_result, e_status, e_category, round) in &quad_special_cases { + let status; + let result = unpack!(status=, x.mul_r(y, round)); + assert_eq!(e_status, status); + assert_eq!(e_category, result.category()); + assert!(result.bitwise_eq(e_result.parse::().unwrap()), "result = {result:?}, case = {case:?}"); } } @@ -2450,16 +2916,12 @@ fn multiply() { fn divide() { // Test Special Cases against each other and normal values. - // FIXMES/NOTES: - // 1. Since we perform only default exception handling all operations with - // signaling NaNs should have a result that is a quiet NaN. Currently they - // return sNaN. 
- let p_inf = Single::INFINITY; let m_inf = -Single::INFINITY; let p_zero = Single::ZERO; let m_zero = -Single::ZERO; let qnan = Single::NAN; + let snan = "snan123".parse::().unwrap(); let p_normal_value = "0x1p+0".parse::().unwrap(); let m_normal_value = "-0x1p+0".parse::().unwrap(); let p_largest_value = Single::largest(); @@ -2469,19 +2931,20 @@ fn divide() { let p_smallest_normalized = Single::smallest_normalized(); let m_smallest_normalized = -Single::smallest_normalized(); + let max_quad = "0x1.ffffffffffffffffffffffffffffp+16383".parse::().unwrap(); + let min_quad = "0x0.0000000000000000000000000001p-16382".parse::().unwrap(); + let n_min_quad = "-0x0.0000000000000000000000000001p-16382".parse::().unwrap(); + let overflow_status = Status::OVERFLOW | Status::INEXACT; let underflow_status = Status::UNDERFLOW | Status::INEXACT; - let special_cases = [ + let single_special_cases = [ (p_inf, p_inf, "nan", Status::INVALID_OP, Category::NaN), (p_inf, m_inf, "nan", Status::INVALID_OP, Category::NaN), (p_inf, p_zero, "inf", Status::OK, Category::Infinity), (p_inf, m_zero, "-inf", Status::OK, Category::Infinity), (p_inf, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. - (p_inf, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (p_inf, snan, "nan123", Status::INVALID_OP, Category::NaN), (p_inf, p_normal_value, "inf", Status::OK, Category::Infinity), (p_inf, m_normal_value, "-inf", Status::OK, Category::Infinity), (p_inf, p_largest_value, "inf", Status::OK, Category::Infinity), @@ -2495,10 +2958,7 @@ fn divide() { (m_inf, p_zero, "-inf", Status::OK, Category::Infinity), (m_inf, m_zero, "inf", Status::OK, Category::Infinity), (m_inf, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. 
- (m_inf, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (m_inf, snan, "nan123", Status::INVALID_OP, Category::NaN), (m_inf, p_normal_value, "-inf", Status::OK, Category::Infinity), (m_inf, m_normal_value, "inf", Status::OK, Category::Infinity), (m_inf, p_largest_value, "-inf", Status::OK, Category::Infinity), @@ -2512,10 +2972,7 @@ fn divide() { (p_zero, p_zero, "nan", Status::INVALID_OP, Category::NaN), (p_zero, m_zero, "nan", Status::INVALID_OP, Category::NaN), (p_zero, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. - (p_zero, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (p_zero, snan, "nan123", Status::INVALID_OP, Category::NaN), (p_zero, p_normal_value, "0x0p+0", Status::OK, Category::Zero), (p_zero, m_normal_value, "-0x0p+0", Status::OK, Category::Zero), (p_zero, p_largest_value, "0x0p+0", Status::OK, Category::Zero), @@ -2529,10 +2986,7 @@ fn divide() { (m_zero, p_zero, "nan", Status::INVALID_OP, Category::NaN), (m_zero, m_zero, "nan", Status::INVALID_OP, Category::NaN), (m_zero, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. - (m_zero, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (m_zero, snan, "nan123", Status::INVALID_OP, Category::NaN), (m_zero, p_normal_value, "-0x0p+0", Status::OK, Category::Zero), (m_zero, m_normal_value, "0x0p+0", Status::OK, Category::Zero), (m_zero, p_largest_value, "-0x0p+0", Status::OK, Category::Zero), @@ -2546,10 +3000,7 @@ fn divide() { (qnan, p_zero, "nan", Status::OK, Category::NaN), (qnan, m_zero, "nan", Status::OK, Category::NaN), (qnan, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. 
(qnan, snan, "nan", Status::INVALID_OP, Category::NaN), - */ (qnan, p_normal_value, "nan", Status::OK, Category::NaN), (qnan, m_normal_value, "nan", Status::OK, Category::NaN), (qnan, p_largest_value, "nan", Status::OK, Category::NaN), @@ -2558,32 +3009,26 @@ fn divide() { (qnan, m_smallest_value, "nan", Status::OK, Category::NaN), (qnan, p_smallest_normalized, "nan", Status::OK, Category::NaN), (qnan, m_smallest_normalized, "nan", Status::OK, Category::NaN), - /* - // See Note 1. - (snan, p_inf, "nan", Status::INVALID_OP, Category::NaN), - (snan, m_inf, "nan", Status::INVALID_OP, Category::NaN), - (snan, p_zero, "nan", Status::INVALID_OP, Category::NaN), - (snan, m_zero, "nan", Status::INVALID_OP, Category::NaN), - (snan, qnan, "nan", Status::INVALID_OP, Category::NaN), - (snan, snan, "nan", Status::INVALID_OP, Category::NaN), - (snan, p_normal_value, "nan", Status::INVALID_OP, Category::NaN), - (snan, m_normal_value, "nan", Status::INVALID_OP, Category::NaN), - (snan, p_largest_value, "nan", Status::INVALID_OP, Category::NaN), - (snan, m_largest_value, "nan", Status::INVALID_OP, Category::NaN), - (snan, p_smallest_value, "nan", Status::INVALID_OP, Category::NaN), - (snan, m_smallest_value, "nan", Status::INVALID_OP, Category::NaN), - (snan, p_smallest_normalized, "nan", Status::INVALID_OP, Category::NaN), - (snan, m_smallest_normalized, "nan", Status::INVALID_OP, Category::NaN), - */ + (snan, p_inf, "nan123", Status::INVALID_OP, Category::NaN), + (snan, m_inf, "nan123", Status::INVALID_OP, Category::NaN), + (snan, p_zero, "nan123", Status::INVALID_OP, Category::NaN), + (snan, m_zero, "nan123", Status::INVALID_OP, Category::NaN), + (snan, qnan, "nan123", Status::INVALID_OP, Category::NaN), + (snan, snan, "nan123", Status::INVALID_OP, Category::NaN), + (snan, p_normal_value, "nan123", Status::INVALID_OP, Category::NaN), + (snan, m_normal_value, "nan123", Status::INVALID_OP, Category::NaN), + (snan, p_largest_value, "nan123", Status::INVALID_OP, Category::NaN), + 
(snan, m_largest_value, "nan123", Status::INVALID_OP, Category::NaN), + (snan, p_smallest_value, "nan123", Status::INVALID_OP, Category::NaN), + (snan, m_smallest_value, "nan123", Status::INVALID_OP, Category::NaN), + (snan, p_smallest_normalized, "nan123", Status::INVALID_OP, Category::NaN), + (snan, m_smallest_normalized, "nan123", Status::INVALID_OP, Category::NaN), (p_normal_value, p_inf, "0x0p+0", Status::OK, Category::Zero), (p_normal_value, m_inf, "-0x0p+0", Status::OK, Category::Zero), (p_normal_value, p_zero, "inf", Status::DIV_BY_ZERO, Category::Infinity), (p_normal_value, m_zero, "-inf", Status::DIV_BY_ZERO, Category::Infinity), (p_normal_value, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. - (p_normal_value, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (p_normal_value, snan, "nan123", Status::INVALID_OP, Category::NaN), (p_normal_value, p_normal_value, "0x1p+0", Status::OK, Category::Normal), (p_normal_value, m_normal_value, "-0x1p+0", Status::OK, Category::Normal), (p_normal_value, p_largest_value, "0x1p-128", underflow_status, Category::Normal), @@ -2597,10 +3042,7 @@ fn divide() { (m_normal_value, p_zero, "-inf", Status::DIV_BY_ZERO, Category::Infinity), (m_normal_value, m_zero, "inf", Status::DIV_BY_ZERO, Category::Infinity), (m_normal_value, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. 
- (m_normal_value, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (m_normal_value, snan, "nan123", Status::INVALID_OP, Category::NaN), (m_normal_value, p_normal_value, "-0x1p+0", Status::OK, Category::Normal), (m_normal_value, m_normal_value, "0x1p+0", Status::OK, Category::Normal), (m_normal_value, p_largest_value, "-0x1p-128", underflow_status, Category::Normal), @@ -2614,10 +3056,7 @@ fn divide() { (p_largest_value, p_zero, "inf", Status::DIV_BY_ZERO, Category::Infinity), (p_largest_value, m_zero, "-inf", Status::DIV_BY_ZERO, Category::Infinity), (p_largest_value, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. - (p_largest_value, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (p_largest_value, snan, "nan123", Status::INVALID_OP, Category::NaN), (p_largest_value, p_normal_value, "0x1.fffffep+127", Status::OK, Category::Normal), (p_largest_value, m_normal_value, "-0x1.fffffep+127", Status::OK, Category::Normal), (p_largest_value, p_largest_value, "0x1p+0", Status::OK, Category::Normal), @@ -2631,10 +3070,7 @@ fn divide() { (m_largest_value, p_zero, "-inf", Status::DIV_BY_ZERO, Category::Infinity), (m_largest_value, m_zero, "inf", Status::DIV_BY_ZERO, Category::Infinity), (m_largest_value, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. - (m_largest_value, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (m_largest_value, snan, "nan123", Status::INVALID_OP, Category::NaN), (m_largest_value, p_normal_value, "-0x1.fffffep+127", Status::OK, Category::Normal), (m_largest_value, m_normal_value, "0x1.fffffep+127", Status::OK, Category::Normal), (m_largest_value, p_largest_value, "-0x1p+0", Status::OK, Category::Normal), @@ -2648,10 +3084,7 @@ fn divide() { (p_smallest_value, p_zero, "inf", Status::DIV_BY_ZERO, Category::Infinity), (p_smallest_value, m_zero, "-inf", Status::DIV_BY_ZERO, Category::Infinity), (p_smallest_value, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. 
- (p_smallest_value, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (p_smallest_value, snan, "nan123", Status::INVALID_OP, Category::NaN), (p_smallest_value, p_normal_value, "0x1p-149", Status::OK, Category::Normal), (p_smallest_value, m_normal_value, "-0x1p-149", Status::OK, Category::Normal), (p_smallest_value, p_largest_value, "0x0p+0", underflow_status, Category::Zero), @@ -2665,10 +3098,7 @@ fn divide() { (m_smallest_value, p_zero, "-inf", Status::DIV_BY_ZERO, Category::Infinity), (m_smallest_value, m_zero, "inf", Status::DIV_BY_ZERO, Category::Infinity), (m_smallest_value, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. - (m_smallest_value, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (m_smallest_value, snan, "nan123", Status::INVALID_OP, Category::NaN), (m_smallest_value, p_normal_value, "-0x1p-149", Status::OK, Category::Normal), (m_smallest_value, m_normal_value, "0x1p-149", Status::OK, Category::Normal), (m_smallest_value, p_largest_value, "-0x0p+0", underflow_status, Category::Zero), @@ -2682,10 +3112,7 @@ fn divide() { (p_smallest_normalized, p_zero, "inf", Status::DIV_BY_ZERO, Category::Infinity), (p_smallest_normalized, m_zero, "-inf", Status::DIV_BY_ZERO, Category::Infinity), (p_smallest_normalized, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. 
- (p_smallest_normalized, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (p_smallest_normalized, snan, "nan123", Status::INVALID_OP, Category::NaN), (p_smallest_normalized, p_normal_value, "0x1p-126", Status::OK, Category::Normal), (p_smallest_normalized, m_normal_value, "-0x1p-126", Status::OK, Category::Normal), (p_smallest_normalized, p_largest_value, "0x0p+0", underflow_status, Category::Zero), @@ -2699,10 +3126,7 @@ fn divide() { (m_smallest_normalized, p_zero, "-inf", Status::DIV_BY_ZERO, Category::Infinity), (m_smallest_normalized, m_zero, "inf", Status::DIV_BY_ZERO, Category::Infinity), (m_smallest_normalized, qnan, "nan", Status::OK, Category::NaN), - /* - // See Note 1. - (m_smallest_normalized, snan, "nan", Status::INVALID_OP, Category::NaN), - */ + (m_smallest_normalized, snan, "nan123", Status::INVALID_OP, Category::NaN), (m_smallest_normalized, p_normal_value, "-0x1p-126", Status::OK, Category::Normal), (m_smallest_normalized, m_normal_value, "0x1p-126", Status::OK, Category::Normal), (m_smallest_normalized, p_largest_value, "-0x0p+0", underflow_status, Category::Zero), @@ -2712,13 +3136,65 @@ fn divide() { (m_smallest_normalized, p_smallest_normalized, "-0x1p+0", Status::OK, Category::Normal), (m_smallest_normalized, m_smallest_normalized, "0x1p+0", Status::OK, Category::Normal), ]; + let quad_special_cases = [ + (max_quad, n_min_quad, "-inf", overflow_status, Category::Infinity, Round::NearestTiesToEven), + ( + max_quad, + n_min_quad, + "-0x1.ffffffffffffffffffffffffffffp+16383", + Status::INEXACT, + Category::Normal, + Round::TowardPositive, + ), + (max_quad, n_min_quad, "-inf", overflow_status, Category::Infinity, Round::TowardNegative), + ( + max_quad, + n_min_quad, + "-0x1.ffffffffffffffffffffffffffffp+16383", + Status::INEXACT, + Category::Normal, + Round::TowardZero, + ), + (max_quad, n_min_quad, "-inf", overflow_status, Category::Infinity, Round::NearestTiesToAway), + (min_quad, max_quad, "0", underflow_status, Category::Zero, 
Round::NearestTiesToEven), + ( + min_quad, + max_quad, + "0x0.0000000000000000000000000001p-16382", + underflow_status, + Category::Normal, + Round::TowardPositive, + ), + (min_quad, max_quad, "0", underflow_status, Category::Zero, Round::TowardNegative), + (min_quad, max_quad, "0", underflow_status, Category::Zero, Round::TowardZero), + (min_quad, max_quad, "0", underflow_status, Category::Zero, Round::NearestTiesToAway), + (n_min_quad, max_quad, "-0", underflow_status, Category::Zero, Round::NearestTiesToEven), + (n_min_quad, max_quad, "-0", underflow_status, Category::Zero, Round::TowardPositive), + ( + n_min_quad, + max_quad, + "-0x0.0000000000000000000000000001p-16382", + underflow_status, + Category::Normal, + Round::TowardNegative, + ), + (n_min_quad, max_quad, "-0", underflow_status, Category::Zero, Round::TowardZero), + (n_min_quad, max_quad, "-0", underflow_status, Category::Zero, Round::NearestTiesToAway), + ]; - for &(x, y, e_result, e_status, e_category) in &special_cases[..] 
{ + for case @ &(x, y, e_result, e_status, e_category) in &single_special_cases { let status; let result = unpack!(status=, x / y); - assert_eq!(status, e_status); - assert_eq!(result.category(), e_category); - assert!(result.bitwise_eq(e_result.parse::().unwrap())); + assert_eq!(e_status, status); + assert_eq!(e_category, result.category()); + assert!(result.bitwise_eq(e_result.parse::().unwrap()), "result = {result:?}, case = {case:?}"); + } + for case @ &(x, y, e_result, e_status, e_category, round) in &quad_special_cases { + let status; + let result = unpack!(status=, x.div_r(y, round)); + assert_eq!(e_status, status); + assert_eq!(e_category, result.category()); + assert!(result.bitwise_eq(e_result.parse::().unwrap()), "result = {result:?}, case = {case:?}"); } } @@ -2733,6 +3209,87 @@ fn operator_overloads() { assert!(one.bitwise_eq((two / two).value)); } +#[test] +fn comparisons() { + let vals = [ + /* MNan */ -Single::NAN, + /* MInf */ -Single::INFINITY, + /* MBig */ -Single::largest(), + /* MOne */ "-0x1p+0".parse::().unwrap(), + /* MZer */ -Single::ZERO, + /* PZer */ Single::ZERO, + /* POne */ "0x1p+0".parse::().unwrap(), + /* PBig */ Single::largest(), + /* PInf */ Single::INFINITY, + /* PNan */ Single::NAN, + ]; + + const LT: Option = Some(Ordering::Less); + const EQ: Option = Some(Ordering::Equal); + const GT: Option = Some(Ordering::Greater); + const UN: Option = None; + + // HACK(eddyb) for some reason the first row (MNan) gets formatted differently. 
+ #[rustfmt::skip] + let relations = [ + // -N -I -B -1 -0 +0 +1 +B +I +N + /* MNan */ [UN, UN, UN, UN, UN, UN, UN, UN, UN, UN], + /* MInf */ [UN, EQ, LT, LT, LT, LT, LT, LT, LT, UN], + /* MBig */ [UN, GT, EQ, LT, LT, LT, LT, LT, LT, UN], + /* MOne */ [UN, GT, GT, EQ, LT, LT, LT, LT, LT, UN], + /* MZer */ [UN, GT, GT, GT, EQ, EQ, LT, LT, LT, UN], + /* PZer */ [UN, GT, GT, GT, EQ, EQ, LT, LT, LT, UN], + /* POne */ [UN, GT, GT, GT, GT, GT, EQ, LT, LT, UN], + /* PBig */ [UN, GT, GT, GT, GT, GT, GT, EQ, LT, UN], + /* PInf */ [UN, GT, GT, GT, GT, GT, GT, GT, EQ, UN], + /* PNan */ [UN, UN, UN, UN, UN, UN, UN, UN, UN, UN], + ]; + for (i, &lhs) in vals.iter().enumerate() { + for (j, &rhs) in vals.iter().enumerate() { + let relation = lhs.partial_cmp(&rhs); + assert_eq!(relation, relations[i][j]); + + // NOTE(eddyb) these checks have been kept from the C++ code which didn't + // appear to have a concept like `Option`, but in Rust they + // should be entirely redundant with the single `assert_eq!` above. 
+ match relation { + LT => { + assert!(!(lhs == rhs)); + assert!(lhs != rhs); + assert!(lhs < rhs); + assert!(!(lhs > rhs)); + assert!(lhs <= rhs); + assert!(!(lhs >= rhs)); + } + EQ => { + assert!(lhs == rhs); + assert!(!(lhs != rhs)); + assert!(!(lhs < rhs)); + assert!(!(lhs > rhs)); + assert!(lhs <= rhs); + assert!(lhs >= rhs); + } + GT => { + assert!(!(lhs == rhs)); + assert!(lhs != rhs); + assert!(!(lhs < rhs)); + assert!(lhs > rhs); + assert!(!(lhs <= rhs)); + assert!(lhs >= rhs); + } + UN => { + assert!(!(lhs == rhs)); + assert!(lhs != rhs); + assert!(!(lhs < rhs)); + assert!(!(lhs > rhs)); + assert!(!(lhs <= rhs)); + assert!(!(lhs >= rhs)); + } + } + } + } +} + #[test] fn abs() { let p_inf = Single::INFINITY; @@ -3190,4 +3747,1181 @@ fn modulo() { assert!(unpack!(status=, f1 % f2).is_nan()); assert_eq!(status, Status::INVALID_OP); } + { + let f1 = "-4.0".parse::().unwrap(); + let f2 = "-2.0".parse::().unwrap(); + let expected = "-0.0".parse::().unwrap(); + assert!(unpack!(status=, f1 % f2).bitwise_eq(expected)); + assert_eq!(status, Status::OK); + } + { + let f1 = "-4.0".parse::().unwrap(); + let f2 = "2.0".parse::().unwrap(); + let expected = "-0.0".parse::().unwrap(); + assert!(unpack!(status=, f1 % f2).bitwise_eq(expected)); + assert_eq!(status, Status::OK); + } + { + // Test E4M3FN mod where the LHS exponent is maxExponent (8) and the RHS is + // the max value whose exponent is minExponent (-6). This requires special + // logic in the mod implementation to prevent overflow to NaN. + let f1 = "0x1p8".parse::().unwrap(); // 256 + let f2 = "0x1.ep-6".parse::().unwrap(); // 0.029296875 + let expected = "0x1p-8".parse::().unwrap(); // 0.00390625 + assert!(unpack!(status=, f1 % f2).bitwise_eq(expected)); + assert_eq!(status, Status::OK); + } +} + +#[test] +fn remainder() { + // Test Special Cases against each other and normal values. 
+ + let p_inf = Single::INFINITY; + let m_inf = -Single::INFINITY; + let p_zero = Single::ZERO; + let m_zero = -Single::ZERO; + let qnan = Single::NAN; + let snan = "snan123".parse::().unwrap(); + let p_normal_value = "0x1p+0".parse::().unwrap(); + let m_normal_value = "-0x1p+0".parse::().unwrap(); + let p_largest_value = Single::largest(); + let m_largest_value = -Single::largest(); + let p_smallest_value = Single::SMALLEST; + let m_smallest_value = -Single::SMALLEST; + let p_smallest_normalized = Single::smallest_normalized(); + let m_smallest_normalized = -Single::smallest_normalized(); + + let p_val1 = "0x1.fffffep+126".parse::().unwrap(); + let m_val1 = "-0x1.fffffep+126".parse::().unwrap(); + let p_val2 = "0x1.fffffep-126".parse::().unwrap(); + let m_val2 = "-0x1.fffffep-126".parse::().unwrap(); + let p_val3 = "0x1p-125".parse::().unwrap(); + let m_val3 = "-0x1p-125".parse::().unwrap(); + let p_val4 = "0x1p+127".parse::().unwrap(); + let m_val4 = "-0x1p+127".parse::().unwrap(); + let p_val5 = "1.5".parse::().unwrap(); + let m_val5 = "-1.5".parse::().unwrap(); + let p_val6 = "1".parse::().unwrap(); + let m_val6 = "-1".parse::().unwrap(); + + let special_cases = [ + (p_inf, p_inf, "nan", Status::INVALID_OP, Category::NaN), + (p_inf, m_inf, "nan", Status::INVALID_OP, Category::NaN), + (p_inf, p_zero, "nan", Status::INVALID_OP, Category::NaN), + (p_inf, m_zero, "nan", Status::INVALID_OP, Category::NaN), + (p_inf, qnan, "nan", Status::OK, Category::NaN), + (p_inf, snan, "nan123", Status::INVALID_OP, Category::NaN), + (p_inf, p_normal_value, "nan", Status::INVALID_OP, Category::NaN), + (p_inf, m_normal_value, "nan", Status::INVALID_OP, Category::NaN), + (p_inf, p_largest_value, "nan", Status::INVALID_OP, Category::NaN), + (p_inf, m_largest_value, "nan", Status::INVALID_OP, Category::NaN), + (p_inf, p_smallest_value, "nan", Status::INVALID_OP, Category::NaN), + (p_inf, m_smallest_value, "nan", Status::INVALID_OP, Category::NaN), + (p_inf, p_smallest_normalized, 
"nan", Status::INVALID_OP, Category::NaN), + (p_inf, m_smallest_normalized, "nan", Status::INVALID_OP, Category::NaN), + (m_inf, p_inf, "nan", Status::INVALID_OP, Category::NaN), + (m_inf, m_inf, "nan", Status::INVALID_OP, Category::NaN), + (m_inf, p_zero, "nan", Status::INVALID_OP, Category::NaN), + (m_inf, m_zero, "nan", Status::INVALID_OP, Category::NaN), + (m_inf, qnan, "nan", Status::OK, Category::NaN), + (m_inf, snan, "nan123", Status::INVALID_OP, Category::NaN), + (m_inf, p_normal_value, "nan", Status::INVALID_OP, Category::NaN), + (m_inf, m_normal_value, "nan", Status::INVALID_OP, Category::NaN), + (m_inf, p_largest_value, "nan", Status::INVALID_OP, Category::NaN), + (m_inf, m_largest_value, "nan", Status::INVALID_OP, Category::NaN), + (m_inf, p_smallest_value, "nan", Status::INVALID_OP, Category::NaN), + (m_inf, m_smallest_value, "nan", Status::INVALID_OP, Category::NaN), + (m_inf, p_smallest_normalized, "nan", Status::INVALID_OP, Category::NaN), + (m_inf, m_smallest_normalized, "nan", Status::INVALID_OP, Category::NaN), + (p_zero, p_inf, "0x0p+0", Status::OK, Category::Zero), + (p_zero, m_inf, "0x0p+0", Status::OK, Category::Zero), + (p_zero, p_zero, "nan", Status::INVALID_OP, Category::NaN), + (p_zero, m_zero, "nan", Status::INVALID_OP, Category::NaN), + (p_zero, qnan, "nan", Status::OK, Category::NaN), + (p_zero, snan, "nan123", Status::INVALID_OP, Category::NaN), + (p_zero, p_normal_value, "0x0p+0", Status::OK, Category::Zero), + (p_zero, m_normal_value, "0x0p+0", Status::OK, Category::Zero), + (p_zero, p_largest_value, "0x0p+0", Status::OK, Category::Zero), + (p_zero, m_largest_value, "0x0p+0", Status::OK, Category::Zero), + (p_zero, p_smallest_value, "0x0p+0", Status::OK, Category::Zero), + (p_zero, m_smallest_value, "0x0p+0", Status::OK, Category::Zero), + (p_zero, p_smallest_normalized, "0x0p+0", Status::OK, Category::Zero), + (p_zero, m_smallest_normalized, "0x0p+0", Status::OK, Category::Zero), + (m_zero, p_inf, "-0x0p+0", Status::OK, 
Category::Zero), + (m_zero, m_inf, "-0x0p+0", Status::OK, Category::Zero), + (m_zero, p_zero, "nan", Status::INVALID_OP, Category::NaN), + (m_zero, m_zero, "nan", Status::INVALID_OP, Category::NaN), + (m_zero, qnan, "nan", Status::OK, Category::NaN), + (m_zero, snan, "nan123", Status::INVALID_OP, Category::NaN), + (m_zero, p_normal_value, "-0x0p+0", Status::OK, Category::Zero), + (m_zero, m_normal_value, "-0x0p+0", Status::OK, Category::Zero), + (m_zero, p_largest_value, "-0x0p+0", Status::OK, Category::Zero), + (m_zero, m_largest_value, "-0x0p+0", Status::OK, Category::Zero), + (m_zero, p_smallest_value, "-0x0p+0", Status::OK, Category::Zero), + (m_zero, m_smallest_value, "-0x0p+0", Status::OK, Category::Zero), + (m_zero, p_smallest_normalized, "-0x0p+0", Status::OK, Category::Zero), + (m_zero, m_smallest_normalized, "-0x0p+0", Status::OK, Category::Zero), + (qnan, p_inf, "nan", Status::OK, Category::NaN), + (qnan, m_inf, "nan", Status::OK, Category::NaN), + (qnan, p_zero, "nan", Status::OK, Category::NaN), + (qnan, m_zero, "nan", Status::OK, Category::NaN), + (qnan, qnan, "nan", Status::OK, Category::NaN), + (qnan, snan, "nan", Status::INVALID_OP, Category::NaN), + (qnan, p_normal_value, "nan", Status::OK, Category::NaN), + (qnan, m_normal_value, "nan", Status::OK, Category::NaN), + (qnan, p_largest_value, "nan", Status::OK, Category::NaN), + (qnan, m_largest_value, "nan", Status::OK, Category::NaN), + (qnan, p_smallest_value, "nan", Status::OK, Category::NaN), + (qnan, m_smallest_value, "nan", Status::OK, Category::NaN), + (qnan, p_smallest_normalized, "nan", Status::OK, Category::NaN), + (qnan, m_smallest_normalized, "nan", Status::OK, Category::NaN), + (snan, p_inf, "nan123", Status::INVALID_OP, Category::NaN), + (snan, m_inf, "nan123", Status::INVALID_OP, Category::NaN), + (snan, p_zero, "nan123", Status::INVALID_OP, Category::NaN), + (snan, m_zero, "nan123", Status::INVALID_OP, Category::NaN), + (snan, qnan, "nan123", Status::INVALID_OP, Category::NaN), + 
(snan, snan, "nan123", Status::INVALID_OP, Category::NaN), + (snan, p_normal_value, "nan123", Status::INVALID_OP, Category::NaN), + (snan, m_normal_value, "nan123", Status::INVALID_OP, Category::NaN), + (snan, p_largest_value, "nan123", Status::INVALID_OP, Category::NaN), + (snan, m_largest_value, "nan123", Status::INVALID_OP, Category::NaN), + (snan, p_smallest_value, "nan123", Status::INVALID_OP, Category::NaN), + (snan, m_smallest_value, "nan123", Status::INVALID_OP, Category::NaN), + (snan, p_smallest_normalized, "nan123", Status::INVALID_OP, Category::NaN), + (snan, m_smallest_normalized, "nan123", Status::INVALID_OP, Category::NaN), + (p_normal_value, p_inf, "0x1p+0", Status::OK, Category::Normal), + (p_normal_value, m_inf, "0x1p+0", Status::OK, Category::Normal), + (p_normal_value, p_zero, "nan", Status::INVALID_OP, Category::NaN), + (p_normal_value, m_zero, "nan", Status::INVALID_OP, Category::NaN), + (p_normal_value, qnan, "nan", Status::OK, Category::NaN), + (p_normal_value, snan, "nan123", Status::INVALID_OP, Category::NaN), + (p_normal_value, p_normal_value, "0x0p+0", Status::OK, Category::Zero), + (p_normal_value, m_normal_value, "0x0p+0", Status::OK, Category::Zero), + (p_normal_value, p_largest_value, "0x1p+0", Status::OK, Category::Normal), + (p_normal_value, m_largest_value, "0x1p+0", Status::OK, Category::Normal), + (p_normal_value, p_smallest_value, "0x0p+0", Status::OK, Category::Zero), + (p_normal_value, m_smallest_value, "0x0p+0", Status::OK, Category::Zero), + (p_normal_value, p_smallest_normalized, "0x0p+0", Status::OK, Category::Zero), + (p_normal_value, m_smallest_normalized, "0x0p+0", Status::OK, Category::Zero), + (m_normal_value, p_inf, "-0x1p+0", Status::OK, Category::Normal), + (m_normal_value, m_inf, "-0x1p+0", Status::OK, Category::Normal), + (m_normal_value, p_zero, "nan", Status::INVALID_OP, Category::NaN), + (m_normal_value, m_zero, "nan", Status::INVALID_OP, Category::NaN), + (m_normal_value, qnan, "nan", Status::OK, 
Category::NaN), + (m_normal_value, snan, "nan123", Status::INVALID_OP, Category::NaN), + (m_normal_value, p_normal_value, "-0x0p+0", Status::OK, Category::Zero), + (m_normal_value, m_normal_value, "-0x0p+0", Status::OK, Category::Zero), + (m_normal_value, p_largest_value, "-0x1p+0", Status::OK, Category::Normal), + (m_normal_value, m_largest_value, "-0x1p+0", Status::OK, Category::Normal), + (m_normal_value, p_smallest_value, "-0x0p+0", Status::OK, Category::Zero), + (m_normal_value, m_smallest_value, "-0x0p+0", Status::OK, Category::Zero), + (m_normal_value, p_smallest_normalized, "-0x0p+0", Status::OK, Category::Zero), + (m_normal_value, m_smallest_normalized, "-0x0p+0", Status::OK, Category::Zero), + (p_largest_value, p_inf, "0x1.fffffep+127", Status::OK, Category::Normal), + (p_largest_value, m_inf, "0x1.fffffep+127", Status::OK, Category::Normal), + (p_largest_value, p_zero, "nan", Status::INVALID_OP, Category::NaN), + (p_largest_value, m_zero, "nan", Status::INVALID_OP, Category::NaN), + (p_largest_value, qnan, "nan", Status::OK, Category::NaN), + (p_largest_value, snan, "nan123", Status::INVALID_OP, Category::NaN), + (p_largest_value, p_normal_value, "0x0p+0", Status::OK, Category::Zero), + (p_largest_value, m_normal_value, "0x0p+0", Status::OK, Category::Zero), + (p_largest_value, p_largest_value, "0x0p+0", Status::OK, Category::Zero), + (p_largest_value, m_largest_value, "0x0p+0", Status::OK, Category::Zero), + (p_largest_value, p_smallest_value, "0x0p+0", Status::OK, Category::Zero), + (p_largest_value, m_smallest_value, "0x0p+0", Status::OK, Category::Zero), + (p_largest_value, p_smallest_normalized, "0x0p+0", Status::OK, Category::Zero), + (p_largest_value, m_smallest_normalized, "0x0p+0", Status::OK, Category::Zero), + (m_largest_value, p_inf, "-0x1.fffffep+127", Status::OK, Category::Normal), + (m_largest_value, m_inf, "-0x1.fffffep+127", Status::OK, Category::Normal), + (m_largest_value, p_zero, "nan", Status::INVALID_OP, Category::NaN), + 
(m_largest_value, m_zero, "nan", Status::INVALID_OP, Category::NaN), + (m_largest_value, qnan, "nan", Status::OK, Category::NaN), + (m_largest_value, snan, "nan123", Status::INVALID_OP, Category::NaN), + (m_largest_value, p_normal_value, "-0x0p+0", Status::OK, Category::Zero), + (m_largest_value, m_normal_value, "-0x0p+0", Status::OK, Category::Zero), + (m_largest_value, p_largest_value, "-0x0p+0", Status::OK, Category::Zero), + (m_largest_value, m_largest_value, "-0x0p+0", Status::OK, Category::Zero), + (m_largest_value, p_smallest_value, "-0x0p+0", Status::OK, Category::Zero), + (m_largest_value, m_smallest_value, "-0x0p+0", Status::OK, Category::Zero), + (m_largest_value, p_smallest_normalized, "-0x0p+0", Status::OK, Category::Zero), + (m_largest_value, m_smallest_normalized, "-0x0p+0", Status::OK, Category::Zero), + (p_smallest_value, p_inf, "0x1p-149", Status::OK, Category::Normal), + (p_smallest_value, m_inf, "0x1p-149", Status::OK, Category::Normal), + (p_smallest_value, p_zero, "nan", Status::INVALID_OP, Category::NaN), + (p_smallest_value, m_zero, "nan", Status::INVALID_OP, Category::NaN), + (p_smallest_value, qnan, "nan", Status::OK, Category::NaN), + (p_smallest_value, snan, "nan123", Status::INVALID_OP, Category::NaN), + (p_smallest_value, p_normal_value, "0x1p-149", Status::OK, Category::Normal), + (p_smallest_value, m_normal_value, "0x1p-149", Status::OK, Category::Normal), + (p_smallest_value, p_largest_value, "0x1p-149", Status::OK, Category::Normal), + (p_smallest_value, m_largest_value, "0x1p-149", Status::OK, Category::Normal), + (p_smallest_value, p_smallest_value, "0x0p+0", Status::OK, Category::Zero), + (p_smallest_value, m_smallest_value, "0x0p+0", Status::OK, Category::Zero), + (p_smallest_value, p_smallest_normalized, "0x1p-149", Status::OK, Category::Normal), + (p_smallest_value, m_smallest_normalized, "0x1p-149", Status::OK, Category::Normal), + (m_smallest_value, p_inf, "-0x1p-149", Status::OK, Category::Normal), + (m_smallest_value, 
m_inf, "-0x1p-149", Status::OK, Category::Normal), + (m_smallest_value, p_zero, "nan", Status::INVALID_OP, Category::NaN), + (m_smallest_value, m_zero, "nan", Status::INVALID_OP, Category::NaN), + (m_smallest_value, qnan, "nan", Status::OK, Category::NaN), + (m_smallest_value, snan, "nan123", Status::INVALID_OP, Category::NaN), + (m_smallest_value, p_normal_value, "-0x1p-149", Status::OK, Category::Normal), + (m_smallest_value, m_normal_value, "-0x1p-149", Status::OK, Category::Normal), + (m_smallest_value, p_largest_value, "-0x1p-149", Status::OK, Category::Normal), + (m_smallest_value, m_largest_value, "-0x1p-149", Status::OK, Category::Normal), + (m_smallest_value, p_smallest_value, "-0x0p+0", Status::OK, Category::Zero), + (m_smallest_value, m_smallest_value, "-0x0p+0", Status::OK, Category::Zero), + (m_smallest_value, p_smallest_normalized, "-0x1p-149", Status::OK, Category::Normal), + (m_smallest_value, m_smallest_normalized, "-0x1p-149", Status::OK, Category::Normal), + (p_smallest_normalized, p_inf, "0x1p-126", Status::OK, Category::Normal), + (p_smallest_normalized, m_inf, "0x1p-126", Status::OK, Category::Normal), + (p_smallest_normalized, p_zero, "nan", Status::INVALID_OP, Category::NaN), + (p_smallest_normalized, m_zero, "nan", Status::INVALID_OP, Category::NaN), + (p_smallest_normalized, qnan, "nan", Status::OK, Category::NaN), + (p_smallest_normalized, snan, "nan123", Status::INVALID_OP, Category::NaN), + (p_smallest_normalized, p_normal_value, "0x1p-126", Status::OK, Category::Normal), + (p_smallest_normalized, m_normal_value, "0x1p-126", Status::OK, Category::Normal), + (p_smallest_normalized, p_largest_value, "0x1p-126", Status::OK, Category::Normal), + (p_smallest_normalized, m_largest_value, "0x1p-126", Status::OK, Category::Normal), + (p_smallest_normalized, p_smallest_value, "0x0p+0", Status::OK, Category::Zero), + (p_smallest_normalized, m_smallest_value, "0x0p+0", Status::OK, Category::Zero), + (p_smallest_normalized, p_smallest_normalized, 
"0x0p+0", Status::OK, Category::Zero), + (p_smallest_normalized, m_smallest_normalized, "0x0p+0", Status::OK, Category::Zero), + (m_smallest_normalized, p_inf, "-0x1p-126", Status::OK, Category::Normal), + (m_smallest_normalized, m_inf, "-0x1p-126", Status::OK, Category::Normal), + (m_smallest_normalized, p_zero, "nan", Status::INVALID_OP, Category::NaN), + (m_smallest_normalized, m_zero, "nan", Status::INVALID_OP, Category::NaN), + (m_smallest_normalized, qnan, "nan", Status::OK, Category::NaN), + (m_smallest_normalized, snan, "nan123", Status::INVALID_OP, Category::NaN), + (m_smallest_normalized, p_normal_value, "-0x1p-126", Status::OK, Category::Normal), + (m_smallest_normalized, m_normal_value, "-0x1p-126", Status::OK, Category::Normal), + (m_smallest_normalized, p_largest_value, "-0x1p-126", Status::OK, Category::Normal), + (m_smallest_normalized, m_largest_value, "-0x1p-126", Status::OK, Category::Normal), + (m_smallest_normalized, p_smallest_value, "-0x0p+0", Status::OK, Category::Zero), + (m_smallest_normalized, m_smallest_value, "-0x0p+0", Status::OK, Category::Zero), + (m_smallest_normalized, p_smallest_normalized, "-0x0p+0", Status::OK, Category::Zero), + (m_smallest_normalized, m_smallest_normalized, "-0x0p+0", Status::OK, Category::Zero), + (p_val1, p_val1, "0x0p+0", Status::OK, Category::Zero), + (p_val1, m_val1, "0x0p+0", Status::OK, Category::Zero), + (p_val1, p_val2, "0x0p+0", Status::OK, Category::Zero), + (p_val1, m_val2, "0x0p+0", Status::OK, Category::Zero), + (p_val1, p_val3, "0x0p+0", Status::OK, Category::Zero), + (p_val1, m_val3, "0x0p+0", Status::OK, Category::Zero), + (p_val1, p_val4, "-0x1p+103", Status::OK, Category::Normal), + (p_val1, m_val4, "-0x1p+103", Status::OK, Category::Normal), + (p_val1, p_val5, "0x0p+0", Status::OK, Category::Zero), + (p_val1, m_val5, "0x0p+0", Status::OK, Category::Zero), + (p_val1, p_val6, "0x0p+0", Status::OK, Category::Zero), + (p_val1, m_val6, "0x0p+0", Status::OK, Category::Zero), + (m_val1, p_val1, 
"-0x0p+0", Status::OK, Category::Zero), + (m_val1, m_val1, "-0x0p+0", Status::OK, Category::Zero), + (m_val1, p_val2, "-0x0p+0", Status::OK, Category::Zero), + (m_val1, m_val2, "-0x0p+0", Status::OK, Category::Zero), + (m_val1, p_val3, "-0x0p+0", Status::OK, Category::Zero), + (m_val1, m_val3, "-0x0p+0", Status::OK, Category::Zero), + (m_val1, p_val4, "0x1p+103", Status::OK, Category::Normal), + (m_val1, m_val4, "0x1p+103", Status::OK, Category::Normal), + (m_val1, p_val5, "-0x0p+0", Status::OK, Category::Zero), + (m_val1, m_val5, "-0x0p+0", Status::OK, Category::Zero), + (m_val1, p_val6, "-0x0p+0", Status::OK, Category::Zero), + (m_val1, m_val6, "-0x0p+0", Status::OK, Category::Zero), + (p_val2, p_val1, "0x1.fffffep-126", Status::OK, Category::Normal), + (p_val2, m_val1, "0x1.fffffep-126", Status::OK, Category::Normal), + (p_val2, p_val2, "0x0p+0", Status::OK, Category::Zero), + (p_val2, m_val2, "0x0p+0", Status::OK, Category::Zero), + (p_val2, p_val3, "-0x0.000002p-126", Status::OK, Category::Normal), + (p_val2, m_val3, "-0x0.000002p-126", Status::OK, Category::Normal), + (p_val2, p_val4, "0x1.fffffep-126", Status::OK, Category::Normal), + (p_val2, m_val4, "0x1.fffffep-126", Status::OK, Category::Normal), + (p_val2, p_val5, "0x1.fffffep-126", Status::OK, Category::Normal), + (p_val2, m_val5, "0x1.fffffep-126", Status::OK, Category::Normal), + (p_val2, p_val6, "0x1.fffffep-126", Status::OK, Category::Normal), + (p_val2, m_val6, "0x1.fffffep-126", Status::OK, Category::Normal), + (m_val2, p_val1, "-0x1.fffffep-126", Status::OK, Category::Normal), + (m_val2, m_val1, "-0x1.fffffep-126", Status::OK, Category::Normal), + (m_val2, p_val2, "-0x0p+0", Status::OK, Category::Zero), + (m_val2, m_val2, "-0x0p+0", Status::OK, Category::Zero), + (m_val2, p_val3, "0x0.000002p-126", Status::OK, Category::Normal), + (m_val2, m_val3, "0x0.000002p-126", Status::OK, Category::Normal), + (m_val2, p_val4, "-0x1.fffffep-126", Status::OK, Category::Normal), + (m_val2, m_val4, 
"-0x1.fffffep-126", Status::OK, Category::Normal), + (m_val2, p_val5, "-0x1.fffffep-126", Status::OK, Category::Normal), + (m_val2, m_val5, "-0x1.fffffep-126", Status::OK, Category::Normal), + (m_val2, p_val6, "-0x1.fffffep-126", Status::OK, Category::Normal), + (m_val2, m_val6, "-0x1.fffffep-126", Status::OK, Category::Normal), + (p_val3, p_val1, "0x1p-125", Status::OK, Category::Normal), + (p_val3, m_val1, "0x1p-125", Status::OK, Category::Normal), + (p_val3, p_val2, "0x0.000002p-126", Status::OK, Category::Normal), + (p_val3, m_val2, "0x0.000002p-126", Status::OK, Category::Normal), + (p_val3, p_val3, "0x0p+0", Status::OK, Category::Zero), + (p_val3, m_val3, "0x0p+0", Status::OK, Category::Zero), + (p_val3, p_val4, "0x1p-125", Status::OK, Category::Normal), + (p_val3, m_val4, "0x1p-125", Status::OK, Category::Normal), + (p_val3, p_val5, "0x1p-125", Status::OK, Category::Normal), + (p_val3, m_val5, "0x1p-125", Status::OK, Category::Normal), + (p_val3, p_val6, "0x1p-125", Status::OK, Category::Normal), + (p_val3, m_val6, "0x1p-125", Status::OK, Category::Normal), + (m_val3, p_val1, "-0x1p-125", Status::OK, Category::Normal), + (m_val3, m_val1, "-0x1p-125", Status::OK, Category::Normal), + (m_val3, p_val2, "-0x0.000002p-126", Status::OK, Category::Normal), + (m_val3, m_val2, "-0x0.000002p-126", Status::OK, Category::Normal), + (m_val3, p_val3, "-0x0p+0", Status::OK, Category::Zero), + (m_val3, m_val3, "-0x0p+0", Status::OK, Category::Zero), + (m_val3, p_val4, "-0x1p-125", Status::OK, Category::Normal), + (m_val3, m_val4, "-0x1p-125", Status::OK, Category::Normal), + (m_val3, p_val5, "-0x1p-125", Status::OK, Category::Normal), + (m_val3, m_val5, "-0x1p-125", Status::OK, Category::Normal), + (m_val3, p_val6, "-0x1p-125", Status::OK, Category::Normal), + (m_val3, m_val6, "-0x1p-125", Status::OK, Category::Normal), + (p_val4, p_val1, "0x1p+103", Status::OK, Category::Normal), + (p_val4, m_val1, "0x1p+103", Status::OK, Category::Normal), + (p_val4, p_val2, 
"0x0.002p-126", Status::OK, Category::Normal), + (p_val4, m_val2, "0x0.002p-126", Status::OK, Category::Normal), + (p_val4, p_val3, "0x0p+0", Status::OK, Category::Zero), + (p_val4, m_val3, "0x0p+0", Status::OK, Category::Zero), + (p_val4, p_val4, "0x0p+0", Status::OK, Category::Zero), + (p_val4, m_val4, "0x0p+0", Status::OK, Category::Zero), + (p_val4, p_val5, "0.5", Status::OK, Category::Normal), + (p_val4, m_val5, "0.5", Status::OK, Category::Normal), + (p_val4, p_val6, "0x0p+0", Status::OK, Category::Zero), + (p_val4, m_val6, "0x0p+0", Status::OK, Category::Zero), + (m_val4, p_val1, "-0x1p+103", Status::OK, Category::Normal), + (m_val4, m_val1, "-0x1p+103", Status::OK, Category::Normal), + (m_val4, p_val2, "-0x0.002p-126", Status::OK, Category::Normal), + (m_val4, m_val2, "-0x0.002p-126", Status::OK, Category::Normal), + (m_val4, p_val3, "-0x0p+0", Status::OK, Category::Zero), + (m_val4, m_val3, "-0x0p+0", Status::OK, Category::Zero), + (m_val4, p_val4, "-0x0p+0", Status::OK, Category::Zero), + (m_val4, m_val4, "-0x0p+0", Status::OK, Category::Zero), + (m_val4, p_val5, "-0.5", Status::OK, Category::Normal), + (m_val4, m_val5, "-0.5", Status::OK, Category::Normal), + (m_val4, p_val6, "-0x0p+0", Status::OK, Category::Zero), + (m_val4, m_val6, "-0x0p+0", Status::OK, Category::Zero), + (p_val5, p_val1, "1.5", Status::OK, Category::Normal), + (p_val5, m_val1, "1.5", Status::OK, Category::Normal), + (p_val5, p_val2, "0x0.00006p-126", Status::OK, Category::Normal), + (p_val5, m_val2, "0x0.00006p-126", Status::OK, Category::Normal), + (p_val5, p_val3, "0x0p+0", Status::OK, Category::Zero), + (p_val5, m_val3, "0x0p+0", Status::OK, Category::Zero), + (p_val5, p_val4, "1.5", Status::OK, Category::Normal), + (p_val5, m_val4, "1.5", Status::OK, Category::Normal), + (p_val5, p_val5, "0x0p+0", Status::OK, Category::Zero), + (p_val5, m_val5, "0x0p+0", Status::OK, Category::Zero), + (p_val5, p_val6, "-0.5", Status::OK, Category::Normal), + (p_val5, m_val6, "-0.5", Status::OK, 
Category::Normal), + (m_val5, p_val1, "-1.5", Status::OK, Category::Normal), + (m_val5, m_val1, "-1.5", Status::OK, Category::Normal), + (m_val5, p_val2, "-0x0.00006p-126", Status::OK, Category::Normal), + (m_val5, m_val2, "-0x0.00006p-126", Status::OK, Category::Normal), + (m_val5, p_val3, "-0x0p+0", Status::OK, Category::Zero), + (m_val5, m_val3, "-0x0p+0", Status::OK, Category::Zero), + (m_val5, p_val4, "-1.5", Status::OK, Category::Normal), + (m_val5, m_val4, "-1.5", Status::OK, Category::Normal), + (m_val5, p_val5, "-0x0p+0", Status::OK, Category::Zero), + (m_val5, m_val5, "-0x0p+0", Status::OK, Category::Zero), + (m_val5, p_val6, "0.5", Status::OK, Category::Normal), + (m_val5, m_val6, "0.5", Status::OK, Category::Normal), + (p_val6, p_val1, "0x1p+0", Status::OK, Category::Normal), + (p_val6, m_val1, "0x1p+0", Status::OK, Category::Normal), + (p_val6, p_val2, "0x0.00004p-126", Status::OK, Category::Normal), + (p_val6, m_val2, "0x0.00004p-126", Status::OK, Category::Normal), + (p_val6, p_val3, "0x0p+0", Status::OK, Category::Zero), + (p_val6, m_val3, "0x0p+0", Status::OK, Category::Zero), + (p_val6, p_val4, "0x1p+0", Status::OK, Category::Normal), + (p_val6, m_val4, "0x1p+0", Status::OK, Category::Normal), + (p_val6, p_val5, "-0.5", Status::OK, Category::Normal), + (p_val6, m_val5, "-0.5", Status::OK, Category::Normal), + (p_val6, p_val6, "0x0p+0", Status::OK, Category::Zero), + (p_val6, m_val6, "0x0p+0", Status::OK, Category::Zero), + (m_val6, p_val1, "-0x1p+0", Status::OK, Category::Normal), + (m_val6, m_val1, "-0x1p+0", Status::OK, Category::Normal), + (m_val6, p_val2, "-0x0.00004p-126", Status::OK, Category::Normal), + (m_val6, m_val2, "-0x0.00004p-126", Status::OK, Category::Normal), + (m_val6, p_val3, "-0x0p+0", Status::OK, Category::Zero), + (m_val6, m_val3, "-0x0p+0", Status::OK, Category::Zero), + (m_val6, p_val4, "-0x1p+0", Status::OK, Category::Normal), + (m_val6, m_val4, "-0x1p+0", Status::OK, Category::Normal), + (m_val6, p_val5, "0.5", 
Status::OK, Category::Normal), + (m_val6, m_val5, "0.5", Status::OK, Category::Normal), + (m_val6, p_val6, "-0x0p+0", Status::OK, Category::Zero), + (m_val6, m_val6, "-0x0p+0", Status::OK, Category::Zero), + ]; + + for case @ &(x, y, e_result, e_status, e_category) in &special_cases { + let status; + let result = unpack!(status=, x.ieee_rem(y)); + assert_eq!(e_status, status); + assert_eq!(e_category, result.category()); + assert!(result.bitwise_eq(e_result.parse::().unwrap()), "result = {result:?}, case = {case:?}"); + } + + let mut status; + { + let f1 = "0x1.3333333333333p-2".parse::().unwrap(); // 0.3 + let f2 = "0x1.47ae147ae147bp-7".parse::().unwrap(); // 0.01 + let expected = "-0x1.4p-56".parse::().unwrap(); + assert!(unpack!(status=, f1.ieee_rem(f2)).bitwise_eq(expected)); + assert_eq!(status, Status::OK); + } + { + let f1 = "0x1p64".parse::().unwrap(); // 1.8446744073709552e19 + let f2 = "1.5".parse::().unwrap(); + let expected = "-0.5".parse::().unwrap(); + assert!(unpack!(status=, f1.ieee_rem(f2)).bitwise_eq(expected)); + assert_eq!(status, Status::OK); + } + { + let f1 = "0x1p1000".parse::().unwrap(); + let f2 = "0x1p-1000".parse::().unwrap(); + let expected = "0.0".parse::().unwrap(); + assert!(unpack!(status=, f1.ieee_rem(f2)).bitwise_eq(expected)); + assert_eq!(status, Status::OK); + } + { + let f1 = Double::INFINITY; + let f2 = "1.0".parse::().unwrap(); + assert!(unpack!(status=, f1.ieee_rem(f2)).is_nan()); + assert_eq!(status, Status::INVALID_OP); + } + { + let f1 = "-4.0".parse::().unwrap(); + let f2 = "-2.0".parse::().unwrap(); + let expected = "-0.0".parse::().unwrap(); + assert!(unpack!(status=, f1.ieee_rem(f2)).bitwise_eq(expected)); + assert_eq!(status, Status::OK); + } + { + let f1 = "-4.0".parse::().unwrap(); + let f2 = "2.0".parse::().unwrap(); + let expected = "-0.0".parse::().unwrap(); + assert!(unpack!(status=, f1.ieee_rem(f2)).bitwise_eq(expected)); + assert_eq!(status, Status::OK); + } +} + +#[test] +fn x87_largest() { + 
assert!(X87DoubleExtended::largest().is_largest()); +} + +#[test] +fn x87_next() { + assert_eq!("-1.0".parse::().unwrap().next_up().value.ilogb(), -1); +} + +#[test] +fn convert_e4m3fn_to_e5m2() { + let mut status; + let mut loses_info = false; + + let test = "1.0".parse::().unwrap(); + let test: Float8E5M2 = unpack!(status=, test.convert(&mut loses_info)); + assert_eq!(1.0, test.to_f32()); + assert!(!loses_info); + assert_eq!(status, Status::OK); + + let test = "0.0".parse::().unwrap(); + let test: Float8E5M2 = unpack!(status=, test.convert(&mut loses_info)); + assert_eq!(0.0, test.to_f32()); + assert!(!loses_info); + assert_eq!(status, Status::OK); + + let test = "0x1.2p0".parse::().unwrap(); // 1.125 + let test: Float8E5M2 = unpack!(status=, test.convert(&mut loses_info)); + assert_eq!(/* 0x1.0p0 */ 1.0, test.to_f32()); + assert!(loses_info); + assert_eq!(status, Status::INEXACT); + + let test = "0x1.6p0".parse::().unwrap(); // 1.375 + let test: Float8E5M2 = unpack!(status=, test.convert(&mut loses_info)); + assert_eq!(/* 0x1.8p0 */ 1.5, test.to_f32()); + assert!(loses_info); + assert_eq!(status, Status::INEXACT); + + // Convert E4M3 denormal to E5M2 normal. 
Should not be truncated, despite the + // destination format having one fewer significand bit + let test = "0x1.Cp-7".parse::().unwrap(); + let test: Float8E5M2 = unpack!(status=, test.convert(&mut loses_info)); + assert_eq!(/* 0x1.Cp-7 */ 0.013671875, test.to_f32()); + assert!(!loses_info); + assert_eq!(status, Status::OK); + + // Test convert from NaN + let test = "nan".parse::().unwrap(); + let test: Float8E5M2 = unpack!(status=, test.convert(&mut loses_info)); + assert!(test.to_f32().is_nan()); + assert!(!loses_info); + assert_eq!(status, Status::OK); +} + +#[test] +fn convert_e5m2_to_e4m3fn() { + let mut status; + let mut loses_info = false; + + let test = "1.0".parse::().unwrap(); + let test: Float8E4M3FN = unpack!(status=, test.convert(&mut loses_info)); + assert_eq!(1.0, test.to_f32()); + assert!(!loses_info); + assert_eq!(status, Status::OK); + + let test = "0.0".parse::().unwrap(); + let test: Float8E4M3FN = unpack!(status=, test.convert(&mut loses_info)); + assert_eq!(0.0, test.to_f32()); + assert!(!loses_info); + assert_eq!(status, Status::OK); + + let test = "0x1.Cp8".parse::().unwrap(); // 448 + let test: Float8E4M3FN = unpack!(status=, test.convert(&mut loses_info)); + assert_eq!(/* 0x1.Cp8 */ 448.0, test.to_f32()); + assert!(!loses_info); + assert_eq!(status, Status::OK); + + // Test overflow + let test = "0x1.0p9".parse::().unwrap(); // 512 + let test: Float8E4M3FN = unpack!(status=, test.convert(&mut loses_info)); + assert!(test.to_f32().is_nan()); + assert!(loses_info); + assert_eq!(status, Status::OVERFLOW | Status::INEXACT); + + // Test underflow + let test = "0x1.0p-10".parse::().unwrap(); + let test: Float8E4M3FN = unpack!(status=, test.convert(&mut loses_info)); + assert_eq!(0., test.to_f32()); + assert!(loses_info); + assert_eq!(status, Status::UNDERFLOW | Status::INEXACT); + + // Test rounding up to smallest denormal number + let test = "0x1.8p-10".parse::().unwrap(); + let test: Float8E4M3FN = unpack!(status=, test.convert(&mut 
loses_info)); + assert_eq!(/* 0x1.0p-9 */ 0.001953125, test.to_f32()); + assert!(loses_info); + assert_eq!(status, Status::UNDERFLOW | Status::INEXACT); + + // Testing inexact rounding to denormal number + let test = "0x1.8p-9".parse::().unwrap(); + let test: Float8E4M3FN = unpack!(status=, test.convert(&mut loses_info)); + assert_eq!(/* 0x1.0p-8 */ 0.00390625, test.to_f32()); + assert!(loses_info); + assert_eq!(status, Status::UNDERFLOW | Status::INEXACT); + + let nan = "nan".parse::().unwrap(); + + // Testing convert from Inf + let test = "inf".parse::().unwrap(); + let test: Float8E4M3FN = unpack!(status=, test.convert(&mut loses_info)); + assert!(test.to_f32().is_nan()); + assert!(loses_info); + assert_eq!(status, Status::INEXACT); + assert!(test.bitwise_eq(nan)); + + // Testing convert from quiet NaN + let test = "nan".parse::().unwrap(); + let test: Float8E4M3FN = unpack!(status=, test.convert(&mut loses_info)); + assert!(test.to_f32().is_nan()); + assert!(loses_info); + assert_eq!(status, Status::OK); + assert!(test.bitwise_eq(nan)); + + // Testing convert from signaling NaN + let test = "snan".parse::().unwrap(); + let test: Float8E4M3FN = unpack!(status=, test.convert(&mut loses_info)); + assert!(test.to_f32().is_nan()); + assert!(loses_info); + assert_eq!(status, Status::INVALID_OP); + assert!(test.bitwise_eq(nan)); +} + +#[test] +fn float8e4m3fn_infinity() { + let t = Float8E4M3FN::INFINITY; + assert!(t.is_nan()); + assert!(!t.is_infinite()); +} + +#[test] +fn float8e4m3fn_from_string() { + // Exactly representable + assert_eq!(448.0, "448".parse::().unwrap().to_f64()); + // Round down to maximum value + assert_eq!(448.0, "464".parse::().unwrap().to_f64()); + // Round up, causing overflow to NaN + assert!("465".parse::().unwrap().is_nan()); + // Overflow without rounding + assert!("480".parse::().unwrap().is_nan()); + // Inf converted to NaN + assert!("inf".parse::().unwrap().is_nan()); + // NaN converted to NaN + 
assert!("nan".parse::().unwrap().is_nan()); +} + +#[test] +fn float8e4m3fn_add() { + let qnan = Float8E4M3FN::NAN; + + let from_str = |s: &str| s.parse::().unwrap(); + + let addition_tests = [ + // Test addition operations involving NaN, overflow, and the max E4M3 + // value (448) because E4M3 differs from IEEE-754 types in these regards + (from_str("448"), from_str("16"), "448", Status::INEXACT, Category::Normal, Round::NearestTiesToEven), + ( + from_str("448"), + from_str("18"), + "NaN", + Status::OVERFLOW | Status::INEXACT, + Category::NaN, + Round::NearestTiesToEven, + ), + ( + from_str("448"), + from_str("32"), + "NaN", + Status::OVERFLOW | Status::INEXACT, + Category::NaN, + Round::NearestTiesToEven, + ), + ( + from_str("-448"), + from_str("-32"), + "-NaN", + Status::OVERFLOW | Status::INEXACT, + Category::NaN, + Round::NearestTiesToEven, + ), + (qnan, from_str("-448"), "NaN", Status::OK, Category::NaN, Round::NearestTiesToEven), + (from_str("448"), from_str("-32"), "416", Status::OK, Category::Normal, Round::NearestTiesToEven), + (from_str("448"), from_str("0"), "448", Status::OK, Category::Normal, Round::NearestTiesToEven), + (from_str("448"), from_str("32"), "448", Status::INEXACT, Category::Normal, Round::TowardZero), + (from_str("448"), from_str("448"), "448", Status::INEXACT, Category::Normal, Round::TowardZero), + ]; + + for case @ &(x, y, e_result, e_status, e_category, round) in &addition_tests { + let status; + let result = unpack!(status=, x.add_r(y, round)); + assert_eq!(e_status, status); + assert_eq!(e_category, result.category()); + assert!(result.bitwise_eq(e_result.parse::().unwrap()), "result = {result:?}, case = {case:?}"); + } +} + +#[test] +fn float8e4m3fn_divide_by_zero() { + let x = "1".parse::().unwrap(); + let zero = "0".parse::().unwrap(); + let status; + assert!(unpack!(status=, x / zero).is_nan()); + assert_eq!(status, Status::DIV_BY_ZERO); +} + +#[test] +fn float8e4m3fn_next() { + let mut status; + + // nextUp on positive numbers 
+ for i in 0..127 { + let test = Float8E4M3FN::from_bits(i); + let expected = Float8E4M3FN::from_bits(i + 1); + assert!(unpack!(status=, test.next_up()).bitwise_eq(expected)); + assert_eq!(status, Status::OK); + } + + // nextUp on negative zero + let test = -Float8E4M3FN::ZERO; + let expected = Float8E4M3FN::SMALLEST; + assert!(unpack!(status=, test.next_up()).bitwise_eq(expected)); + assert_eq!(status, Status::OK); + + // nextUp on negative nonzero numbers + for i in 129..255 { + let test = Float8E4M3FN::from_bits(i); + let expected = Float8E4M3FN::from_bits(i - 1); + assert!(unpack!(status=, test.next_up()).bitwise_eq(expected)); + assert_eq!(status, Status::OK); + } + + // nextUp on NaN + let test = Float8E4M3FN::qnan(None); + let expected = Float8E4M3FN::qnan(None); + assert!(unpack!(status=, test.next_up()).bitwise_eq(expected)); + assert_eq!(status, Status::OK); + + // nextDown on positive nonzero finite numbers + for i in 1..127 { + let test = Float8E4M3FN::from_bits(i); + let expected = Float8E4M3FN::from_bits(i - 1); + assert!(unpack!(status=, test.next_down()).bitwise_eq(expected)); + assert_eq!(status, Status::OK); + } + + // nextDown on zero (the operand here is negative zero) + let test = -Float8E4M3FN::ZERO; + let expected = -Float8E4M3FN::SMALLEST; + assert!(unpack!(status=, test.next_down()).bitwise_eq(expected)); + assert_eq!(status, Status::OK); + + // nextDown on negative finite numbers + for i in 128..255 { + let test = Float8E4M3FN::from_bits(i); + let expected = Float8E4M3FN::from_bits(i + 1); + assert!(unpack!(status=, test.next_down()).bitwise_eq(expected)); + assert_eq!(status, Status::OK); + } + + // nextDown on NaN + let test = Float8E4M3FN::qnan(None); + let expected = Float8E4M3FN::qnan(None); + assert!(unpack!(status=, test.next_down()).bitwise_eq(expected)); + assert_eq!(status, Status::OK); +} + +#[test] +fn float8e4m3fn_exhaustive() { + // Test each of the 256 Float8E4M3FN values. 
+ for i in 0..=u8::MAX { + let test = Float8E4M3FN::from_bits(i.into()); + + // isLargest + if i == 126 || i == 254 { + assert!(test.is_largest()); + assert_eq!(test.abs().to_f64(), 448.); + } else { + assert!(!test.is_largest()); + } + + // isSmallest + if i == 1 || i == 129 { + assert!(test.is_smallest()); + assert_eq!(test.abs().to_f64(), /* 0x1p-9 */ 0.001953125); + } else { + assert!(!test.is_smallest()); + } + + // convert to BFloat + let status; + let mut loses_info = false; + let test2: BFloat = unpack!(status=, test.convert(&mut loses_info)); + assert_eq!(status, Status::OK); + assert!(!loses_info); + if i == 127 || i == 255 { + assert!(test2.is_nan()); + } else { + assert_eq!(test.to_f32(), test2.to_f32()); + } + + // bitcastToAPInt + assert_eq!(u128::from(i), test.to_bits()); + } +} + +#[test] +fn float8e4m3fn_exhaustive_pair() { + // Test each pair of Float8E4M3FN values. + for i in 0..=u8::MAX { + for j in 0..=u8::MAX { + let x = Float8E4M3FN::from_bits(i.into()); + let y = Float8E4M3FN::from_bits(j.into()); + + let mut loses_info = false; + let x16: Half = x.convert(&mut loses_info).value; + assert!(!loses_info); + let y16: Half = y.convert(&mut loses_info).value; + assert!(!loses_info); + + // Add + let z = (x + y).value; + let z16 = (x16 + y16).value; + assert!(z.bitwise_eq(z16.convert(&mut loses_info).value), "i={i}, j={j}"); + + // Subtract + let z = (x - y).value; + let z16 = (x16 - y16).value; + assert!(z.bitwise_eq(z16.convert(&mut loses_info).value), "i={i}, j={j}"); + + // Multiply + let z = (x * y).value; + let z16 = (x16 * y16).value; + assert!(z.bitwise_eq(z16.convert(&mut loses_info).value), "i={i}, j={j}"); + + // Divide + let z = (x / y).value; + let z16 = (x16 / y16).value; + assert!(z.bitwise_eq(z16.convert(&mut loses_info).value), "i={i}, j={j}"); + + // Mod + let z = (x % y).value; + let z16 = (x16 % y16).value; + assert!(z.bitwise_eq(z16.convert(&mut loses_info).value), "i={i}, j={j}"); + + // Remainder + let z = 
x.ieee_rem(y).value; + let z16 = x16.ieee_rem(y16).value; + assert!(z.bitwise_eq(z16.convert(&mut loses_info).value), "i={i}, j={j}"); + } + } +} + +#[test] +fn f8_to_string() { + for_each_float_type!(for test::()); + fn test() { + if F::BITS != 8 { + return; + } + + // NOTE(eddyb) this was buggy upstream as it didn't test `F` but `Float8E5M2`, + // https://github.com/llvm/llvm-project/commit/6109e70c72fc5171d25c4467fc3cfe6eb2029f50 + // fixed it upstream so we've effectively backported that commit. + for i in 0..=u8::MAX { + let test = F::from_bits(i.into()); + let str = test.to_string(); + + if test.is_nan() { + assert_eq!(str, "NaN"); + } else { + assert!(test.bitwise_eq(str.parse::().unwrap())); + } + } + } +} + +// HACK(eddyb) C `{FLT,DBL}_TRUE_MIN` / C++ `std::numeric_limits<T>::denorm_min` +// equivalents, for the tests below, as Rust seems to lack anything like them, +// but their bit-patterns are thankfully trivial, with the main caveat that they +// can't be `const` (subnormals and NaNs are banned from CTFE `{to,from}_bits`). 
+fn f64_smallest_subnormal() -> f64 { + f64::from_bits(1) +} +fn f32_smallest_subnormal() -> f32 { + f32::from_bits(1) +} + +#[test] +fn double_to_f64() { + let d_pos_zero = Double::from_f64(0.0); + assert!(Double::from_f64(d_pos_zero.to_f64()).is_pos_zero()); + let d_neg_zero = Double::from_f64(-0.0); + assert!(Double::from_f64(d_neg_zero.to_f64()).is_neg_zero()); + + let d_one = Double::from_f64(1.0); + assert_eq!(1.0, d_one.to_f64()); + let d_pos_largest = Double::largest(); + assert_eq!(f64::MAX, d_pos_largest.to_f64()); + let d_neg_largest = -Double::largest(); + assert_eq!(-f64::MAX, d_neg_largest.to_f64()); + let d_pos_smallest = Double::smallest_normalized(); + assert_eq!(f64::MIN_POSITIVE, d_pos_smallest.to_f64()); + let d_neg_smallest = -Double::smallest_normalized(); + assert_eq!(-f64::MIN_POSITIVE, d_neg_smallest.to_f64()); + + let d_smallest_denorm = Double::SMALLEST; + assert_eq!(f64_smallest_subnormal(), d_smallest_denorm.to_f64()); + let d_largest_denorm = "0x0.FFFFFFFFFFFFFp-1022".parse::().unwrap(); + assert_eq!(/*0x0.FFFFFFFFFFFFFp-1022*/ 2.225073858507201e-308, d_largest_denorm.to_f64()); + + let d_pos_inf = Double::INFINITY; + assert_eq!(f64::INFINITY, d_pos_inf.to_f64()); + let d_neg_inf = -Double::INFINITY; + assert_eq!(-f64::INFINITY, d_neg_inf.to_f64()); + let d_qnan = Double::qnan(None); + assert!(d_qnan.to_f64().is_nan()); +} + +#[test] +fn single_to_f64() { + let f_pos_zero = Single::from_f32(0.0); + assert!(Double::from_f64(f_pos_zero.to_f64()).is_pos_zero()); + let f_neg_zero = Single::from_f32(-0.0); + assert!(Double::from_f64(f_neg_zero.to_f64()).is_neg_zero()); + + let f_one = Single::from_f32(1.0); + assert_eq!(1.0, f_one.to_f64()); + let f_pos_largest = Single::largest(); + assert_eq!(f32::MAX as f64, f_pos_largest.to_f64()); + let f_neg_largest = -Single::largest(); + assert_eq!(-f32::MAX as f64, f_neg_largest.to_f64()); + let f_pos_smallest = Single::smallest_normalized(); + assert_eq!(f32::MIN_POSITIVE as f64, 
f_pos_smallest.to_f64()); + let f_neg_smallest = -Single::smallest_normalized(); + assert_eq!(-f32::MIN_POSITIVE as f64, f_neg_smallest.to_f64()); + + let f_smallest_denorm = Single::SMALLEST; + assert_eq!(f32_smallest_subnormal() as f64, f_smallest_denorm.to_f64()); + let f_largest_denorm = "0x0.FFFFFEp-126".parse::().unwrap(); + assert_eq!(/*0x0.FFFFFEp-126*/ 1.1754942106924411e-38, f_largest_denorm.to_f64()); + + let f_pos_inf = Single::INFINITY; + assert_eq!(f64::INFINITY, f_pos_inf.to_f64()); + let f_neg_inf = -Single::INFINITY; + assert_eq!(-f64::INFINITY, f_neg_inf.to_f64()); + let f_qnan = Single::qnan(None); + assert!(f_qnan.to_f64().is_nan()); + + let h_pos_zero = Half::ZERO; + assert!(Double::from_f64(h_pos_zero.to_f64()).is_pos_zero()); + let h_neg_zero = -Half::ZERO; + assert!(Double::from_f64(h_neg_zero.to_f64()).is_neg_zero()); +} + +#[test] +fn half_to_f64() { + let h_one = "1.0".parse::().unwrap(); + assert_eq!(1.0, h_one.to_f64()); + let h_pos_largest = Half::largest(); + assert_eq!(65504.0, h_pos_largest.to_f64()); + let h_neg_largest = -Half::largest(); + assert_eq!(-65504.0, h_neg_largest.to_f64()); + let h_pos_smallest = Half::smallest_normalized(); + assert_eq!(/*0x1.p-14*/ 6.103515625e-05, h_pos_smallest.to_f64()); + let h_neg_smallest = -Half::smallest_normalized(); + assert_eq!(/*-0x1.p-14*/ -6.103515625e-05, h_neg_smallest.to_f64()); + + let h_smallest_denorm = Half::SMALLEST; + assert_eq!(/*0x1.p-24*/ 5.960464477539063e-08, h_smallest_denorm.to_f64()); + let h_largest_denorm = "0x1.FFCp-14".parse::().unwrap(); + assert_eq!(/*0x1.FFCp-14*/ 0.00012201070785522461, h_largest_denorm.to_f64()); + + let h_pos_inf = Half::INFINITY; + assert_eq!(f64::INFINITY, h_pos_inf.to_f64()); + let h_neg_inf = -Half::INFINITY; + assert_eq!(-f64::INFINITY, h_neg_inf.to_f64()); + let h_qnan = Half::qnan(None); + assert!(h_qnan.to_f64().is_nan()); +} + +#[test] +fn bfloat_to_f64() { + let b_pos_zero = Half::ZERO; + 
assert!(Double::from_f64(b_pos_zero.to_f64()).is_pos_zero()); + let b_neg_zero = -Half::ZERO; + assert!(Double::from_f64(b_neg_zero.to_f64()).is_neg_zero()); + + let b_one = "1.0".parse::().unwrap(); + assert_eq!(1.0, b_one.to_f64()); + let b_pos_largest = BFloat::largest(); + assert_eq!(/*0x1.FEp127*/ 3.3895313892515355e+38, b_pos_largest.to_f64()); + let b_neg_largest = -BFloat::largest(); + assert_eq!(/*-0x1.FEp127*/ -3.3895313892515355e+38, b_neg_largest.to_f64()); + let b_pos_smallest = BFloat::smallest_normalized(); + assert_eq!(/*0x1.p-126*/ 1.1754943508222875e-38, b_pos_smallest.to_f64()); + let b_neg_smallest = -BFloat::smallest_normalized(); + assert_eq!(/*-0x1.p-126*/ -1.1754943508222875e-38, b_neg_smallest.to_f64()); + + let b_smallest_denorm = BFloat::SMALLEST; + assert_eq!(/*0x1.p-133*/ 9.183549615799121e-41, b_smallest_denorm.to_f64()); + let b_largest_denorm = "0x1.FCp-127".parse::().unwrap(); + assert_eq!(/*0x1.FCp-127*/ 1.1663108012064884e-38, b_largest_denorm.to_f64()); + + let b_pos_inf = BFloat::INFINITY; + assert_eq!(f64::INFINITY, b_pos_inf.to_f64()); + let b_neg_inf = -BFloat::INFINITY; + assert_eq!(-f64::INFINITY, b_neg_inf.to_f64()); + let b_qnan = BFloat::qnan(None); + assert!(b_qnan.to_f64().is_nan()); +} + +#[test] +fn float8e5m2_to_f64() { + let one = "1.0".parse::().unwrap(); + assert_eq!(1.0, one.to_f64()); + let two = "2.0".parse::().unwrap(); + assert_eq!(2.0, two.to_f64()); + let pos_largest = Float8E5M2::largest(); + assert_eq!(5.734400e+04, pos_largest.to_f64()); + let neg_largest = -Float8E5M2::largest(); + assert_eq!(-5.734400e+04, neg_largest.to_f64()); + let pos_smallest = Float8E5M2::smallest_normalized(); + assert_eq!(/* 0x1.p-14 */ 6.103515625e-05, pos_smallest.to_f64()); + let neg_smallest = -Float8E5M2::smallest_normalized(); + assert_eq!(/* -0x1.p-14 */ -6.103515625e-05, neg_smallest.to_f64()); + + let smallest_denorm = Float8E5M2::SMALLEST; + assert!(smallest_denorm.is_denormal()); + assert_eq!(/* 0x1p-16 */ 
0.0000152587890625, smallest_denorm.to_f64()); + + let pos_inf = Float8E5M2::INFINITY; + assert_eq!(f64::INFINITY, pos_inf.to_f64()); + let neg_inf = -Float8E5M2::INFINITY; + assert_eq!(-f64::INFINITY, neg_inf.to_f64()); + let qnan = Float8E5M2::qnan(None); + assert!(qnan.to_f64().is_nan()); +} + +#[test] +fn float8e4m3fn_to_f64() { + let one = "1.0".parse::().unwrap(); + assert_eq!(1.0, one.to_f64()); + let two = "2.0".parse::().unwrap(); + assert_eq!(2.0, two.to_f64()); + let pos_largest = Float8E4M3FN::largest(); + assert_eq!(448., pos_largest.to_f64()); + let neg_largest = -Float8E4M3FN::largest(); + assert_eq!(-448., neg_largest.to_f64()); + let pos_smallest = Float8E4M3FN::smallest_normalized(); + assert_eq!(/* 0x1.p-6 */ 0.015625, pos_smallest.to_f64()); + let neg_smallest = -Float8E4M3FN::smallest_normalized(); + assert_eq!(/* -0x1.p-6 */ -0.015625, neg_smallest.to_f64()); + + let smallest_denorm = Float8E4M3FN::SMALLEST; + assert!(smallest_denorm.is_denormal()); + assert_eq!(/* 0x1p-9 */ 0.001953125, smallest_denorm.to_f64()); + + let qnan = Float8E4M3FN::qnan(None); + assert!(qnan.to_f64().is_nan()); +} + +#[test] +fn single_to_f32() { + let f_pos_zero = Single::from_f32(0.0); + assert!(Single::from_f32(f_pos_zero.to_f32()).is_pos_zero()); + let f_neg_zero = Single::from_f32(-0.0); + assert!(Single::from_f32(f_neg_zero.to_f32()).is_neg_zero()); + + let f_one = Single::from_f32(1.0); + assert_eq!(1.0, f_one.to_f32()); + let f_pos_largest = Single::largest(); + assert_eq!(f32::MAX, f_pos_largest.to_f32()); + let f_neg_largest = -Single::largest(); + assert_eq!(-f32::MAX, f_neg_largest.to_f32()); + let f_pos_smallest = Single::smallest_normalized(); + assert_eq!(f32::MIN_POSITIVE, f_pos_smallest.to_f32()); + let f_neg_smallest = -Single::smallest_normalized(); + assert_eq!(-f32::MIN_POSITIVE, f_neg_smallest.to_f32()); + + let f_smallest_denorm = Single::SMALLEST; + assert_eq!(f32_smallest_subnormal(), f_smallest_denorm.to_f32()); + let f_largest_denorm = 
"0x1.FFFFFEp-126".parse::().unwrap(); + assert_eq!(/*0x1.FFFFFEp-126*/ 2.3509885615147286e-38, f_largest_denorm.to_f32()); + + let f_pos_inf = Single::INFINITY; + assert_eq!(f32::INFINITY, f_pos_inf.to_f32()); + let f_neg_inf = -Single::INFINITY; + assert_eq!(-f32::INFINITY, f_neg_inf.to_f32()); + let f_qnan = Single::qnan(None); + assert!(f_qnan.to_f32().is_nan()); +} + +#[test] +fn half_to_f32() { + let h_pos_zero = Half::ZERO; + assert!(Single::from_f32(h_pos_zero.to_f32()).is_pos_zero()); + let h_neg_zero = -Half::ZERO; + assert!(Single::from_f32(h_neg_zero.to_f32()).is_neg_zero()); + + let h_one = "1.0".parse::().unwrap(); + assert_eq!(1.0, h_one.to_f32()); + let h_pos_largest = Half::largest(); + assert_eq!(/*0x1.FFCp15*/ 65504.0, h_pos_largest.to_f32()); + let h_neg_largest = -Half::largest(); + assert_eq!(/*-0x1.FFCp15*/ -65504.0, h_neg_largest.to_f32()); + let h_pos_smallest = Half::smallest_normalized(); + assert_eq!(/*0x1.p-14*/ 6.103515625e-05, h_pos_smallest.to_f32()); + let h_neg_smallest = -Half::smallest_normalized(); + assert_eq!(/*-0x1.p-14*/ -6.103515625e-05, h_neg_smallest.to_f32()); + + let h_smallest_denorm = Half::SMALLEST; + assert_eq!(/*0x1.p-24*/ 5.960464477539063e-08, h_smallest_denorm.to_f32()); + let h_largest_denorm = "0x1.FFCp-14".parse::().unwrap(); + assert_eq!(/*0x1.FFCp-14*/ 0.00012201070785522461, h_largest_denorm.to_f32()); + + let h_pos_inf = Half::INFINITY; + assert_eq!(f32::INFINITY, h_pos_inf.to_f32()); + let h_neg_inf = -Half::INFINITY; + assert_eq!(-f32::INFINITY, h_neg_inf.to_f32()); + let h_qnan = Half::qnan(None); + assert!(h_qnan.to_f32().is_nan()); +} + +#[test] +fn bfloat_to_f32() { + let b_pos_zero = BFloat::ZERO; + assert!(Single::from_f32(b_pos_zero.to_f32()).is_pos_zero()); + let b_neg_zero = -BFloat::ZERO; + assert!(Single::from_f32(b_neg_zero.to_f32()).is_neg_zero()); + + let b_one = "1.0".parse::().unwrap(); + assert_eq!(1.0, b_one.to_f32()); + let b_pos_largest = BFloat::largest(); + assert_eq!(/*0x1.FEp127*/ 
3.3895313892515355e+38, b_pos_largest.to_f32()); + let b_neg_largest = -BFloat::largest(); + assert_eq!(/*-0x1.FEp127*/ -3.3895313892515355e+38, b_neg_largest.to_f32()); + let b_pos_smallest = BFloat::smallest_normalized(); + assert_eq!(/*0x1.p-126*/ 1.1754943508222875e-38, b_pos_smallest.to_f32()); + let b_neg_smallest = -BFloat::smallest_normalized(); + assert_eq!(/*-0x1.p-126*/ -1.1754943508222875e-38, b_neg_smallest.to_f32()); + + let b_smallest_denorm = BFloat::SMALLEST; + assert_eq!(/*0x1.p-133*/ 9.183549615799121e-41, b_smallest_denorm.to_f32()); + let b_largest_denorm = "0x1.FCp-127".parse::().unwrap(); + assert_eq!(/*0x1.FCp-127*/ 1.1663108012064884e-38, b_largest_denorm.to_f32()); + + let b_pos_inf = BFloat::INFINITY; + assert_eq!(f32::INFINITY, b_pos_inf.to_f32()); + let b_neg_inf = -BFloat::INFINITY; + assert_eq!(-f32::INFINITY, b_neg_inf.to_f32()); + let b_qnan = BFloat::qnan(None); + assert!(b_qnan.to_f32().is_nan()); +} + +#[test] +fn float8e5m2_to_f32() { + let pos_zero = Float8E5M2::ZERO; + assert!(Single::from_f32(pos_zero.to_f32()).is_pos_zero()); + let neg_zero = -Float8E5M2::ZERO; + assert!(Single::from_f32(neg_zero.to_f32()).is_neg_zero()); + + let one = "1.0".parse::().unwrap(); + assert_eq!(1.0, one.to_f32()); + let two = "2.0".parse::().unwrap(); + assert_eq!(2.0, two.to_f32()); + + let pos_largest = Float8E5M2::largest(); + assert_eq!(5.734400e+04, pos_largest.to_f32()); + let neg_largest = -Float8E5M2::largest(); + assert_eq!(-5.734400e+04, neg_largest.to_f32()); + let pos_smallest = Float8E5M2::smallest_normalized(); + assert_eq!(/* 0x1.p-14 */ 6.103515625e-05, pos_smallest.to_f32()); + let neg_smallest = -Float8E5M2::smallest_normalized(); + assert_eq!(/* -0x1.p-14 */ -6.103515625e-05, neg_smallest.to_f32()); + + let smallest_denorm = Float8E5M2::SMALLEST; + assert!(smallest_denorm.is_denormal()); + assert_eq!(/* 0x1.p-16 */ 0.0000152587890625, smallest_denorm.to_f32()); + + let pos_inf = Float8E5M2::INFINITY; + 
assert_eq!(f32::INFINITY, pos_inf.to_f32()); + let neg_inf = -Float8E5M2::INFINITY; + assert_eq!(-f32::INFINITY, neg_inf.to_f32()); + let qnan = Float8E5M2::qnan(None); + assert!(qnan.to_f32().is_nan()); +} + +#[test] +fn float8e4m3fn_to_f32() { + let pos_zero = Float8E4M3FN::ZERO; + assert!(Single::from_f32(pos_zero.to_f32()).is_pos_zero()); + let neg_zero = -Float8E4M3FN::ZERO; + assert!(Single::from_f32(neg_zero.to_f32()).is_neg_zero()); + + let one = "1.0".parse::().unwrap(); + assert_eq!(1.0, one.to_f32()); + let two = "2.0".parse::().unwrap(); + assert_eq!(2.0, two.to_f32()); + + let pos_largest = Float8E4M3FN::largest(); + assert_eq!(448., pos_largest.to_f32()); + let neg_largest = -Float8E4M3FN::largest(); + assert_eq!(-448.0, neg_largest.to_f32()); + let pos_smallest = Float8E4M3FN::smallest_normalized(); + assert_eq!(/* 0x1.p-6 */ 0.015625, pos_smallest.to_f32()); + let neg_smallest = -Float8E4M3FN::smallest_normalized(); + assert_eq!(/* -0x1.p-6 */ -0.015625, neg_smallest.to_f32()); + + let smallest_denorm = Float8E4M3FN::SMALLEST; + assert!(smallest_denorm.is_denormal()); + assert_eq!(/* 0x1.p-9 */ 0.001953125, smallest_denorm.to_f32()); + + let qnan = Float8E4M3FN::qnan(None); + assert!(qnan.to_f32().is_nan()); }