diff --git a/compiler/rustc_codegen_cranelift/build_system/utils.rs b/compiler/rustc_codegen_cranelift/build_system/utils.rs index f239976845964..d9807155a3d5d 100644 --- a/compiler/rustc_codegen_cranelift/build_system/utils.rs +++ b/compiler/rustc_codegen_cranelift/build_system/utils.rs @@ -213,11 +213,13 @@ pub(crate) fn copy_dir_recursively(from: &Path, to: &Path) { if filename == "." || filename == ".." { continue; } + let src = from.join(&filename); + let dst = to.join(&filename); if entry.metadata().unwrap().is_dir() { - fs::create_dir(to.join(&filename)).unwrap(); - copy_dir_recursively(&from.join(&filename), &to.join(&filename)); + fs::create_dir(&dst).unwrap_or_else(|e| panic!("failed to create {dst:?}: {e}")); + copy_dir_recursively(&src, &dst); } else { - fs::copy(from.join(&filename), to.join(&filename)).unwrap(); + fs::copy(&src, &dst).unwrap_or_else(|e| panic!("failed to copy {src:?}->{dst:?}: {e}")); } } } diff --git a/library/compiler-builtins/.editorconfig b/library/compiler-builtins/.editorconfig new file mode 100644 index 0000000000000..f0735cedfbd6b --- /dev/null +++ b/library/compiler-builtins/.editorconfig @@ -0,0 +1,16 @@ +# EditorConfig helps developers define and maintain consistent +# coding styles between different editors and IDEs +# editorconfig.org + +root = true + +[*] +end_of_line = lf +charset = utf-8 +trim_trailing_whitespace = true +insert_final_newline = true +indent_style = space +indent_size = 4 + +[*.yml] +indent_size = 2 diff --git a/library/compiler-builtins/.git-blame-ignore-revs b/library/compiler-builtins/.git-blame-ignore-revs new file mode 100644 index 0000000000000..2ede10da53d74 --- /dev/null +++ b/library/compiler-builtins/.git-blame-ignore-revs @@ -0,0 +1,6 @@ +# Use `git config blame.ignorerevsfile .git-blame-ignore-revs` to make +# `git blame` ignore the following commits. + +# Reformat with a new `.rustfmt.toml` +# In rust-lang/libm this was 5882cabb83c30bf7c36023f9a55a80583636b0e8 +4bb07a6275cc628ef81c65ac971dc6479963322f diff --git a/library/compiler-builtins/.github/workflows/main.yaml b/library/compiler-builtins/.github/workflows/main.yaml new file mode 100644 index 0000000000000..d13dd6b0f6462 --- /dev/null +++ b/library/compiler-builtins/.github/workflows/main.yaml @@ -0,0 +1,344 @@ +name: CI +on: + push: { branches: [master] } + pull_request: + +concurrency: + # Make sure that new pushes cancel running jobs + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +env: + CARGO_TERM_COLOR: always + RUSTDOCFLAGS: -Dwarnings + RUSTFLAGS: -Dwarnings + RUST_BACKTRACE: full + BENCHMARK_RUSTC: nightly-2025-01-16 # Pin the toolchain for reproducable results + +jobs: + # Determine which tests should be run based on changed files. 
+ calculate_vars: + name: Calculate workflow variables + runs-on: ubuntu-24.04 + timeout-minutes: 10 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PR_NUMBER: ${{ github.event.pull_request.number }} + outputs: + extensive_matrix: ${{ steps.script.outputs.extensive_matrix }} + may_skip_libm_ci: ${{ steps.script.outputs.may_skip_libm_ci }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 500 + - name: Fetch pull request ref + run: git fetch origin "$GITHUB_REF:$GITHUB_REF" + if: github.event_name == 'pull_request' + - run: python3 ci/ci-util.py generate-matrix >> "$GITHUB_OUTPUT" + id: script + + test: + name: Build and test + timeout-minutes: 60 + strategy: + fail-fast: false + matrix: + include: + - target: aarch64-apple-darwin + os: macos-15 + - target: aarch64-unknown-linux-gnu + os: ubuntu-24.04-arm + - target: aarch64-pc-windows-msvc + os: windows-2025 + test_verbatim: 1 + build_only: 1 + - target: arm-unknown-linux-gnueabi + os: ubuntu-24.04 + - target: arm-unknown-linux-gnueabihf + os: ubuntu-24.04 + - target: armv7-unknown-linux-gnueabihf + os: ubuntu-24.04 + - target: i586-unknown-linux-gnu + os: ubuntu-24.04 + - target: i686-unknown-linux-gnu + os: ubuntu-24.04 + - target: loongarch64-unknown-linux-gnu + os: ubuntu-24.04 + - target: powerpc-unknown-linux-gnu + os: ubuntu-24.04 + - target: powerpc64-unknown-linux-gnu + os: ubuntu-24.04 + - target: powerpc64le-unknown-linux-gnu + os: ubuntu-24.04 + - target: riscv64gc-unknown-linux-gnu + os: ubuntu-24.04 + - target: thumbv6m-none-eabi + os: ubuntu-24.04 + - target: thumbv7em-none-eabi + os: ubuntu-24.04 + - target: thumbv7em-none-eabihf + os: ubuntu-24.04 + - target: thumbv7m-none-eabi + os: ubuntu-24.04 + - target: wasm32-unknown-unknown + os: ubuntu-24.04 + - target: x86_64-unknown-linux-gnu + os: ubuntu-24.04 + - target: x86_64-apple-darwin + os: macos-13 + - target: i686-pc-windows-msvc + os: windows-2025 + test_verbatim: 1 + - target: x86_64-pc-windows-msvc + os: windows-2025 + test_verbatim: 1 + - target: i686-pc-windows-gnu + os: windows-2025 + channel: nightly-i686-gnu + - target: x86_64-pc-windows-gnu + os: windows-2025 + channel: nightly-x86_64-gnu + runs-on: ${{ matrix.os }} + needs: [calculate_vars] + env: + BUILD_ONLY: ${{ matrix.build_only }} + TEST_VERBATIM: ${{ matrix.test_verbatim }} + MAY_SKIP_LIBM_CI: ${{ needs.calculate_vars.outputs.may_skip_libm_ci }} + steps: + - name: Print runner information + run: uname -a + - uses: actions/checkout@v4 + with: + submodules: true + - name: Install Rust (rustup) + shell: bash + run: | + channel="nightly" + # Account for channels that have required components (MinGW) + [ -n "${{ matrix.channel }}" ] && channel="${{ matrix.channel }}" + rustup update "$channel" --no-self-update + rustup default "$channel" + rustup target add "${{ matrix.target }}" + rustup component add llvm-tools-preview + - uses: taiki-e/install-action@nextest + - uses: Swatinem/rust-cache@v2 + with: + key: ${{ matrix.target }} + - name: Cache Docker layers + uses: actions/cache@v4 + if: matrix.os == 'ubuntu-24.04' + with: + path: /tmp/.buildx-cache + key: ${{ matrix.target }}-buildx-${{ github.sha }} + restore-keys: ${{ matrix.target }}-buildx- + # Configure buildx to use Docker layer caching + - uses: docker/setup-buildx-action@v3 + if: matrix.os == 'ubuntu-24.04' + + - name: Cache compiler-rt + id: cache-compiler-rt + uses: actions/cache@v4 + with: + path: compiler-rt + key: ${{ runner.os }}-compiler-rt-${{ hashFiles('ci/download-compiler-rt.sh') }} + - name: Download compiler-rt 
reference sources + if: steps.cache-compiler-rt.outputs.cache-hit != 'true' + run: ./ci/download-compiler-rt.sh + shell: bash + - run: echo "RUST_COMPILER_RT_ROOT=$(realpath ./compiler-rt)" >> "$GITHUB_ENV" + shell: bash + + - name: Verify API list + if: matrix.os == 'ubuntu-24.04' + run: python3 etc/update-api-list.py --check + + # Non-linux tests just use our raw script + - name: Run locally + if: matrix.os != 'ubuntu-24.04' + shell: bash + run: ./ci/run.sh ${{ matrix.target }} + + # Otherwise we use our docker containers to run builds + - name: Run in Docker + if: matrix.os == 'ubuntu-24.04' + run: ./ci/run-docker.sh ${{ matrix.target }} + + - name: Print test logs if available + if: always() + run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi + shell: bash + + # Workaround to keep Docker cache smaller + # https://github.com/docker/build-push-action/issues/252 + # https://github.com/moby/buildkit/issues/1896 + - name: Move Docker cache + if: matrix.os == 'ubuntu-24.04' + run: | + rm -rf /tmp/.buildx-cache + mv /tmp/.buildx-cache-new /tmp/.buildx-cache + + clippy: + name: Clippy + runs-on: ubuntu-24.04 + timeout-minutes: 10 + steps: + - uses: actions/checkout@v4 + with: + submodules: true + # Unlike rustfmt, stable clippy does not work on code with nightly features. + - name: Install nightly `clippy` + run: | + rustup set profile minimal + rustup default nightly + rustup component add clippy + - uses: Swatinem/rust-cache@v2 + - run: cargo clippy --workspace --all-targets + + benchmarks: + name: Benchmarks + runs-on: ubuntu-24.04 + timeout-minutes: 20 + steps: + - uses: actions/checkout@master + with: + submodules: true + - uses: taiki-e/install-action@cargo-binstall + + - name: Set up dependencies + run: | + sudo apt-get update + sudo apt-get install -y valgrind gdb libc6-dbg # Needed for iai-callgrind + rustup update "$BENCHMARK_RUSTC" --no-self-update + rustup default "$BENCHMARK_RUSTC" + # Install the version of iai-callgrind-runner that is specified in Cargo.toml + iai_version="$(cargo metadata --format-version=1 --features icount | + jq -r '.packages[] | select(.name == "iai-callgrind").version')" + cargo binstall -y iai-callgrind-runner --version "$iai_version" + sudo apt-get install valgrind + - uses: Swatinem/rust-cache@v2 + + - name: Run icount benchmarks + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PR_NUMBER: ${{ github.event.pull_request.number }} + run: ./ci/bench-icount.sh + + - name: Upload the benchmark baseline + uses: actions/upload-artifact@v4 + with: + name: ${{ env.BASELINE_NAME }} + path: ${{ env.BASELINE_NAME }}.tar.xz + + - name: Run wall time benchmarks + run: | + # Always use the same seed for benchmarks. Ideally we should switch to a + # non-random generator. + export LIBM_SEED=benchesbenchesbenchesbencheswoo! 
+ cargo bench --package libm-test \ + --no-default-features \ + --features short-benchmarks,build-musl,libm/force-soft-floats + + - name: Print test logs if available + if: always() + run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi + shell: bash + + miri: + name: Miri + runs-on: ubuntu-24.04 + timeout-minutes: 10 + steps: + - uses: actions/checkout@v4 + with: + submodules: true + - name: Install Rust (rustup) + run: rustup update nightly --no-self-update && rustup default nightly + shell: bash + - run: rustup component add miri + - run: cargo miri setup + - uses: Swatinem/rust-cache@v2 + - run: ./ci/miri.sh + + msrv: + name: Check libm MSRV + runs-on: ubuntu-24.04 + timeout-minutes: 10 + env: + RUSTFLAGS: # No need to check warnings on old MSRV, unset `-Dwarnings` + steps: + - uses: actions/checkout@master + - name: Install Rust + run: | + msrv="$(perl -ne 'print if s/rust-version\s*=\s*"(.*)"/\1/g' libm/Cargo.toml)" + echo "MSRV: $msrv" + rustup update "$msrv" --no-self-update && rustup default "$msrv" + - uses: Swatinem/rust-cache@v2 + - run: | + # FIXME(msrv): Remove the workspace Cargo.toml so 1.63 cargo doesn't see + # `edition = "2024"` and get spooked. + rm Cargo.toml + cargo build --manifest-path libm/Cargo.toml + + rustfmt: + name: Rustfmt + runs-on: ubuntu-24.04 + timeout-minutes: 10 + steps: + - uses: actions/checkout@v4 + with: + submodules: true + - name: Install stable `rustfmt` + run: rustup set profile minimal && rustup default stable && rustup component add rustfmt + - run: cargo fmt -- --check + + extensive: + name: Extensive tests for ${{ matrix.ty }} + needs: + # Wait on `clippy` so we have some confidence that the crate will build + - clippy + - calculate_vars + runs-on: ubuntu-24.04 + timeout-minutes: 240 # 4 hours + strategy: + matrix: + # Use the output from `calculate_vars` to create the matrix + # FIXME: it would be better to run all jobs (i.e. all types) but mark those that + # didn't change as skipped, rather than completely excluding the job. However, + # this is not currently possible https://github.com/actions/runner/issues/1985. + include: ${{ fromJSON(needs.calculate_vars.outputs.extensive_matrix).extensive_matrix }} + env: + TO_TEST: ${{ matrix.to_test }} + steps: + - uses: actions/checkout@v4 + with: + submodules: true + - name: Install Rust + run: | + rustup update nightly --no-self-update + rustup default nightly + - uses: Swatinem/rust-cache@v2 + - name: Run extensive tests + run: ./ci/run-extensive.sh + - name: Print test logs if available + run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi + shell: bash + + success: + needs: + - benchmarks + - clippy + - extensive + - miri + - msrv + - rustfmt + - test + runs-on: ubuntu-24.04 + timeout-minutes: 10 + # GitHub branch protection is exceedingly silly and treats "jobs skipped because a dependency + # failed" as success. So we have to do some contortions to ensure the job fails if any of its + # dependencies fails. + if: always() # make sure this is never "skipped" + steps: + # Manually check the status of all dependencies. `if: failure()` does not work. 
+ - name: check if any dependency failed + run: jq --exit-status 'all(.result == "success")' <<< '${{ toJson(needs) }}' diff --git a/library/compiler-builtins/.github/workflows/publish.yaml b/library/compiler-builtins/.github/workflows/publish.yaml new file mode 100644 index 0000000000000..85a33c039d2a1 --- /dev/null +++ b/library/compiler-builtins/.github/workflows/publish.yaml @@ -0,0 +1,25 @@ +name: Release-plz + +permissions: + pull-requests: write + contents: write + +on: + push: { branches: [master] } + +jobs: + release-plz: + name: Release-plz + runs-on: ubuntu-24.04 + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Install Rust (rustup) + run: rustup update nightly --no-self-update && rustup default nightly + - name: Run release-plz + uses: MarcoIeni/release-plz-action@v0.5 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} diff --git a/library/compiler-builtins/.gitignore b/library/compiler-builtins/.gitignore new file mode 100644 index 0000000000000..5287a6c72be41 --- /dev/null +++ b/library/compiler-builtins/.gitignore @@ -0,0 +1,16 @@ +# Rust files +Cargo.lock +target + +# Sources for external files +compiler-rt +*.tar.gz + +# Benchmark cache +baseline-* +iai-home + +# Temporary files +*.bk +*.rs.bk +.#* diff --git a/library/compiler-builtins/.gitmodules b/library/compiler-builtins/.gitmodules new file mode 100644 index 0000000000000..792ed9ab21f06 --- /dev/null +++ b/library/compiler-builtins/.gitmodules @@ -0,0 +1,4 @@ +[submodule "crates/musl-math-sys/musl"] + path = crates/musl-math-sys/musl + url = https://git.musl-libc.org/git/musl + shallow = true diff --git a/library/compiler-builtins/.release-plz.toml b/library/compiler-builtins/.release-plz.toml new file mode 100644 index 0000000000000..8023ade9bfd2f --- /dev/null +++ b/library/compiler-builtins/.release-plz.toml @@ -0,0 +1,13 @@ +[workspace] +# As part of the release process, we delete `libm/Cargo.toml`. Since +# this is only run in CI, we shouldn't need to worry about it. +allow_dirty = true +publish_allow_dirty = true + +[[package]] +name = "compiler_builtins" +semver_check = false +changelog_include = ["libm"] # libm is included as part of builtins + +[[package]] +name = "libm" diff --git a/library/compiler-builtins/.rustfmt.toml b/library/compiler-builtins/.rustfmt.toml new file mode 100644 index 0000000000000..79ac399c1b620 --- /dev/null +++ b/library/compiler-builtins/.rustfmt.toml @@ -0,0 +1,4 @@ +# This matches rustc +style_edition = "2024" +group_imports = "StdExternalCrate" +imports_granularity = "Module" diff --git a/library/compiler-builtins/CONTRIBUTING.md b/library/compiler-builtins/CONTRIBUTING.md new file mode 100644 index 0000000000000..9f67cfc31571d --- /dev/null +++ b/library/compiler-builtins/CONTRIBUTING.md @@ -0,0 +1,167 @@ +# How to contribute + +## compiler-builtins + +1. From the [pending list](compiler-builtins/README.md#progress), pick one or + more intrinsics. +2. Port the version from [`compiler-rt`] and, if applicable, their + [tests][rt-tests]. Note that this crate has generic implementations for a lot + of routines, which may be usable without porting the entire implementation. +3. Add a test to `builtins-test`, comparing the behavior of the ported + intrinsic(s) with their implementation on the testing host. +4. Add the intrinsic to `builtins-test-intrinsics/src/main.rs` to verify it can + be linked on all targets. +5. Send a Pull Request (PR) :tada:. 
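To make step 3 concrete, the comparison usually boils down to feeding the ported routine and the
host's native operation the same inputs (including edge cases) and asserting the results agree. A
minimal sketch of that idea follows; the function name and the hand-picked inputs are illustrative
placeholders, not the crate's actual test harness, which generates inputs with a seedable fuzzer.

```rust
// Hypothetical stand-in for a freshly ported intrinsic; in a real PR this
// would be the routine added to `compiler-builtins`.
fn ported_udivsi3(a: u32, b: u32) -> u32 {
    a / b
}

#[test]
fn udivsi3_matches_host() {
    // Edge cases plus a few ordinary values; the real tests use fuzzed inputs.
    let cases = [(0u32, 1u32), (1, 1), (7, 3), (12345, 67), (u32::MAX, 2), (u32::MAX, u32::MAX)];
    for (a, b) in cases {
        // The host's native `/` serves as the reference implementation.
        assert_eq!(ported_udivsi3(a, b), a / b, "mismatch for {a} / {b}");
    }
}
```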
+ +[`compiler-rt`]: https://github.com/llvm/llvm-project/tree/b6820c35c59a4da3e59c11f657093ffbd79ae1db/compiler-rt/lib/builtins +[rt-tests]: https://github.com/llvm/llvm-project/tree/b6820c35c59a4da3e59c11f657093ffbd79ae1db/compiler-rt/test/builtins + +## Porting Reminders + +1. [Rust][prec-rust] and [C][prec-c] have slightly different operator + precedence. C evaluates comparisons (`== !=`) before bitwise operations + (`& | ^`), while Rust evaluates the other way. +2. C assumes wrapping operations everywhere. Rust panics on overflow when in + debug mode. Consider using the [Wrapping][wrap-ty] type or the explicit + [wrapping_*][wrap-fn] functions where applicable. +3. Note [C implicit casts][casts], especially integer promotion. Rust is much + more explicit about casting, so be sure that any cast which affects the + output is ported to the Rust implementation. +4. Rust has [many functions][i32] for integer or floating point manipulation in + the standard library. Consider using one of these functions rather than + porting a new one. + +[prec-rust]: https://doc.rust-lang.org/reference/expressions.html#expression-precedence +[prec-c]: http://en.cppreference.com/w/c/language/operator_precedence +[wrap-ty]: https://doc.rust-lang.org/core/num/struct.Wrapping.html +[wrap-fn]: https://doc.rust-lang.org/std/primitive.i32.html#method.wrapping_add +[casts]: http://en.cppreference.com/w/cpp/language/implicit_conversion +[i32]: https://doc.rust-lang.org/std/primitive.i32.html + +## Tips and tricks + +- _IMPORTANT_ The code in this crate will end up being used in the `core` crate + so it can **not** have any external dependencies (other than a subset of + `core` itself). +- Only use relative imports within the `math` directory / module, e.g. + `use self::fabs::fabs` or `use super::k_cos`. Absolute imports from core are + OK, e.g. `use core::u64`. +- To reinterpret a float as an integer use the `to_bits` method. The MUSL code + uses the `GET_FLOAT_WORD` macro, or a union, to do this operation. +- To reinterpret an integer as a float use the `f32::from_bits` constructor. The + MUSL code uses the `SET_FLOAT_WORD` macro, or a union, to do this operation. +- You may use other methods from core like `f64::is_nan`, etc. as appropriate. +- Rust does not have hex float literals. This crate provides two `hf16!`, + `hf32!`, `hf64!`, and `hf128!` which convert string literals to floats at + compile time. + + ```rust + assert_eq!(hf32!("0x1.ffep+8").to_bits(), 0x43fff000); + assert_eq!(hf64!("0x1.ffep+8").to_bits(), 0x407ffe0000000000); + ``` + +- Rust code panics on arithmetic overflows when not optimized. You may need to + use the [`Wrapping`] newtype to avoid this problem, or individual methods like + [`wrapping_add`]. + +[`Wrapping`]: https://doc.rust-lang.org/std/num/struct.Wrapping.html +[`wrapping_add`]: https://doc.rust-lang.org/std/primitive.u32.html#method.wrapping_add + +## Testing + +Testing for these crates can be somewhat complex, so feel free to rely on CI. + +The easiest way replicate CI testing is using Docker. This can be done by +running `./ci/run-docker.sh [target]`. If no target is specified, all targets +will be run. + +Tests can also be run without Docker: + +```sh +# Run basic tests +# +# --no-default-features always needs to be passed, an unfortunate limitation +# since the `#![compiler_builtins]` feature is enabled by default. 
+cargo test --workspace --no-default-features + +# Test with all interesting features +cargo test --workspace --no-default-features \ + --features arch,unstable-float,unstable-intrinsics,mem + +# Run with more detailed tests for libm +cargo test --workspace --no-default-features \ + --features arch,unstable-float,unstable-intrinsics,mem \ + --features build-mpfr,build-musl \ + --profile release-checked +``` + +The multiprecision tests use the [`rug`] crate for bindings to MPFR. MPFR can be +difficult to build on non-Unix systems, refer to [`gmp_mpfr_sys`] for help. + +`build-musl` does not build with MSVC, Wasm, or Thumb. + +[`rug`]: https://docs.rs/rug/latest/rug/ +[`gmp_mpfr_sys`]: https://docs.rs/gmp-mpfr-sys/1.6.4/gmp_mpfr_sys/ + +In order to run all tests, some dependencies may be required: + +```sh +# Allow testing compiler-builtins +./ci/download-compiler-rt.sh + +# Optional, initialize musl for `--features build-musl` +git submodule init +git submodule update + +# `--release` ables more test cases +cargo test --release +``` + +### Extensive tests + +Libm also has tests that are exhaustive (for single-argument `f32` and 1- or 2- +argument `f16`) or extensive (for all other float and argument combinations). +These take quite a long time to run, but are launched in CI when relevant files +are changed. + +Exhaustive tests can be selected by passing an environment variable: + +```sh +LIBM_EXTENSIVE_TESTS=sqrt,sqrtf cargo test --features build-mpfr \ + --test z_extensive \ + --profile release-checked + +# Run all tests for one type +LIBM_EXTENSIVE_TESTS=all_f16 cargo test ... + +# Ensure `f64` tests can run exhaustively. Estimated completion test for a +# single test is 57306 years on my machine so this may be worth skipping. +LIBM_EXTENSIVE_TESTS=all LIBM_EXTENSIVE_ITERATIONS=18446744073709551615 cargo test ... +``` + +## Benchmarking + +Regular walltime benchmarks can be run with `cargo bench`: + +```sh +cargo bench --no-default-features \ + --features arch,unstable-float,unstable-intrinsics,mem \ + --features benchmarking-reports +``` + +There are also benchmarks that check instruction count behind the `icount` +feature. These require [`iai-callgrind-runner`] (via Cargo) and [Valgrind] +to be installed, which means these only run on limited platforms. + +Instruction count benchmarks are run as part of CI to flag performance +regresions. + +```sh +cargo bench --no-default-features \ + --features arch,unstable-float,unstable-intrinsics,mem \ + --features icount \ + --bench icount --bench mem_icount +``` + +[`iai-callgrind-runner`]: https://crates.io/crates/iai-callgrind-runner +[Valgrind]: https://valgrind.org/ diff --git a/library/compiler-builtins/Cargo.toml b/library/compiler-builtins/Cargo.toml new file mode 100644 index 0000000000000..b39ec8a25dac1 --- /dev/null +++ b/library/compiler-builtins/Cargo.toml @@ -0,0 +1,50 @@ +[workspace] +resolver = "2" +members = [ + "builtins-test", + "compiler-builtins", + "crates/libm-macros", + "crates/musl-math-sys", + "crates/panic-handler", + "crates/util", + "libm", + "libm-test", +] + +default-members = [ + "builtins-test", + "compiler-builtins", + "crates/libm-macros", + "libm", + "libm-test", +] + +exclude = [ + # `builtins-test-intrinsics` needs the feature `compiler-builtins` enabled + # and `mangled-names` disabled, which is the opposite of what is needed for + # other tests, so it makes sense to keep it out of the workspace. 
+ "builtins-test-intrinsics", +] + +[profile.release] +panic = "abort" + +[profile.dev] +panic = "abort" + +# Release mode with debug assertions +[profile.release-checked] +inherits = "release" +debug-assertions = true +overflow-checks = true + +# Release with maximum optimizations, which is very slow to build. This is also +# what is needed to check `no-panic`. +[profile.release-opt] +inherits = "release" +codegen-units = 1 +lto = "fat" + +[profile.bench] +# Required for iai-callgrind +debug = true diff --git a/library/compiler-builtins/LICENSE.txt b/library/compiler-builtins/LICENSE.txt new file mode 100644 index 0000000000000..00ae6140bd54e --- /dev/null +++ b/library/compiler-builtins/LICENSE.txt @@ -0,0 +1,275 @@ +The compiler-builtins crate is available for use under both the MIT license +and the Apache-2.0 license with the LLVM exception (MIT AND Apache-2.0 WITH +LLVM-exception). + +The libm crate is available for use under the MIT license. + +As a contributor, you agree that your code may be used under any of the +following: the MIT license, the Apache-2.0 license, or the Apache-2.0 license +with the LLVM exception. In other words, original (non-derivative) work is +licensed under MIT OR Apache-2.0 OR Apache-2.0 WITH LLVM-exception. This is +the default license for all other source in this repository. + +Text of the relevant licenses is provided below: + +------------------------------------------------------------------------------ +MIT License + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +---- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. 
+
+In addition, if you combine or link compiled forms of this Software with
+software that is licensed under the GPLv2 ("Combined Software") and if a
+court of competent jurisdiction determines that the patent provision (Section
+3), the indemnity provision (Section 9) or other Section of the License
+conflicts with the conditions of the GPLv2, you may retroactively and
+prospectively choose to deem waived or otherwise exclude such Section(s) of
+the License, but only in their entirety and only with respect to the Combined
+Software.
+------------------------------------------------------------------------------
+
+Portions of this software are derived from third-party works licensed under
+terms compatible with the above Apache-2.0 WITH LLVM-exception AND MIT
+license:
+
+* compiler-builtins is derived from LLVM's compiler-rt (https://llvm.org/).
+  Work derived from compiler-rt prior to 2019-01-19 is usable under the MIT
+  license, with the following copyright:
+
+    Copyright (c) 2009-2016 by the contributors listed in CREDITS.TXT
+
+  The relevant CREDITS.TXT is located at
+  https://github.com/llvm/llvm-project/blob/main/compiler-rt/CREDITS.TXT.
+
+* Work derived from compiler-rt after 2019-01-19 is usable under the
+  Apache-2.0 license with the LLVM exception.
+
+* The bundled `math` module is from the libm crate, usable under the MIT
+  license. For further details and copyrights, see libm/LICENSE.txt at
+  https://github.com/rust-lang/compiler-builtins.
+
+Additionally, some source files may contain comments with specific copyrights
+or licenses.
diff --git a/library/compiler-builtins/PUBLISHING.md b/library/compiler-builtins/PUBLISHING.md
new file mode 100644
index 0000000000000..3df682ab04a4b
--- /dev/null
+++ b/library/compiler-builtins/PUBLISHING.md
@@ -0,0 +1,16 @@
+# Publishing to crates.io
+
+Publishing `compiler-builtins` to crates.io takes a few steps unfortunately.
+It's not great, but it works for now. PRs to improve this process would be
+greatly appreciated!
+
+1. Make sure you've got a clean working tree and it's updated with the latest
+   changes on `master`
+2. Edit `Cargo.toml` to bump the version number
+3. Commit this change
+4. Run `git tag` to create a tag for this version
+5. Delete the `libm/Cargo.toml` file
+6. Run `cargo +nightly publish`
+7. Push the tag
+8. Push the commit
+9. Undo changes to `Cargo.toml` and the `libm` submodule
diff --git a/library/compiler-builtins/README.md b/library/compiler-builtins/README.md
new file mode 100644
index 0000000000000..3130ff7b77d9d
--- /dev/null
+++ b/library/compiler-builtins/README.md
@@ -0,0 +1,27 @@
+# `compiler-builtins` and `libm`
+
+This repository contains two main crates:
+
+* `compiler-builtins`: symbols that the compiler expects to be available at
+  link time
+* `libm`: a Rust implementation of C math libraries, used to provide
+  implementations in `core`.
+
+More details are at [compiler-builtins/README.md](compiler-builtins/README.md)
+and [libm/README.md](libm/README.md).
+
+For instructions on contributing, see [CONTRIBUTING.md](CONTRIBUTING.md).
+
+## License
+
+* `libm` may be used under the [MIT License]
+* `compiler-builtins` may be used under the [MIT License] and the
+  [Apache License, Version 2.0] with the LLVM exception.
+* All original contributions must be under all of: the MIT license, the
+  Apache-2.0 license, and the Apache-2.0 license with the LLVM exception.
+
+More details are in [LICENSE.txt](LICENSE.txt) and
+[libm/LICENSE.txt](libm/LICENSE.txt).
+
+[MIT License]: https://opensource.org/license/mit
+[Apache License, Version 2.0]: https://www.apache.org/licenses/LICENSE-2.0
diff --git a/library/compiler-builtins/builtins-test-intrinsics/Cargo.toml b/library/compiler-builtins/builtins-test-intrinsics/Cargo.toml
new file mode 100644
index 0000000000000..6e10628a41b22
--- /dev/null
+++ b/library/compiler-builtins/builtins-test-intrinsics/Cargo.toml
@@ -0,0 +1,19 @@
+[package]
+name = "builtins-test-intrinsics"
+version = "0.1.0"
+edition = "2021"
+publish = false
+license = "MIT OR Apache-2.0"
+
+[dependencies]
+compiler_builtins = { path = "../compiler-builtins", features = ["compiler-builtins"]}
+panic-handler = { path = "../crates/panic-handler" }
+
+[features]
+c = ["compiler_builtins/c"]
+
+[profile.release]
+panic = "abort"
+
+[profile.dev]
+panic = "abort"
diff --git a/library/compiler-builtins/builtins-test-intrinsics/build.rs b/library/compiler-builtins/builtins-test-intrinsics/build.rs
new file mode 100644
index 0000000000000..89b126ff2b2ed
--- /dev/null
+++ b/library/compiler-builtins/builtins-test-intrinsics/build.rs
@@ -0,0 +1,11 @@
+mod builtins_configure {
+    include!("../compiler-builtins/configure.rs");
+}
+
+fn main() {
+    println!("cargo::rerun-if-changed=../configure.rs");
+
+    let target = builtins_configure::Target::from_env();
+    builtins_configure::configure_f16_f128(&target);
+    builtins_configure::configure_aliases(&target);
+}
diff --git a/library/compiler-builtins/builtins-test-intrinsics/src/main.rs b/library/compiler-builtins/builtins-test-intrinsics/src/main.rs
new file mode 100644
index 0000000000000..1fa7b00916f77
--- /dev/null
+++ b/library/compiler-builtins/builtins-test-intrinsics/src/main.rs
@@ -0,0 +1,697 @@
+// By compiling this file we check that all the intrinsics we care about continue to be provided by
+// the `compiler_builtins` crate regardless of the changes we make to it. If we, by mistake, stop
+// compiling a C implementation and forget to implement that intrinsic in Rust, this file will fail
+// to link due to the missing intrinsic (symbol).
+
+#![allow(unused_features)]
+#![allow(internal_features)]
+#![deny(dead_code)]
+#![feature(allocator_api)]
+#![feature(f128)]
+#![feature(f16)]
+#![feature(lang_items)]
+#![no_std]
+#![no_main]
+
+extern crate panic_handler;
+
+#[cfg(all(not(thumb), not(windows), not(target_arch = "wasm32")))]
+#[link(name = "c")]
+extern "C" {}
+
+// Every function in this module will be lowered to an intrinsic by LLVM, if the platform doesn't
+// have native support for the operation used in the function. ARM has a naming convention for its
+// intrinsics that's different from other architectures; that's why some functions have an
+// additional comment: the function name is the ARM name for the intrinsic and the comment is the
+// non-ARM name for the intrinsic.
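+// For example, the f32 -> f64 extension below is written as `aeabi_f2d` with an
+// `// extendsfdf2` comment: `__aeabi_f2d` is the libcall name mandated by the ARM
+// run-time ABI, while `__extendsfdf2` is the generic compiler-rt name that other
+// architectures use for the same operation.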
+mod intrinsics { + /* f16 operations */ + + #[cfg(f16_enabled)] + pub fn extendhfsf(x: f16) -> f32 { + x as f32 + } + + #[cfg(f16_enabled)] + pub fn extendhfdf(x: f16) -> f64 { + x as f64 + } + + #[cfg(all( + f16_enabled, + f128_enabled, + not(any(target_arch = "powerpc", target_arch = "powerpc64")) + ))] + pub fn extendhftf(x: f16) -> f128 { + x as f128 + } + + /* f32 operations */ + + #[cfg(f16_enabled)] + pub fn truncsfhf(x: f32) -> f16 { + x as f16 + } + + // extendsfdf2 + pub fn aeabi_f2d(x: f32) -> f64 { + x as f64 + } + + #[cfg(f128_enabled)] + pub fn extendsftf(x: f32) -> f128 { + x as f128 + } + + // fixsfsi + pub fn aeabi_f2iz(x: f32) -> i32 { + x as i32 + } + + // fixsfdi + pub fn aeabi_f2lz(x: f32) -> i64 { + x as i64 + } + + pub fn fixsfti(x: f32) -> i128 { + x as i128 + } + + // fixunssfsi + pub fn aeabi_f2uiz(x: f32) -> u32 { + x as u32 + } + + // fixunssfdi + pub fn aeabi_f2ulz(x: f32) -> u64 { + x as u64 + } + + pub fn fixunssfti(x: f32) -> u128 { + x as u128 + } + + // addsf3 + pub fn aeabi_fadd(a: f32, b: f32) -> f32 { + a + b + } + + // eqsf2 + pub fn aeabi_fcmpeq(a: f32, b: f32) -> bool { + a == b + } + + // gtsf2 + pub fn aeabi_fcmpgt(a: f32, b: f32) -> bool { + a > b + } + + // ltsf2 + pub fn aeabi_fcmplt(a: f32, b: f32) -> bool { + a < b + } + + // divsf3 + pub fn aeabi_fdiv(a: f32, b: f32) -> f32 { + a / b + } + + // mulsf3 + pub fn aeabi_fmul(a: f32, b: f32) -> f32 { + a * b + } + + // subsf3 + pub fn aeabi_fsub(a: f32, b: f32) -> f32 { + a - b + } + + /* f64 operations */ + + // truncdfsf2 + pub fn aeabi_d2f(x: f64) -> f32 { + x as f32 + } + + // fixdfsi + pub fn aeabi_d2i(x: f64) -> i32 { + x as i32 + } + + // fixdfdi + pub fn aeabi_d2l(x: f64) -> i64 { + x as i64 + } + + pub fn fixdfti(x: f64) -> i128 { + x as i128 + } + + // fixunsdfsi + pub fn aeabi_d2uiz(x: f64) -> u32 { + x as u32 + } + + // fixunsdfdi + pub fn aeabi_d2ulz(x: f64) -> u64 { + x as u64 + } + + pub fn fixunsdfti(x: f64) -> u128 { + x as u128 + } + + // adddf3 + pub fn aeabi_dadd(a: f64, b: f64) -> f64 { + a + b + } + + // eqdf2 + pub fn aeabi_dcmpeq(a: f64, b: f64) -> bool { + a == b + } + + // gtdf2 + pub fn aeabi_dcmpgt(a: f64, b: f64) -> bool { + a > b + } + + // ltdf2 + pub fn aeabi_dcmplt(a: f64, b: f64) -> bool { + a < b + } + + // divdf3 + pub fn aeabi_ddiv(a: f64, b: f64) -> f64 { + a / b + } + + // muldf3 + pub fn aeabi_dmul(a: f64, b: f64) -> f64 { + a * b + } + + // subdf3 + pub fn aeabi_dsub(a: f64, b: f64) -> f64 { + a - b + } + + /* f128 operations */ + + #[cfg(all( + f16_enabled, + f128_enabled, + not(any(target_arch = "powerpc", target_arch = "powerpc64")) + ))] + pub fn trunctfhf(x: f128) -> f16 { + x as f16 + } + + #[cfg(f128_enabled)] + pub fn trunctfsf(x: f128) -> f32 { + x as f32 + } + + #[cfg(f128_enabled)] + pub fn trunctfdf(x: f128) -> f64 { + x as f64 + } + + #[cfg(all( + f128_enabled, + not(any(target_arch = "powerpc", target_arch = "powerpc64")) + ))] + pub fn fixtfsi(x: f128) -> i32 { + x as i32 + } + + #[cfg(all( + f128_enabled, + not(any(target_arch = "powerpc", target_arch = "powerpc64")) + ))] + pub fn fixtfdi(x: f128) -> i64 { + x as i64 + } + + #[cfg(all( + f128_enabled, + not(any(target_arch = "powerpc", target_arch = "powerpc64")) + ))] + pub fn fixtfti(x: f128) -> i128 { + x as i128 + } + + #[cfg(all( + f128_enabled, + not(any(target_arch = "powerpc", target_arch = "powerpc64")) + ))] + pub fn fixunstfsi(x: f128) -> u32 { + x as u32 + } + + #[cfg(all( + f128_enabled, + not(any(target_arch = "powerpc", target_arch = "powerpc64")) + ))] + pub fn 
fixunstfdi(x: f128) -> u64 { + x as u64 + } + + #[cfg(all( + f128_enabled, + not(any(target_arch = "powerpc", target_arch = "powerpc64")) + ))] + pub fn fixunstfti(x: f128) -> u128 { + x as u128 + } + + #[cfg(f128_enabled)] + pub fn addtf(a: f128, b: f128) -> f128 { + a + b + } + + #[cfg(f128_enabled)] + pub fn eqtf(a: f128, b: f128) -> bool { + a == b + } + + #[cfg(f128_enabled)] + pub fn gttf(a: f128, b: f128) -> bool { + a > b + } + + #[cfg(f128_enabled)] + pub fn lttf(a: f128, b: f128) -> bool { + a < b + } + + #[cfg(f128_enabled)] + pub fn multf(a: f128, b: f128) -> f128 { + a * b + } + + #[cfg(f128_enabled)] + pub fn divtf(a: f128, b: f128) -> f128 { + a / b + } + + #[cfg(f128_enabled)] + pub fn subtf(a: f128, b: f128) -> f128 { + a - b + } + + /* i32 operations */ + + // floatsisf + pub fn aeabi_i2f(x: i32) -> f32 { + x as f32 + } + + // floatsidf + pub fn aeabi_i2d(x: i32) -> f64 { + x as f64 + } + + #[cfg(f128_enabled)] + pub fn floatsitf(x: i32) -> f128 { + x as f128 + } + + pub fn aeabi_idiv(a: i32, b: i32) -> i32 { + a.wrapping_div(b) + } + + pub fn aeabi_idivmod(a: i32, b: i32) -> i32 { + a % b + } + + /* i64 operations */ + + // floatdisf + pub fn aeabi_l2f(x: i64) -> f32 { + x as f32 + } + + // floatdidf + pub fn aeabi_l2d(x: i64) -> f64 { + x as f64 + } + + #[cfg(f128_enabled)] + pub fn floatditf(x: i64) -> f128 { + x as f128 + } + + pub fn mulodi4(a: i64, b: i64) -> i64 { + a * b + } + + // divdi3 + pub fn aeabi_ldivmod(a: i64, b: i64) -> i64 { + a / b + } + + pub fn moddi3(a: i64, b: i64) -> i64 { + a % b + } + + // muldi3 + pub fn aeabi_lmul(a: i64, b: i64) -> i64 { + a.wrapping_mul(b) + } + + /* i128 operations */ + + pub fn floattisf(x: i128) -> f32 { + x as f32 + } + + pub fn floattidf(x: i128) -> f64 { + x as f64 + } + + #[cfg(f128_enabled)] + pub fn floattitf(x: i128) -> f128 { + x as f128 + } + + pub fn lshrti3(a: i128, b: usize) -> i128 { + a >> b + } + + pub fn divti3(a: i128, b: i128) -> i128 { + a / b + } + + pub fn modti3(a: i128, b: i128) -> i128 { + a % b + } + + /* u32 operations */ + + // floatunsisf + pub fn aeabi_ui2f(x: u32) -> f32 { + x as f32 + } + + // floatunsidf + pub fn aeabi_ui2d(x: u32) -> f64 { + x as f64 + } + + #[cfg(f128_enabled)] + pub fn floatunsitf(x: u32) -> f128 { + x as f128 + } + + pub fn aeabi_uidiv(a: u32, b: u32) -> u32 { + a / b + } + + pub fn aeabi_uidivmod(a: u32, b: u32) -> u32 { + a % b + } + + /* u64 operations */ + + // floatundisf + pub fn aeabi_ul2f(x: u64) -> f32 { + x as f32 + } + + // floatundidf + pub fn aeabi_ul2d(x: u64) -> f64 { + x as f64 + } + + #[cfg(f128_enabled)] + pub fn floatunditf(x: u64) -> f128 { + x as f128 + } + + // udivdi3 + pub fn aeabi_uldivmod(a: u64, b: u64) -> u64 { + a * b + } + + pub fn umoddi3(a: u64, b: u64) -> u64 { + a % b + } + + /* u128 operations */ + + pub fn floatuntisf(x: u128) -> f32 { + x as f32 + } + + pub fn floatuntidf(x: u128) -> f64 { + x as f64 + } + + #[cfg(f128_enabled)] + pub fn floatuntitf(x: u128) -> f128 { + x as f128 + } + + pub fn muloti4(a: u128, b: u128) -> Option { + a.checked_mul(b) + } + + pub fn multi3(a: u128, b: u128) -> u128 { + a.wrapping_mul(b) + } + + pub fn ashlti3(a: u128, b: usize) -> u128 { + a >> b + } + + pub fn ashrti3(a: u128, b: usize) -> u128 { + a << b + } + + pub fn udivti3(a: u128, b: u128) -> u128 { + a / b + } + + pub fn umodti3(a: u128, b: u128) -> u128 { + a % b + } +} + +fn run() { + use core::hint::black_box as bb; + + use intrinsics::*; + + // FIXME(f16_f128): some PPC f128 <-> int conversion functions have the wrong names + + 
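+    // `black_box` keeps the arguments and results opaque to the optimizer, so none of the
+    // calls below can be constant-folded away; each operation must therefore be lowered to
+    // its intrinsic and the corresponding symbol must resolve at link time.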
#[cfg(f128_enabled)] + bb(addtf(bb(2.), bb(2.))); + bb(aeabi_d2f(bb(2.))); + bb(aeabi_d2i(bb(2.))); + bb(aeabi_d2l(bb(2.))); + bb(aeabi_d2uiz(bb(2.))); + bb(aeabi_d2ulz(bb(2.))); + bb(aeabi_dadd(bb(2.), bb(3.))); + bb(aeabi_dcmpeq(bb(2.), bb(3.))); + bb(aeabi_dcmpgt(bb(2.), bb(3.))); + bb(aeabi_dcmplt(bb(2.), bb(3.))); + bb(aeabi_ddiv(bb(2.), bb(3.))); + bb(aeabi_dmul(bb(2.), bb(3.))); + bb(aeabi_dsub(bb(2.), bb(3.))); + bb(aeabi_f2d(bb(2.))); + bb(aeabi_f2iz(bb(2.))); + bb(aeabi_f2lz(bb(2.))); + bb(aeabi_f2uiz(bb(2.))); + bb(aeabi_f2ulz(bb(2.))); + bb(aeabi_fadd(bb(2.), bb(3.))); + bb(aeabi_fcmpeq(bb(2.), bb(3.))); + bb(aeabi_fcmpgt(bb(2.), bb(3.))); + bb(aeabi_fcmplt(bb(2.), bb(3.))); + bb(aeabi_fdiv(bb(2.), bb(3.))); + bb(aeabi_fmul(bb(2.), bb(3.))); + bb(aeabi_fsub(bb(2.), bb(3.))); + bb(aeabi_i2d(bb(2))); + bb(aeabi_i2f(bb(2))); + bb(aeabi_idiv(bb(2), bb(3))); + bb(aeabi_idivmod(bb(2), bb(3))); + bb(aeabi_l2d(bb(2))); + bb(aeabi_l2f(bb(2))); + bb(aeabi_ldivmod(bb(2), bb(3))); + bb(aeabi_lmul(bb(2), bb(3))); + bb(aeabi_ui2d(bb(2))); + bb(aeabi_ui2f(bb(2))); + bb(aeabi_uidiv(bb(2), bb(3))); + bb(aeabi_uidivmod(bb(2), bb(3))); + bb(aeabi_ul2d(bb(2))); + bb(aeabi_ul2f(bb(2))); + bb(aeabi_uldivmod(bb(2), bb(3))); + bb(ashlti3(bb(2), bb(2))); + bb(ashrti3(bb(2), bb(2))); + #[cfg(f128_enabled)] + bb(divtf(bb(2.), bb(2.))); + bb(divti3(bb(2), bb(2))); + #[cfg(f128_enabled)] + bb(eqtf(bb(2.), bb(2.))); + #[cfg(f16_enabled)] + bb(extendhfdf(bb(2.))); + #[cfg(f16_enabled)] + bb(extendhfsf(bb(2.))); + #[cfg(all( + f16_enabled, + f128_enabled, + not(any(target_arch = "powerpc", target_arch = "powerpc64")) + ))] + bb(extendhftf(bb(2.))); + #[cfg(f128_enabled)] + bb(extendsftf(bb(2.))); + bb(fixdfti(bb(2.))); + bb(fixsfti(bb(2.))); + #[cfg(all( + f128_enabled, + not(any(target_arch = "powerpc", target_arch = "powerpc64")) + ))] + bb(fixtfdi(bb(2.))); + #[cfg(all( + f128_enabled, + not(any(target_arch = "powerpc", target_arch = "powerpc64")) + ))] + bb(fixtfsi(bb(2.))); + #[cfg(all( + f128_enabled, + not(any(target_arch = "powerpc", target_arch = "powerpc64")) + ))] + bb(fixtfti(bb(2.))); + bb(fixunsdfti(bb(2.))); + bb(fixunssfti(bb(2.))); + #[cfg(all( + f128_enabled, + not(any(target_arch = "powerpc", target_arch = "powerpc64")) + ))] + bb(fixunstfdi(bb(2.))); + #[cfg(all( + f128_enabled, + not(any(target_arch = "powerpc", target_arch = "powerpc64")) + ))] + bb(fixunstfsi(bb(2.))); + #[cfg(all( + f128_enabled, + not(any(target_arch = "powerpc", target_arch = "powerpc64")) + ))] + bb(fixunstfti(bb(2.))); + #[cfg(f128_enabled)] + bb(floatditf(bb(2))); + #[cfg(f128_enabled)] + bb(floatsitf(bb(2))); + bb(floattidf(bb(2))); + bb(floattisf(bb(2))); + #[cfg(f128_enabled)] + bb(floattitf(bb(2))); + #[cfg(f128_enabled)] + bb(floatunditf(bb(2))); + #[cfg(f128_enabled)] + bb(floatunsitf(bb(2))); + bb(floatuntidf(bb(2))); + bb(floatuntisf(bb(2))); + #[cfg(f128_enabled)] + bb(floatuntitf(bb(2))); + #[cfg(f128_enabled)] + bb(gttf(bb(2.), bb(2.))); + bb(lshrti3(bb(2), bb(2))); + #[cfg(f128_enabled)] + bb(lttf(bb(2.), bb(2.))); + bb(moddi3(bb(2), bb(3))); + bb(modti3(bb(2), bb(2))); + bb(mulodi4(bb(2), bb(3))); + bb(muloti4(bb(2), bb(2))); + #[cfg(f128_enabled)] + bb(multf(bb(2.), bb(2.))); + bb(multi3(bb(2), bb(2))); + #[cfg(f128_enabled)] + bb(subtf(bb(2.), bb(2.))); + #[cfg(f16_enabled)] + bb(truncsfhf(bb(2.))); + #[cfg(f128_enabled)] + bb(trunctfdf(bb(2.))); + #[cfg(all( + f16_enabled, + f128_enabled, + not(any(target_arch = "powerpc", target_arch = "powerpc64")) + ))] + bb(trunctfhf(bb(2.))); + 
#[cfg(f128_enabled)] + bb(trunctfsf(bb(2.))); + bb(udivti3(bb(2), bb(2))); + bb(umoddi3(bb(2), bb(3))); + bb(umodti3(bb(2), bb(2))); + + something_with_a_dtor(&|| assert_eq!(bb(1), 1)); + + // FIXME(#802): This should be re-enabled once a workaround is found. + // extern "C" { + // fn rust_begin_unwind(x: usize); + // } + + // unsafe { + // rust_begin_unwind(0); + // } +} + +fn something_with_a_dtor(f: &dyn Fn()) { + struct A<'a>(&'a (dyn Fn() + 'a)); + + impl Drop for A<'_> { + fn drop(&mut self) { + (self.0)(); + } + } + let _a = A(f); + f(); +} + +#[unsafe(no_mangle)] +#[cfg(not(thumb))] +fn main(_argc: core::ffi::c_int, _argv: *const *const u8) -> core::ffi::c_int { + run(); + 0 +} + +#[unsafe(no_mangle)] +#[cfg(thumb)] +pub fn _start() -> ! { + run(); + loop {} +} + +#[cfg(windows)] +#[link(name = "kernel32")] +#[link(name = "msvcrt")] +extern "C" {} + +// ARM targets need these symbols +#[unsafe(no_mangle)] +pub fn __aeabi_unwind_cpp_pr0() {} + +#[unsafe(no_mangle)] +pub fn __aeabi_unwind_cpp_pr1() {} + +#[cfg(not(any(windows, target_os = "cygwin")))] +#[allow(non_snake_case)] +#[unsafe(no_mangle)] +pub fn _Unwind_Resume() {} + +#[cfg(not(any(windows, target_os = "cygwin")))] +#[lang = "eh_personality"] +pub extern "C" fn eh_personality() {} + +#[cfg(any(all(windows, target_env = "gnu"), target_os = "cygwin"))] +mod mingw_unwinding { + #[unsafe(no_mangle)] + pub fn rust_eh_personality() {} + #[unsafe(no_mangle)] + pub fn rust_eh_unwind_resume() {} + #[unsafe(no_mangle)] + pub fn rust_eh_register_frames() {} + #[unsafe(no_mangle)] + pub fn rust_eh_unregister_frames() {} +} diff --git a/library/compiler-builtins/builtins-test/Cargo.toml b/library/compiler-builtins/builtins-test/Cargo.toml new file mode 100644 index 0000000000000..10978c0bb7ed0 --- /dev/null +++ b/library/compiler-builtins/builtins-test/Cargo.toml @@ -0,0 +1,99 @@ +[package] +name = "builtins-test" +version = "0.1.0" +authors = ["Alex Crichton "] +edition = "2024" +publish = false +license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" + +[dependencies] +# For fuzzing tests we want a deterministic seedable RNG. We also eliminate potential +# problems with system RNGs on the variety of platforms this crate is tested on. +# `xoshiro128**` is used for its quality, size, and speed at generating `u32` shift amounts. 
+rand_xoshiro = "0.6" +# To compare float builtins against +rustc_apfloat = "0.2.1" +# Really a dev dependency, but dev dependencies can't be optional +iai-callgrind = { version = "0.14.0", optional = true } + +[dependencies.compiler_builtins] +path = "../compiler-builtins" +default-features = false +features = ["unstable-public-internals"] + +[dev-dependencies] +criterion = { version = "0.5.1", default-features = false, features = ["cargo_bench_support"] } +paste = "1.0.15" + +[target.'cfg(all(target_arch = "arm", not(any(target_env = "gnu", target_env = "musl")), target_os = "linux"))'.dev-dependencies] +test = { git = "https://github.com/japaric/utest" } +utest-cortex-m-qemu = { default-features = false, git = "https://github.com/japaric/utest" } +utest-macros = { git = "https://github.com/japaric/utest" } + +[features] +default = ["mangled-names"] +c = ["compiler_builtins/c"] +no-asm = ["compiler_builtins/no-asm"] +no-f16-f128 = ["compiler_builtins/no-f16-f128"] +mem = ["compiler_builtins/mem"] +mangled-names = ["compiler_builtins/mangled-names"] +# Skip tests that rely on f128 symbols being available on the system +no-sys-f128 = ["no-sys-f128-int-convert", "no-sys-f16-f128-convert"] +# Some platforms have some f128 functions but everything except integer conversions +no-sys-f128-int-convert = [] +no-sys-f16-f128-convert = [] +no-sys-f16-f64-convert = [] +# Skip tests that rely on f16 symbols being available on the system +no-sys-f16 = ["no-sys-f16-f64-convert"] + +# Enable icount benchmarks (requires iai-callgrind and valgrind) +icount = ["dep:iai-callgrind"] + +# Enable report generation without bringing in more dependencies by default +benchmarking-reports = ["criterion/plotters", "criterion/html_reports"] + +# NOTE: benchmarks must be run with `--no-default-features` or with +# `-p builtins-test`, otherwise the default `compiler-builtins` feature +# of the `compiler_builtins` crate gets activated, resulting in linker +# errors. + +[[bench]] +name = "float_add" +harness = false + +[[bench]] +name = "float_sub" +harness = false + +[[bench]] +name = "float_mul" +harness = false + +[[bench]] +name = "float_div" +harness = false + +[[bench]] +name = "float_cmp" +harness = false + +[[bench]] +name = "float_conv" +harness = false + +[[bench]] +name = "float_extend" +harness = false + +[[bench]] +name = "float_trunc" +harness = false + +[[bench]] +name = "float_pow" +harness = false + +[[bench]] +name = "mem_icount" +harness = false +required-features = ["icount"] diff --git a/library/compiler-builtins/builtins-test/benches/float_add.rs b/library/compiler-builtins/builtins-test/benches/float_add.rs new file mode 100644 index 0000000000000..197f90b319da4 --- /dev/null +++ b/library/compiler-builtins/builtins-test/benches/float_add.rs @@ -0,0 +1,93 @@ +#![cfg_attr(f128_enabled, feature(f128))] + +use builtins_test::float_bench; +use compiler_builtins::float::add; +use criterion::{Criterion, criterion_main}; + +float_bench! { + name: add_f32, + sig: (a: f32, b: f32) -> f32, + crate_fn: add::__addsf3, + sys_fn: __addsf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "addss {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fadd {a:s}, {a:s}, {b:s}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +float_bench! 
{ + name: add_f64, + sig: (a: f64, b: f64) -> f64, + crate_fn: add::__adddf3, + sys_fn: __adddf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "addsd {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fadd {a:d}, {a:d}, {b:d}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +#[cfg(f128_enabled)] +float_bench! { + name: add_f128, + sig: (a: f128, b: f128) -> f128, + crate_fn: add::__addtf3, + crate_fn_ppc: add::__addkf3, + sys_fn: __addtf3, + sys_fn_ppc: __addkf3, + sys_available: not(feature = "no-sys-f128"), + asm: [] +} + +pub fn float_add() { + let mut criterion = Criterion::default().configure_from_args(); + + add_f32(&mut criterion); + add_f64(&mut criterion); + + #[cfg(f128_enabled)] + { + add_f128(&mut criterion); + } +} + +criterion_main!(float_add); diff --git a/library/compiler-builtins/builtins-test/benches/float_cmp.rs b/library/compiler-builtins/builtins-test/benches/float_cmp.rs new file mode 100644 index 0000000000000..42d6652397dcf --- /dev/null +++ b/library/compiler-builtins/builtins-test/benches/float_cmp.rs @@ -0,0 +1,207 @@ +#![cfg_attr(f128_enabled, feature(f128))] + +use builtins_test::float_bench; +use compiler_builtins::float::cmp; +use criterion::{Criterion, criterion_main}; + +/// `gt` symbols are allowed to return differing results, they just get compared +/// to 0. +fn gt_res_eq(a: i32, b: i32) -> bool { + let a_lt_0 = a <= 0; + let b_lt_0 = b <= 0; + (a_lt_0 && b_lt_0) || (!a_lt_0 && !b_lt_0) +} + +float_bench! { + name: cmp_f32_gt, + sig: (a: f32, b: f32) -> i32, + crate_fn: cmp::__gtsf2, + sys_fn: __gtsf2, + sys_available: all(), + output_eq: gt_res_eq, + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: i32; + asm!( + "xor {ret:e}, {ret:e}", + "ucomiss {a}, {b}", + "seta {ret:l}", + a = in(xmm_reg) a, + b = in(xmm_reg) b, + ret = out(reg) ret, + options(nomem, nostack, pure) + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: i32; + asm!( + "fcmp {a:s}, {b:s}", + "cset {ret:w}, gt", + a = in(vreg) a, + b = in(vreg) b, + ret = out(reg) ret, + options(nomem,nostack), + ); + + ret + }; + ], +} + +float_bench! { + name: cmp_f32_unord, + sig: (a: f32, b: f32) -> i32, + crate_fn: cmp::__unordsf2, + sys_fn: __unordsf2, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: i32; + asm!( + "xor {ret:e}, {ret:e}", + "ucomiss {a}, {b}", + "setp {ret:l}", + a = in(xmm_reg) a, + b = in(xmm_reg) b, + ret = out(reg) ret, + options(nomem, nostack, pure) + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: i32; + asm!( + "fcmp {a:s}, {b:s}", + "cset {ret:w}, vs", + a = in(vreg) a, + b = in(vreg) b, + ret = out(reg) ret, + options(nomem, nostack, pure) + ); + + ret + }; + ], +} + +float_bench! { + name: cmp_f64_gt, + sig: (a: f64, b: f64) -> i32, + crate_fn: cmp::__gtdf2, + sys_fn: __gtdf2, + sys_available: all(), + output_eq: gt_res_eq, + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: i32; + asm!( + "xor {ret:e}, {ret:e}", + "ucomisd {a}, {b}", + "seta {ret:l}", + a = in(xmm_reg) a, + b = in(xmm_reg) b, + ret = out(reg) ret, + options(nomem, nostack, pure) + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: i32; + asm!( + "fcmp {a:d}, {b:d}", + "cset {ret:w}, gt", + a = in(vreg) a, + b = in(vreg) b, + ret = out(reg) ret, + options(nomem, nostack, pure) + ); + + ret + }; + ], +} + +float_bench! 
{ + name: cmp_f64_unord, + sig: (a: f64, b: f64) -> i32, + crate_fn: cmp::__unorddf2, + sys_fn: __unorddf2, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: i32; + asm!( + "xor {ret:e}, {ret:e}", + "ucomisd {a}, {b}", + "setp {ret:l}", + a = in(xmm_reg) a, + b = in(xmm_reg) b, + ret = out(reg) ret, + options(nomem, nostack, pure) + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: i32; + asm!( + "fcmp {a:d}, {b:d}", + "cset {ret:w}, vs", + a = in(vreg) a, + b = in(vreg) b, + ret = out(reg) ret, + options(nomem, nostack, pure) + ); + + ret + }; + ], +} + +float_bench! { + name: cmp_f128_gt, + sig: (a: f128, b: f128) -> i32, + crate_fn: cmp::__gttf2, + crate_fn_ppc: cmp::__gtkf2, + sys_fn: __gttf2, + sys_fn_ppc: __gtkf2, + sys_available: not(feature = "no-sys-f128"), + output_eq: gt_res_eq, + asm: [] +} + +float_bench! { + name: cmp_f128_unord, + sig: (a: f128, b: f128) -> i32, + crate_fn: cmp::__unordtf2, + crate_fn_ppc: cmp::__unordkf2, + sys_fn: __unordtf2, + sys_fn_ppc: __unordkf2, + sys_available: not(feature = "no-sys-f128"), + asm: [] +} + +pub fn float_cmp() { + let mut criterion = Criterion::default().configure_from_args(); + + cmp_f32_gt(&mut criterion); + cmp_f32_unord(&mut criterion); + cmp_f64_gt(&mut criterion); + cmp_f64_unord(&mut criterion); + + #[cfg(f128_enabled)] + { + cmp_f128_gt(&mut criterion); + cmp_f128_unord(&mut criterion); + } +} + +criterion_main!(float_cmp); diff --git a/library/compiler-builtins/builtins-test/benches/float_conv.rs b/library/compiler-builtins/builtins-test/benches/float_conv.rs new file mode 100644 index 0000000000000..d4a7346d1d588 --- /dev/null +++ b/library/compiler-builtins/builtins-test/benches/float_conv.rs @@ -0,0 +1,688 @@ +#![allow(improper_ctypes)] +#![cfg_attr(f128_enabled, feature(f128))] + +use builtins_test::float_bench; +use compiler_builtins::float::conv; +use criterion::{Criterion, criterion_main}; + +/* unsigned int -> float */ + +float_bench! { + name: conv_u32_f32, + sig: (a: u32) -> f32, + crate_fn: conv::__floatunsisf, + sys_fn: __floatunsisf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: f32; + asm!( + "mov {tmp:e}, {a:e}", + "cvtsi2ss {ret}, {tmp}", + a = in(reg) a, + tmp = out(reg) _, + ret = lateout(xmm_reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: f32; + asm!( + "ucvtf {ret:s}, {a:w}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_u32_f64, + sig: (a: u32) -> f64, + crate_fn: conv::__floatunsidf, + sys_fn: __floatunsidf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: f64; + asm!( + "mov {tmp:e}, {a:e}", + "cvtsi2sd {ret}, {tmp}", + a = in(reg) a, + tmp = out(reg) _, + ret = lateout(xmm_reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: f64; + asm!( + "ucvtf {ret:d}, {a:w}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(f128_enabled)] +float_bench! { + name: conv_u32_f128, + sig: (a: u32) -> f128, + crate_fn: conv::__floatunsitf, + crate_fn_ppc: conv::__floatunsikf, + sys_fn: __floatunsitf, + sys_fn_ppc: __floatunsikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +float_bench! 
{ + name: conv_u64_f32, + sig: (a: u64) -> f32, + crate_fn: conv::__floatundisf, + sys_fn: __floatundisf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: f32; + asm!( + "ucvtf {ret:s}, {a:x}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_u64_f64, + sig: (a: u64) -> f64, + crate_fn: conv::__floatundidf, + sys_fn: __floatundidf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: f64; + asm!( + "ucvtf {ret:d}, {a:x}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(f128_enabled)] +float_bench! { + name: conv_u64_f128, + sig: (a: u64) -> f128, + crate_fn: conv::__floatunditf, + crate_fn_ppc: conv::__floatundikf, + sys_fn: __floatunditf, + sys_fn_ppc: __floatundikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +float_bench! { + name: conv_u128_f32, + sig: (a: u128) -> f32, + crate_fn: conv::__floatuntisf, + sys_fn: __floatuntisf, + sys_available: all(), + asm: [] +} + +float_bench! { + name: conv_u128_f64, + sig: (a: u128) -> f64, + crate_fn: conv::__floatuntidf, + sys_fn: __floatuntidf, + sys_available: all(), + asm: [] +} + +#[cfg(f128_enabled)] +float_bench! { + name: conv_u128_f128, + sig: (a: u128) -> f128, + crate_fn: conv::__floatuntitf, + crate_fn_ppc: conv::__floatuntikf, + sys_fn: __floatuntitf, + sys_fn_ppc: __floatuntikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +/* signed int -> float */ + +float_bench! { + name: conv_i32_f32, + sig: (a: i32) -> f32, + crate_fn: conv::__floatsisf, + sys_fn: __floatsisf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: f32; + asm!( + "cvtsi2ss {ret}, {a:e}", + a = in(reg) a, + ret = lateout(xmm_reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: f32; + asm!( + "scvtf {ret:s}, {a:w}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_i32_f64, + sig: (a: i32) -> f64, + crate_fn: conv::__floatsidf, + sys_fn: __floatsidf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: f64; + asm!( + "cvtsi2sd {ret}, {a:e}", + a = in(reg) a, + ret = lateout(xmm_reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + + + #[cfg(target_arch = "aarch64")] { + let ret: f64; + asm!( + "scvtf {ret:d}, {a:w}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(f128_enabled)] +float_bench! { + name: conv_i32_f128, + sig: (a: i32) -> f128, + crate_fn: conv::__floatsitf, + crate_fn_ppc: conv::__floatsikf, + sys_fn: __floatsitf, + sys_fn_ppc: __floatsikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +float_bench! { + name: conv_i64_f32, + sig: (a: i64) -> f32, + crate_fn: conv::__floatdisf, + sys_fn: __floatdisf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: f32; + asm!( + "cvtsi2ss {ret}, {a:r}", + a = in(reg) a, + ret = lateout(xmm_reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: f32; + asm!( + "scvtf {ret:s}, {a:x}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! 
{ + name: conv_i64_f64, + sig: (a: i64) -> f64, + crate_fn: conv::__floatdidf, + sys_fn: __floatdidf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: f64; + asm!( + "cvtsi2sd {ret}, {a:r}", + a = in(reg) a, + ret = lateout(xmm_reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + + + #[cfg(target_arch = "aarch64")] { + let ret: f64; + asm!( + "scvtf {ret:d}, {a:x}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(f128_enabled)] +float_bench! { + name: conv_i64_f128, + sig: (a: i64) -> f128, + crate_fn: conv::__floatditf, + crate_fn_ppc: conv::__floatdikf, + sys_fn: __floatditf, + sys_fn_ppc: __floatdikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +float_bench! { + name: conv_i128_f32, + sig: (a: i128) -> f32, + crate_fn: conv::__floattisf, + sys_fn: __floattisf, + sys_available: all(), + asm: [] +} + +float_bench! { + name: conv_i128_f64, + sig: (a: i128) -> f64, + crate_fn: conv::__floattidf, + sys_fn: __floattidf, + sys_available: all(), + asm: [] +} + +#[cfg(f128_enabled)] +float_bench! { + name: conv_i128_f128, + sig: (a: i128) -> f128, + crate_fn: conv::__floattitf, + crate_fn_ppc: conv::__floattikf, + sys_fn: __floattitf, + sys_fn_ppc: __floattikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +/* float -> unsigned int */ + +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +float_bench! { + name: conv_f32_u32, + sig: (a: f32) -> u32, + crate_fn: conv::__fixunssfsi, + sys_fn: __fixunssfsi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: u32; + asm!( + "fcvtzu {ret:w}, {a:s}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +float_bench! { + name: conv_f32_u64, + sig: (a: f32) -> u64, + crate_fn: conv::__fixunssfdi, + sys_fn: __fixunssfdi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: u64; + asm!( + "fcvtzu {ret:x}, {a:s}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +float_bench! { + name: conv_f32_u128, + sig: (a: f32) -> u128, + crate_fn: conv::__fixunssfti, + sys_fn: __fixunssfti, + sys_available: all(), + asm: [] +} + +float_bench! { + name: conv_f64_u32, + sig: (a: f64) -> u32, + crate_fn: conv::__fixunsdfsi, + sys_fn: __fixunsdfsi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: u32; + asm!( + "fcvtzu {ret:w}, {a:d}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_f64_u64, + sig: (a: f64) -> u64, + crate_fn: conv::__fixunsdfdi, + sys_fn: __fixunsdfdi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: u64; + asm!( + "fcvtzu {ret:x}, {a:d}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_f64_u128, + sig: (a: f64) -> u128, + crate_fn: conv::__fixunsdfti, + sys_fn: __fixunsdfti, + sys_available: all(), + asm: [] +} + +#[cfg(f128_enabled)] +float_bench! 
{ + name: conv_f128_u32, + sig: (a: f128) -> u32, + crate_fn: conv::__fixunstfsi, + crate_fn_ppc: conv::__fixunskfsi, + sys_fn: __fixunstfsi, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +#[cfg(f128_enabled)] +float_bench! { + name: conv_f128_u64, + sig: (a: f128) -> u64, + crate_fn: conv::__fixunstfdi, + crate_fn_ppc: conv::__fixunskfdi, + sys_fn: __fixunstfdi, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +#[cfg(f128_enabled)] +float_bench! { + name: conv_f128_u128, + sig: (a: f128) -> u128, + crate_fn: conv::__fixunstfti, + crate_fn_ppc: conv::__fixunskfti, + sys_fn: __fixunstfti, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +/* float -> signed int */ + +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +float_bench! { + name: conv_f32_i32, + sig: (a: f32) -> i32, + crate_fn: conv::__fixsfsi, + sys_fn: __fixsfsi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: i32; + asm!( + "fcvtzs {ret:w}, {a:s}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +float_bench! { + name: conv_f32_i64, + sig: (a: f32) -> i64, + crate_fn: conv::__fixsfdi, + sys_fn: __fixsfdi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: i64; + asm!( + "fcvtzs {ret:x}, {a:s}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +float_bench! { + name: conv_f32_i128, + sig: (a: f32) -> i128, + crate_fn: conv::__fixsfti, + sys_fn: __fixsfti, + sys_available: all(), + asm: [] +} + +float_bench! { + name: conv_f64_i32, + sig: (a: f64) -> i32, + crate_fn: conv::__fixdfsi, + sys_fn: __fixdfsi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: i32; + asm!( + "fcvtzs {ret:w}, {a:d}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_f64_i64, + sig: (a: f64) -> i64, + crate_fn: conv::__fixdfdi, + sys_fn: __fixdfdi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: i64; + asm!( + "fcvtzs {ret:x}, {a:d}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_f64_i128, + sig: (a: f64) -> i128, + crate_fn: conv::__fixdfti, + sys_fn: __fixdfti, + sys_available: all(), + asm: [] +} + +#[cfg(f128_enabled)] +float_bench! { + name: conv_f128_i32, + sig: (a: f128) -> i32, + crate_fn: conv::__fixtfsi, + crate_fn_ppc: conv::__fixkfsi, + sys_fn: __fixtfsi, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +#[cfg(f128_enabled)] +float_bench! { + name: conv_f128_i64, + sig: (a: f128) -> i64, + crate_fn: conv::__fixtfdi, + crate_fn_ppc: conv::__fixkfdi, + sys_fn: __fixtfdi, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +#[cfg(f128_enabled)] +float_bench! 
{ + name: conv_f128_i128, + sig: (a: f128) -> i128, + crate_fn: conv::__fixtfti, + crate_fn_ppc: conv::__fixkfti, + sys_fn: __fixtfti, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +pub fn float_conv() { + let mut criterion = Criterion::default().configure_from_args(); + + conv_u32_f32(&mut criterion); + conv_u32_f64(&mut criterion); + conv_u64_f32(&mut criterion); + conv_u64_f64(&mut criterion); + conv_u128_f32(&mut criterion); + conv_u128_f64(&mut criterion); + conv_i32_f32(&mut criterion); + conv_i32_f64(&mut criterion); + conv_i64_f32(&mut criterion); + conv_i64_f64(&mut criterion); + conv_i128_f32(&mut criterion); + conv_i128_f64(&mut criterion); + conv_f64_u32(&mut criterion); + conv_f64_u64(&mut criterion); + conv_f64_u128(&mut criterion); + conv_f64_i32(&mut criterion); + conv_f64_i64(&mut criterion); + conv_f64_i128(&mut criterion); + + #[cfg(f128_enabled)] + // FIXME: ppc64le has a sporadic overflow panic in the crate functions + // + #[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] + { + conv_u32_f128(&mut criterion); + conv_u64_f128(&mut criterion); + conv_u128_f128(&mut criterion); + conv_i32_f128(&mut criterion); + conv_i64_f128(&mut criterion); + conv_i128_f128(&mut criterion); + conv_f128_u32(&mut criterion); + conv_f128_u64(&mut criterion); + conv_f128_u128(&mut criterion); + conv_f128_i32(&mut criterion); + conv_f128_i64(&mut criterion); + conv_f128_i128(&mut criterion); + } +} + +criterion_main!(float_conv); diff --git a/library/compiler-builtins/builtins-test/benches/float_div.rs b/library/compiler-builtins/builtins-test/benches/float_div.rs new file mode 100644 index 0000000000000..d5b0ad0fd402b --- /dev/null +++ b/library/compiler-builtins/builtins-test/benches/float_div.rs @@ -0,0 +1,93 @@ +#![cfg_attr(f128_enabled, feature(f128))] + +use builtins_test::float_bench; +use compiler_builtins::float::div; +use criterion::{Criterion, criterion_main}; + +float_bench! { + name: div_f32, + sig: (a: f32, b: f32) -> f32, + crate_fn: div::__divsf3, + sys_fn: __divsf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "divss {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fdiv {a:s}, {a:s}, {b:s}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +float_bench! { + name: div_f64, + sig: (a: f64, b: f64) -> f64, + crate_fn: div::__divdf3, + sys_fn: __divdf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "divsd {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fdiv {a:d}, {a:d}, {b:d}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +#[cfg(f128_enabled)] +float_bench! 
{ + name: div_f128, + sig: (a: f128, b: f128) -> f128, + crate_fn: div::__divtf3, + crate_fn_ppc: div::__divkf3, + sys_fn: __divtf3, + sys_fn_ppc: __divkf3, + sys_available: not(feature = "no-sys-f128"), + asm: [] +} + +pub fn float_div() { + let mut criterion = Criterion::default().configure_from_args(); + + div_f32(&mut criterion); + div_f64(&mut criterion); + + #[cfg(f128_enabled)] + { + div_f128(&mut criterion); + } +} + +criterion_main!(float_div); diff --git a/library/compiler-builtins/builtins-test/benches/float_extend.rs b/library/compiler-builtins/builtins-test/benches/float_extend.rs new file mode 100644 index 0000000000000..fc44e80c9e141 --- /dev/null +++ b/library/compiler-builtins/builtins-test/benches/float_extend.rs @@ -0,0 +1,133 @@ +#![allow(unused_variables)] // "unused" f16 registers +#![cfg_attr(f128_enabled, feature(f128))] +#![cfg_attr(f16_enabled, feature(f16))] + +use builtins_test::float_bench; +use compiler_builtins::float::extend; +use criterion::{Criterion, criterion_main}; + +#[cfg(f16_enabled)] +float_bench! { + name: extend_f16_f32, + sig: (a: f16) -> f32, + crate_fn: extend::__extendhfsf2, + sys_fn: __extendhfsf2, + sys_available: not(feature = "no-sys-f16"), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: f32; + asm!( + "fcvt {ret:s}, {a:h}", + a = in(vreg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(f16_enabled)] +float_bench! { + name: extend_f16_f64, + sig: (a: f16) -> f64, + crate_fn: extend::__extendhfdf2, + sys_fn: __extendhfdf2, + sys_available: not(feature = "no-sys-f16-f64-convert"), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: f64; + asm!( + "fcvt {ret:d}, {a:h}", + a = in(vreg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(all(f16_enabled, f128_enabled))] +float_bench! { + name: extend_f16_f128, + sig: (a: f16) -> f128, + crate_fn: extend::__extendhftf2, + crate_fn_ppc: extend::__extendhfkf2, + sys_fn: __extendhftf2, + sys_fn_ppc: __extendhfkf2, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [], +} + +float_bench! { + name: extend_f32_f64, + sig: (a: f32) -> f64, + crate_fn: extend::__extendsfdf2, + sys_fn: __extendsfdf2, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: f64; + asm!( + "fcvt {ret:d}, {a:s}", + a = in(vreg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(f128_enabled)] +float_bench! { + name: extend_f32_f128, + sig: (a: f32) -> f128, + crate_fn: extend::__extendsftf2, + crate_fn_ppc: extend::__extendsfkf2, + sys_fn: __extendsftf2, + sys_fn_ppc: __extendsfkf2, + sys_available: not(feature = "no-sys-f128"), + asm: [], +} + +#[cfg(f128_enabled)] +float_bench! 
{ + name: extend_f64_f128, + sig: (a: f64) -> f128, + crate_fn: extend::__extenddftf2, + crate_fn_ppc: extend::__extenddfkf2, + sys_fn: __extenddftf2, + sys_fn_ppc: __extenddfkf2, + sys_available: not(feature = "no-sys-f128"), + asm: [], +} + +pub fn float_extend() { + let mut criterion = Criterion::default().configure_from_args(); + + // FIXME(#655): `f16` tests disabled until we can bootstrap symbols + #[cfg(f16_enabled)] + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + { + extend_f16_f32(&mut criterion); + extend_f16_f64(&mut criterion); + + #[cfg(f128_enabled)] + extend_f16_f128(&mut criterion); + } + + extend_f32_f64(&mut criterion); + + #[cfg(f128_enabled)] + { + extend_f32_f128(&mut criterion); + extend_f64_f128(&mut criterion); + } +} + +criterion_main!(float_extend); diff --git a/library/compiler-builtins/builtins-test/benches/float_mul.rs b/library/compiler-builtins/builtins-test/benches/float_mul.rs new file mode 100644 index 0000000000000..a7a2d34aa0489 --- /dev/null +++ b/library/compiler-builtins/builtins-test/benches/float_mul.rs @@ -0,0 +1,93 @@ +#![cfg_attr(f128_enabled, feature(f128))] + +use builtins_test::float_bench; +use compiler_builtins::float::mul; +use criterion::{Criterion, criterion_main}; + +float_bench! { + name: mul_f32, + sig: (a: f32, b: f32) -> f32, + crate_fn: mul::__mulsf3, + sys_fn: __mulsf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "mulss {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fmul {a:s}, {a:s}, {b:s}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +float_bench! { + name: mul_f64, + sig: (a: f64, b: f64) -> f64, + crate_fn: mul::__muldf3, + sys_fn: __muldf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "mulsd {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fmul {a:d}, {a:d}, {b:d}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +#[cfg(f128_enabled)] +float_bench! { + name: mul_f128, + sig: (a: f128, b: f128) -> f128, + crate_fn: mul::__multf3, + crate_fn_ppc: mul::__mulkf3, + sys_fn: __multf3, + sys_fn_ppc: __mulkf3, + sys_available: not(feature = "no-sys-f128"), + asm: [] +} + +pub fn float_mul() { + let mut criterion = Criterion::default().configure_from_args(); + + mul_f32(&mut criterion); + mul_f64(&mut criterion); + + #[cfg(f128_enabled)] + { + mul_f128(&mut criterion); + } +} + +criterion_main!(float_mul); diff --git a/library/compiler-builtins/builtins-test/benches/float_pow.rs b/library/compiler-builtins/builtins-test/benches/float_pow.rs new file mode 100644 index 0000000000000..64e37dd32416e --- /dev/null +++ b/library/compiler-builtins/builtins-test/benches/float_pow.rs @@ -0,0 +1,49 @@ +#![cfg_attr(f128_enabled, feature(f128))] + +use builtins_test::float_bench; +use compiler_builtins::float::pow; +use criterion::{Criterion, criterion_main}; + +float_bench! { + name: powi_f32, + sig: (a: f32, b: i32) -> f32, + crate_fn: pow::__powisf2, + sys_fn: __powisf2, + sys_available: all(), + asm: [], +} + +float_bench! 
{ + name: powi_f64, + sig: (a: f64, b: i32) -> f64, + crate_fn: pow::__powidf2, + sys_fn: __powidf2, + sys_available: all(), + asm: [], +} + +// FIXME(f16_f128): can be changed to only `f128_enabled` once `__multf3` and `__divtf3` are +// distributed by nightly. +#[cfg(all(f128_enabled, not(feature = "no-sys-f128")))] +float_bench! { + name: powi_f128, + sig: (a: f128, b: i32) -> f128, + crate_fn: pow::__powitf2, + crate_fn_ppc: pow::__powikf2, + sys_fn: __powitf2, + sys_fn_ppc: __powikf2, + sys_available: not(feature = "no-sys-f128"), + asm: [] +} + +pub fn float_pow() { + let mut criterion = Criterion::default().configure_from_args(); + + powi_f32(&mut criterion); + powi_f64(&mut criterion); + + #[cfg(all(f128_enabled, not(feature = "no-sys-f128")))] + powi_f128(&mut criterion); +} + +criterion_main!(float_pow); diff --git a/library/compiler-builtins/builtins-test/benches/float_sub.rs b/library/compiler-builtins/builtins-test/benches/float_sub.rs new file mode 100644 index 0000000000000..8bae294cd56b1 --- /dev/null +++ b/library/compiler-builtins/builtins-test/benches/float_sub.rs @@ -0,0 +1,93 @@ +#![cfg_attr(f128_enabled, feature(f128))] + +use builtins_test::float_bench; +use compiler_builtins::float::sub; +use criterion::{Criterion, criterion_main}; + +float_bench! { + name: sub_f32, + sig: (a: f32, b: f32) -> f32, + crate_fn: sub::__subsf3, + sys_fn: __subsf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "subss {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fsub {a:s}, {a:s}, {b:s}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +float_bench! { + name: sub_f64, + sig: (a: f64, b: f64) -> f64, + crate_fn: sub::__subdf3, + sys_fn: __subdf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "subsd {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fsub {a:d}, {a:d}, {b:d}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +#[cfg(f128_enabled)] +float_bench! { + name: sub_f128, + sig: (a: f128, b: f128) -> f128, + crate_fn: sub::__subtf3, + crate_fn_ppc: sub::__subkf3, + sys_fn: __subtf3, + sys_fn_ppc: __subkf3, + sys_available: not(feature = "no-sys-f128"), + asm: [] +} + +pub fn float_sub() { + let mut criterion = Criterion::default().configure_from_args(); + + sub_f32(&mut criterion); + sub_f64(&mut criterion); + + #[cfg(f128_enabled)] + { + sub_f128(&mut criterion); + } +} + +criterion_main!(float_sub); diff --git a/library/compiler-builtins/builtins-test/benches/float_trunc.rs b/library/compiler-builtins/builtins-test/benches/float_trunc.rs new file mode 100644 index 0000000000000..43310c7cfc825 --- /dev/null +++ b/library/compiler-builtins/builtins-test/benches/float_trunc.rs @@ -0,0 +1,146 @@ +#![cfg_attr(f128_enabled, feature(f128))] +#![cfg_attr(f16_enabled, feature(f16))] + +use builtins_test::float_bench; +use compiler_builtins::float::trunc; +use criterion::{Criterion, criterion_main}; + +#[cfg(f16_enabled)] +float_bench! 
{ + name: trunc_f32_f16, + sig: (a: f32) -> f16, + crate_fn: trunc::__truncsfhf2, + sys_fn: __truncsfhf2, + sys_available: not(feature = "no-sys-f16"), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: f16; + asm!( + "fcvt {ret:h}, {a:s}", + a = in(vreg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(f16_enabled)] +float_bench! { + name: trunc_f64_f16, + sig: (a: f64) -> f16, + crate_fn: trunc::__truncdfhf2, + sys_fn: __truncdfhf2, + sys_available: not(feature = "no-sys-f16-f64-convert"), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: f16; + asm!( + "fcvt {ret:h}, {a:d}", + a = in(vreg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: trunc_f64_f32, + sig: (a: f64) -> f32, + crate_fn: trunc::__truncdfsf2, + sys_fn: __truncdfsf2, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: f32; + asm!( + "cvtsd2ss {ret}, {a}", + a = in(xmm_reg) a, + ret = lateout(xmm_reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: f32; + asm!( + "fcvt {ret:s}, {a:d}", + a = in(vreg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(all(f16_enabled, f128_enabled))] +float_bench! { + name: trunc_f128_f16, + sig: (a: f128) -> f16, + crate_fn: trunc::__trunctfhf2, + crate_fn_ppc: trunc::__trunckfhf2, + sys_fn: __trunctfhf2, + sys_fn_ppc: __trunckfhf2, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [], +} + +#[cfg(f128_enabled)] +float_bench! { + name: trunc_f128_f32, + sig: (a: f128) -> f32, + crate_fn: trunc::__trunctfsf2, + crate_fn_ppc: trunc::__trunckfsf2, + sys_fn: __trunctfsf2, + sys_fn_ppc: __trunckfsf2, + sys_available: not(feature = "no-sys-f128"), + asm: [], +} + +#[cfg(f128_enabled)] +float_bench! 
{ + name: trunc_f128_f64, + sig: (a: f128) -> f64, + crate_fn: trunc::__trunctfdf2, + crate_fn_ppc: trunc::__trunckfdf2, + sys_fn: __trunctfdf2, + sys_fn_ppc: __trunckfdf2, + sys_available: not(feature = "no-sys-f128"), + asm: [], +} + +pub fn float_trunc() { + let mut criterion = Criterion::default().configure_from_args(); + + // FIXME(#655): `f16` tests disabled until we can bootstrap symbols + #[cfg(f16_enabled)] + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + { + trunc_f32_f16(&mut criterion); + trunc_f64_f16(&mut criterion); + } + + trunc_f64_f32(&mut criterion); + + #[cfg(f128_enabled)] + { + // FIXME(#655): `f16` tests disabled until we can bootstrap symbols + #[cfg(f16_enabled)] + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + trunc_f128_f16(&mut criterion); + + trunc_f128_f32(&mut criterion); + trunc_f128_f64(&mut criterion); + } +} + +criterion_main!(float_trunc); diff --git a/library/compiler-builtins/builtins-test/benches/mem.rs b/library/compiler-builtins/builtins-test/benches/mem.rs new file mode 100644 index 0000000000000..3f83926b6c5a2 --- /dev/null +++ b/library/compiler-builtins/builtins-test/benches/mem.rs @@ -0,0 +1,364 @@ +#![feature(test)] + +extern crate test; +use test::{Bencher, black_box}; + +extern crate compiler_builtins; +use compiler_builtins::mem::{memcmp, memcpy, memmove, memset}; + +const WORD_SIZE: usize = core::mem::size_of::(); + +struct AlignedVec { + vec: Vec, + size: usize, +} + +impl AlignedVec { + fn new(fill: u8, size: usize) -> Self { + let mut broadcast = fill as usize; + let mut bits = 8; + while bits < WORD_SIZE * 8 { + broadcast |= broadcast << bits; + bits *= 2; + } + + let vec = vec![broadcast; (size + WORD_SIZE - 1) & !WORD_SIZE]; + AlignedVec { vec, size } + } +} + +impl core::ops::Deref for AlignedVec { + type Target = [u8]; + fn deref(&self) -> &[u8] { + unsafe { core::slice::from_raw_parts(self.vec.as_ptr() as *const u8, self.size) } + } +} + +impl core::ops::DerefMut for AlignedVec { + fn deref_mut(&mut self) -> &mut [u8] { + unsafe { core::slice::from_raw_parts_mut(self.vec.as_mut_ptr() as *mut u8, self.size) } + } +} + +fn memcpy_builtin(b: &mut Bencher, n: usize, offset1: usize, offset2: usize) { + let v1 = AlignedVec::new(1, n + offset1); + let mut v2 = AlignedVec::new(0, n + offset2); + b.bytes = n as u64; + b.iter(|| { + let src: &[u8] = black_box(&v1[offset1..]); + let dst: &mut [u8] = black_box(&mut v2[offset2..]); + dst.copy_from_slice(src); + }) +} + +fn memcpy_rust(b: &mut Bencher, n: usize, offset1: usize, offset2: usize) { + let v1 = AlignedVec::new(1, n + offset1); + let mut v2 = AlignedVec::new(0, n + offset2); + b.bytes = n as u64; + b.iter(|| { + let src: &[u8] = black_box(&v1[offset1..]); + let dst: &mut [u8] = black_box(&mut v2[offset2..]); + unsafe { memcpy(dst.as_mut_ptr(), src.as_ptr(), n) } + }) +} + +fn memset_builtin(b: &mut Bencher, n: usize, offset: usize) { + let mut v1 = AlignedVec::new(0, n + offset); + b.bytes = n as u64; + b.iter(|| { + let dst: &mut [u8] = black_box(&mut v1[offset..]); + let val: u8 = black_box(27); + for b in dst { + *b = val; + } + }) +} + +fn memset_rust(b: &mut Bencher, n: usize, offset: usize) { + let mut v1 = AlignedVec::new(0, n + offset); + b.bytes = n as u64; + b.iter(|| { + let dst: &mut [u8] = black_box(&mut v1[offset..]); + let val = black_box(27); + unsafe { memset(dst.as_mut_ptr(), val, n) } + }) +} + +fn memcmp_builtin(b: &mut Bencher, n: usize) { + let v1 = AlignedVec::new(0, n); + let mut v2 = AlignedVec::new(0, n); + 
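// The two buffers differ only in the final byte, so the comparison must scan the full length. +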
v2[n - 1] = 1; + b.bytes = n as u64; + b.iter(|| { + let s1: &[u8] = black_box(&v1); + let s2: &[u8] = black_box(&v2); + s1.cmp(s2) + }) +} + +fn memcmp_builtin_unaligned(b: &mut Bencher, n: usize) { + let v1 = AlignedVec::new(0, n); + let mut v2 = AlignedVec::new(0, n); + v2[n - 1] = 1; + b.bytes = n as u64; + b.iter(|| { + let s1: &[u8] = black_box(&v1[0..]); + let s2: &[u8] = black_box(&v2[1..]); + s1.cmp(s2) + }) +} + +fn memcmp_rust(b: &mut Bencher, n: usize) { + let v1 = AlignedVec::new(0, n); + let mut v2 = AlignedVec::new(0, n); + v2[n - 1] = 1; + b.bytes = n as u64; + b.iter(|| { + let s1: &[u8] = black_box(&v1); + let s2: &[u8] = black_box(&v2); + unsafe { memcmp(s1.as_ptr(), s2.as_ptr(), n) } + }) +} + +fn memcmp_rust_unaligned(b: &mut Bencher, n: usize) { + let v1 = AlignedVec::new(0, n); + let mut v2 = AlignedVec::new(0, n); + v2[n - 1] = 1; + b.bytes = n as u64; + b.iter(|| { + let s1: &[u8] = black_box(&v1[0..]); + let s2: &[u8] = black_box(&v2[1..]); + unsafe { memcmp(s1.as_ptr(), s2.as_ptr(), n - 1) } + }) +} + +fn memmove_builtin(b: &mut Bencher, n: usize, offset: usize) { + let mut v = AlignedVec::new(0, n + n / 2 + offset); + b.bytes = n as u64; + b.iter(|| { + let s: &mut [u8] = black_box(&mut v); + s.copy_within(0..n, n / 2 + offset); + }) +} + +fn memmove_rust(b: &mut Bencher, n: usize, offset: usize) { + let mut v = AlignedVec::new(0, n + n / 2 + offset); + b.bytes = n as u64; + b.iter(|| { + let dst: *mut u8 = black_box(&mut v[n / 2 + offset..]).as_mut_ptr(); + let src: *const u8 = black_box(&v).as_ptr(); + unsafe { memmove(dst, src, n) }; + }) +} + +#[bench] +fn memcpy_builtin_4096(b: &mut Bencher) { + memcpy_builtin(b, 4096, 0, 0) +} +#[bench] +fn memcpy_rust_4096(b: &mut Bencher) { + memcpy_rust(b, 4096, 0, 0) +} +#[bench] +fn memcpy_builtin_1048576(b: &mut Bencher) { + memcpy_builtin(b, 1048576, 0, 0) +} +#[bench] +fn memcpy_rust_1048576(b: &mut Bencher) { + memcpy_rust(b, 1048576, 0, 0) +} +#[bench] +fn memcpy_builtin_4096_offset(b: &mut Bencher) { + memcpy_builtin(b, 4096, 65, 65) +} +#[bench] +fn memcpy_rust_4096_offset(b: &mut Bencher) { + memcpy_rust(b, 4096, 65, 65) +} +#[bench] +fn memcpy_builtin_1048576_offset(b: &mut Bencher) { + memcpy_builtin(b, 1048576, 65, 65) +} +#[bench] +fn memcpy_rust_1048576_offset(b: &mut Bencher) { + memcpy_rust(b, 1048576, 65, 65) +} +#[bench] +fn memcpy_builtin_4096_misalign(b: &mut Bencher) { + memcpy_builtin(b, 4096, 65, 66) +} +#[bench] +fn memcpy_rust_4096_misalign(b: &mut Bencher) { + memcpy_rust(b, 4096, 65, 66) +} +#[bench] +fn memcpy_builtin_1048576_misalign(b: &mut Bencher) { + memcpy_builtin(b, 1048576, 65, 66) +} +#[bench] +fn memcpy_rust_1048576_misalign(b: &mut Bencher) { + memcpy_rust(b, 1048576, 65, 66) +} + +#[bench] +fn memset_builtin_4096(b: &mut Bencher) { + memset_builtin(b, 4096, 0) +} +#[bench] +fn memset_rust_4096(b: &mut Bencher) { + memset_rust(b, 4096, 0) +} +#[bench] +fn memset_builtin_1048576(b: &mut Bencher) { + memset_builtin(b, 1048576, 0) +} +#[bench] +fn memset_rust_1048576(b: &mut Bencher) { + memset_rust(b, 1048576, 0) +} +#[bench] +fn memset_builtin_4096_offset(b: &mut Bencher) { + memset_builtin(b, 4096, 65) +} +#[bench] +fn memset_rust_4096_offset(b: &mut Bencher) { + memset_rust(b, 4096, 65) +} +#[bench] +fn memset_builtin_1048576_offset(b: &mut Bencher) { + memset_builtin(b, 1048576, 65) +} +#[bench] +fn memset_rust_1048576_offset(b: &mut Bencher) { + memset_rust(b, 1048576, 65) +} + +#[bench] +fn memcmp_builtin_8(b: &mut Bencher) { + memcmp_builtin(b, 8) +} +#[bench] +fn 
memcmp_rust_8(b: &mut Bencher) { + memcmp_rust(b, 8) +} +#[bench] +fn memcmp_builtin_16(b: &mut Bencher) { + memcmp_builtin(b, 16) +} +#[bench] +fn memcmp_rust_16(b: &mut Bencher) { + memcmp_rust(b, 16) +} +#[bench] +fn memcmp_builtin_32(b: &mut Bencher) { + memcmp_builtin(b, 32) +} +#[bench] +fn memcmp_rust_32(b: &mut Bencher) { + memcmp_rust(b, 32) +} +#[bench] +fn memcmp_builtin_64(b: &mut Bencher) { + memcmp_builtin(b, 64) +} +#[bench] +fn memcmp_rust_64(b: &mut Bencher) { + memcmp_rust(b, 64) +} +#[bench] +fn memcmp_builtin_4096(b: &mut Bencher) { + memcmp_builtin(b, 4096) +} +#[bench] +fn memcmp_rust_4096(b: &mut Bencher) { + memcmp_rust(b, 4096) +} +#[bench] +fn memcmp_builtin_1048576(b: &mut Bencher) { + memcmp_builtin(b, 1048576) +} +#[bench] +fn memcmp_rust_1048576(b: &mut Bencher) { + memcmp_rust(b, 1048576) +} +#[bench] +fn memcmp_builtin_unaligned_7(b: &mut Bencher) { + memcmp_builtin_unaligned(b, 8) +} +#[bench] +fn memcmp_rust_unaligned_7(b: &mut Bencher) { + memcmp_rust_unaligned(b, 8) +} +#[bench] +fn memcmp_builtin_unaligned_15(b: &mut Bencher) { + memcmp_builtin_unaligned(b, 16) +} +#[bench] +fn memcmp_rust_unaligned_15(b: &mut Bencher) { + memcmp_rust_unaligned(b, 16) +} +#[bench] +fn memcmp_builtin_unaligned_31(b: &mut Bencher) { + memcmp_builtin_unaligned(b, 32) +} +#[bench] +fn memcmp_rust_unaligned_31(b: &mut Bencher) { + memcmp_rust_unaligned(b, 32) +} +#[bench] +fn memcmp_builtin_unaligned_63(b: &mut Bencher) { + memcmp_builtin_unaligned(b, 64) +} +#[bench] +fn memcmp_rust_unaligned_63(b: &mut Bencher) { + memcmp_rust_unaligned(b, 64) +} +#[bench] +fn memcmp_builtin_unaligned_4095(b: &mut Bencher) { + memcmp_builtin_unaligned(b, 4096) +} +#[bench] +fn memcmp_rust_unaligned_4095(b: &mut Bencher) { + memcmp_rust_unaligned(b, 4096) +} +#[bench] +fn memcmp_builtin_unaligned_1048575(b: &mut Bencher) { + memcmp_builtin_unaligned(b, 1048576) +} +#[bench] +fn memcmp_rust_unaligned_1048575(b: &mut Bencher) { + memcmp_rust_unaligned(b, 1048576) +} + +#[bench] +fn memmove_builtin_4096(b: &mut Bencher) { + memmove_builtin(b, 4096, 0) +} +#[bench] +fn memmove_rust_4096(b: &mut Bencher) { + memmove_rust(b, 4096, 0) +} +#[bench] +fn memmove_builtin_1048576(b: &mut Bencher) { + memmove_builtin(b, 1048576, 0) +} +#[bench] +fn memmove_rust_1048576(b: &mut Bencher) { + memmove_rust(b, 1048576, 0) +} +#[bench] +fn memmove_builtin_4096_misalign(b: &mut Bencher) { + memmove_builtin(b, 4096, 1) +} +#[bench] +fn memmove_rust_4096_misalign(b: &mut Bencher) { + memmove_rust(b, 4096, 1) +} +#[bench] +fn memmove_builtin_1048576_misalign(b: &mut Bencher) { + memmove_builtin(b, 1048576, 1) +} +#[bench] +fn memmove_rust_1048576_misalign(b: &mut Bencher) { + memmove_rust(b, 1048576, 1) +} diff --git a/library/compiler-builtins/builtins-test/benches/mem_icount.rs b/library/compiler-builtins/builtins-test/benches/mem_icount.rs new file mode 100644 index 0000000000000..bd88cf80c7de2 --- /dev/null +++ b/library/compiler-builtins/builtins-test/benches/mem_icount.rs @@ -0,0 +1,500 @@ +//! Benchmarks that use Callgrind (via `iai_callgrind`) to report instruction count metrics. This +//! is stable enough to be tested in CI. 
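+//! Instruction counts are far more stable than wall-clock timings, which is what makes these results usable in CI.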
+ +use std::hint::black_box; +use std::{ops, slice}; + +use compiler_builtins::mem::{memcmp, memcpy, memmove, memset}; +use iai_callgrind::{library_benchmark, library_benchmark_group, main}; + +const PAGE_SIZE: usize = 0x1000; // 4 kiB +const MAX_ALIGN: usize = 512; // assume we may use avx512 operations one day +const MEG1: usize = 1 << 20; // 1 MiB + +#[derive(Clone)] +#[repr(C, align(0x1000))] +struct Page([u8; PAGE_SIZE]); + +/// A buffer that is page-aligned by default, with an optional offset to create a +/// misalignment. +struct AlignedSlice { + buf: Box<[Page]>, + len: usize, + offset: usize, +} + +impl AlignedSlice { + /// Allocate a slice aligned to ALIGN with at least `len` items, with `offset` from + /// page alignment. + fn new_zeroed(len: usize, offset: usize) -> Self { + assert!(offset < PAGE_SIZE); + let total_len = len + offset; + let items = (total_len / PAGE_SIZE) + if total_len % PAGE_SIZE > 0 { 1 } else { 0 }; + let buf = vec![Page([0u8; PAGE_SIZE]); items].into_boxed_slice(); + AlignedSlice { buf, len, offset } + } +} + +impl ops::Deref for AlignedSlice { + type Target = [u8]; + fn deref(&self) -> &Self::Target { + unsafe { slice::from_raw_parts(self.buf.as_ptr().cast::().add(self.offset), self.len) } + } +} + +impl ops::DerefMut for AlignedSlice { + fn deref_mut(&mut self) -> &mut Self::Target { + unsafe { + slice::from_raw_parts_mut( + self.buf.as_mut_ptr().cast::().add(self.offset), + self.len, + ) + } + } +} + +mod mcpy { + use super::*; + + struct Cfg { + len: usize, + s_off: usize, + d_off: usize, + } + + fn setup(cfg: Cfg) -> (usize, AlignedSlice, AlignedSlice) { + let Cfg { len, s_off, d_off } = cfg; + println!("bytes: {len} bytes, src offset: {s_off}, dst offset: {d_off}"); + let mut src = AlignedSlice::new_zeroed(len, s_off); + let dst = AlignedSlice::new_zeroed(len, d_off); + src.fill(1); + (len, src, dst) + } + + #[library_benchmark] + #[benches::aligned( + // Both aligned + args = [ + Cfg { len: 16, s_off: 0, d_off: 0 }, + Cfg { len: 32, s_off: 0, d_off: 0 }, + Cfg { len: 64, s_off: 0, d_off: 0 }, + Cfg { len: 512, s_off: 0, d_off: 0 }, + Cfg { len: 4096, s_off: 0, d_off: 0 }, + Cfg { len: MEG1, s_off: 0, d_off: 0 }, + ], + setup = setup, + )] + #[benches::offset( + // Both at the same offset + args = [ + Cfg { len: 16, s_off: 65, d_off: 65 }, + Cfg { len: 32, s_off: 65, d_off: 65 }, + Cfg { len: 64, s_off: 65, d_off: 65 }, + Cfg { len: 512, s_off: 65, d_off: 65 }, + Cfg { len: 4096, s_off: 65, d_off: 65 }, + Cfg { len: MEG1, s_off: 65, d_off: 65 }, + ], + setup = setup, + )] + #[benches::misaligned( + // `src` and `dst` both misaligned by different amounts + args = [ + Cfg { len: 16, s_off: 65, d_off: 66 }, + Cfg { len: 32, s_off: 65, d_off: 66 }, + Cfg { len: 64, s_off: 65, d_off: 66 }, + Cfg { len: 512, s_off: 65, d_off: 66 }, + Cfg { len: 4096, s_off: 65, d_off: 66 }, + Cfg { len: MEG1, s_off: 65, d_off: 66 }, + ], + setup = setup, + )] + fn bench((len, mut dst, src): (usize, AlignedSlice, AlignedSlice)) { + unsafe { + black_box(memcpy( + black_box(dst.as_mut_ptr()), + black_box(src.as_ptr()), + black_box(len), + )); + } + } + + library_benchmark_group!(name = memcpy; benchmarks = bench); +} + +mod mset { + use super::*; + + struct Cfg { + len: usize, + offset: usize, + } + + fn setup(Cfg { len, offset }: Cfg) -> (usize, AlignedSlice) { + println!("bytes: {len}, offset: {offset}"); + (len, AlignedSlice::new_zeroed(len, offset)) + } + + #[library_benchmark] + #[benches::aligned( + args = [ + Cfg { len: 16, offset: 0 }, + Cfg { len: 32, offset: 0 }, + 
Cfg { len: 64, offset: 0 }, + Cfg { len: 512, offset: 0 }, + Cfg { len: 4096, offset: 0 }, + Cfg { len: MEG1, offset: 0 }, + ], + setup = setup, + )] + #[benches::offset( + args = [ + Cfg { len: 16, offset: 65 }, + Cfg { len: 32, offset: 65 }, + Cfg { len: 64, offset: 65 }, + Cfg { len: 512, offset: 65 }, + Cfg { len: 4096, offset: 65 }, + Cfg { len: MEG1, offset: 65 }, + ], + setup = setup, + )] + fn bench((len, mut dst): (usize, AlignedSlice)) { + unsafe { + black_box(memset( + black_box(dst.as_mut_ptr()), + black_box(27), + black_box(len), + )); + } + } + + library_benchmark_group!(name = memset; benchmarks = bench); +} + +mod mcmp { + use super::*; + + struct Cfg { + len: usize, + s_off: usize, + d_off: usize, + } + + fn setup(cfg: Cfg) -> (usize, AlignedSlice, AlignedSlice) { + let Cfg { len, s_off, d_off } = cfg; + println!("bytes: {len}, src offset: {s_off}, dst offset: {d_off}"); + let b1 = AlignedSlice::new_zeroed(len, s_off); + let mut b2 = AlignedSlice::new_zeroed(len, d_off); + b2[len - 1] = 1; + (len, b1, b2) + } + + #[library_benchmark] + #[benches::aligned( + // Both aligned + args = [ + Cfg { len: 16, s_off: 0, d_off: 0 }, + Cfg { len: 32, s_off: 0, d_off: 0 }, + Cfg { len: 64, s_off: 0, d_off: 0 }, + Cfg { len: 512, s_off: 0, d_off: 0 }, + Cfg { len: 4096, s_off: 0, d_off: 0 }, + Cfg { len: MEG1, s_off: 0, d_off: 0 }, + ], + setup = setup + )] + #[benches::offset( + // Both at the same offset + args = [ + Cfg { len: 16, s_off: 65, d_off: 65 }, + Cfg { len: 32, s_off: 65, d_off: 65 }, + Cfg { len: 64, s_off: 65, d_off: 65 }, + Cfg { len: 512, s_off: 65, d_off: 65 }, + Cfg { len: 4096, s_off: 65, d_off: 65 }, + Cfg { len: MEG1, s_off: 65, d_off: 65 }, + ], + setup = setup + )] + #[benches::misaligned( + // `src` and `dst` both misaligned by different amounts + args = [ + Cfg { len: 16, s_off: 65, d_off: 66 }, + Cfg { len: 32, s_off: 65, d_off: 66 }, + Cfg { len: 64, s_off: 65, d_off: 66 }, + Cfg { len: 512, s_off: 65, d_off: 66 }, + Cfg { len: 4096, s_off: 65, d_off: 66 }, + Cfg { len: MEG1, s_off: 65, d_off: 66 }, + ], + setup = setup + )] + fn bench((len, mut dst, src): (usize, AlignedSlice, AlignedSlice)) { + unsafe { + black_box(memcmp( + black_box(dst.as_mut_ptr()), + black_box(src.as_ptr()), + black_box(len), + )); + } + } + + library_benchmark_group!(name = memcmp; benchmarks = bench); +} + +mod mmove { + use Spread::{Aligned, Large, Medium, Small}; + + use super::*; + + struct Cfg { + len: usize, + spread: Spread, + off: usize, + } + + enum Spread { + /// `src` and `dst` are close and have the same alignment (or offset). + Aligned, + /// `src` and `dst` are close. + Small, + /// `src` and `dst` are halfway offset in the buffer. + Medium, + /// `src` and `dst` only overlap by a single byte. 
+ Large, + } + + // Note that small and large are + fn calculate_spread(len: usize, spread: Spread) -> usize { + match spread { + // Note that this test doesn't make sense for lengths less than len=128 + Aligned => { + assert!(len > MAX_ALIGN, "aligned memset would have no overlap"); + MAX_ALIGN + } + Small => 1, + Medium => (len / 2) + 1, // add 1 so all are misaligned + Large => len - 1, + } + } + + fn setup_forward(cfg: Cfg) -> (usize, usize, AlignedSlice) { + let Cfg { len, spread, off } = cfg; + let spread = calculate_spread(len, spread); + println!("bytes: {len}, spread: {spread}, offset: {off}, forward"); + assert!(spread < len, "memmove tests should have some overlap"); + let mut buf = AlignedSlice::new_zeroed(len + spread, off); + let mut fill: usize = 0; + buf[..len].fill_with(|| { + fill += 1; + fill as u8 + }); + (len, spread, buf) + } + + fn setup_backward(cfg: Cfg) -> (usize, usize, AlignedSlice) { + let Cfg { len, spread, off } = cfg; + let spread = calculate_spread(len, spread); + println!("bytes: {len}, spread: {spread}, offset: {off}, backward"); + assert!(spread < len, "memmove tests should have some overlap"); + let mut buf = AlignedSlice::new_zeroed(len + spread, off); + let mut fill: usize = 0; + buf[spread..].fill_with(|| { + fill += 1; + fill as u8 + }); + (len, spread, buf) + } + + #[library_benchmark] + #[benches::aligned( + args = [ + // Don't test small spreads since there is no overlap + Cfg { len: 4096, spread: Aligned, off: 0 }, + Cfg { len: MEG1, spread: Aligned, off: 0 }, + ], + setup = setup_forward + )] + #[benches::small_spread( + args = [ + Cfg { len: 16, spread: Small, off: 0 }, + Cfg { len: 32, spread: Small, off: 0 }, + Cfg { len: 64, spread: Small, off: 0 }, + Cfg { len: 512, spread: Small, off: 0 }, + Cfg { len: 4096, spread: Small, off: 0 }, + Cfg { len: MEG1, spread: Small, off: 0 }, + ], + setup = setup_forward + )] + #[benches::medium_spread( + args = [ + Cfg { len: 16, spread: Medium, off: 0 }, + Cfg { len: 32, spread: Medium, off: 0 }, + Cfg { len: 64, spread: Medium, off: 0 }, + Cfg { len: 512, spread: Medium, off: 0 }, + Cfg { len: 4096, spread: Medium, off: 0 }, + Cfg { len: MEG1, spread: Medium, off: 0 }, + ], + setup = setup_forward + )] + #[benches::large_spread( + args = [ + Cfg { len: 16, spread: Large, off: 0 }, + Cfg { len: 32, spread: Large, off: 0 }, + Cfg { len: 64, spread: Large, off: 0 }, + Cfg { len: 512, spread: Large, off: 0 }, + Cfg { len: 4096, spread: Large, off: 0 }, + Cfg { len: MEG1, spread: Large, off: 0 }, + ], + setup = setup_forward + )] + #[benches::aligned_off( + args = [ + Cfg { len: 4096, spread: Aligned, off: 65 }, + Cfg { len: MEG1, spread: Aligned, off: 65 }, + ], + setup = setup_forward + )] + #[benches::small_spread_off( + args = [ + Cfg { len: 16, spread: Small, off: 65 }, + Cfg { len: 32, spread: Small, off: 65 }, + Cfg { len: 64, spread: Small, off: 65 }, + Cfg { len: 512, spread: Small, off: 65 }, + Cfg { len: 4096, spread: Small, off: 65 }, + Cfg { len: MEG1, spread: Small, off: 65 }, + ], + setup = setup_forward + )] + #[benches::medium_spread_off( + args = [ + Cfg { len: 16, spread: Medium, off: 65 }, + Cfg { len: 32, spread: Medium, off: 65 }, + Cfg { len: 64, spread: Medium, off: 65 }, + Cfg { len: 512, spread: Medium, off: 65 }, + Cfg { len: 4096, spread: Medium, off: 65 }, + Cfg { len: MEG1, spread: Medium, off: 65 }, + ], + setup = setup_forward + )] + #[benches::large_spread_off( + args = [ + Cfg { len: 16, spread: Large, off: 65 }, + Cfg { len: 32, spread: Large, off: 65 }, + Cfg { len: 64, 
spread: Large, off: 65 }, + Cfg { len: 512, spread: Large, off: 65 }, + Cfg { len: 4096, spread: Large, off: 65 }, + Cfg { len: MEG1, spread: Large, off: 65 }, + ], + setup = setup_forward + )] + fn forward((len, spread, mut buf): (usize, usize, AlignedSlice)) { + // Test moving from the start of the buffer toward the end + unsafe { + black_box(memmove( + black_box(buf[spread..].as_mut_ptr()), + black_box(buf.as_ptr()), + black_box(len), + )); + } + } + + #[library_benchmark] + #[benches::aligned( + args = [ + // Don't test small spreads since there is no overlap + Cfg { len: 4096, spread: Aligned, off: 0 }, + Cfg { len: MEG1, spread: Aligned, off: 0 }, + ], + setup = setup_backward + )] + #[benches::small_spread( + args = [ + Cfg { len: 16, spread: Small, off: 0 }, + Cfg { len: 32, spread: Small, off: 0 }, + Cfg { len: 64, spread: Small, off: 0 }, + Cfg { len: 512, spread: Small, off: 0 }, + Cfg { len: 4096, spread: Small, off: 0 }, + Cfg { len: MEG1, spread: Small, off: 0 }, + ], + setup = setup_backward + )] + #[benches::medium_spread( + args = [ + Cfg { len: 16, spread: Medium, off: 0 }, + Cfg { len: 32, spread: Medium, off: 0 }, + Cfg { len: 64, spread: Medium, off: 0 }, + Cfg { len: 512, spread: Medium, off: 0 }, + Cfg { len: 4096, spread: Medium, off: 0 }, + Cfg { len: MEG1, spread: Medium, off: 0 }, + ], + setup = setup_backward + )] + #[benches::large_spread( + args = [ + Cfg { len: 16, spread: Large, off: 0 }, + Cfg { len: 32, spread: Large, off: 0 }, + Cfg { len: 64, spread: Large, off: 0 }, + Cfg { len: 512, spread: Large, off: 0 }, + Cfg { len: 4096, spread: Large, off: 0 }, + Cfg { len: MEG1, spread: Large, off: 0 }, + ], + setup = setup_backward + )] + #[benches::aligned_off( + args = [ + // Don't test small spreads since there is no overlap + Cfg { len: 4096, spread: Aligned, off: 65 }, + Cfg { len: MEG1, spread: Aligned, off: 65 }, + ], + setup = setup_backward + )] + #[benches::small_spread_off( + args = [ + Cfg { len: 16, spread: Small, off: 65 }, + Cfg { len: 32, spread: Small, off: 65 }, + Cfg { len: 64, spread: Small, off: 65 }, + Cfg { len: 512, spread: Small, off: 65 }, + Cfg { len: 4096, spread: Small, off: 65 }, + Cfg { len: MEG1, spread: Small, off: 65 }, + ], + setup = setup_backward + )] + #[benches::medium_spread_off( + args = [ + Cfg { len: 16, spread: Medium, off: 65 }, + Cfg { len: 32, spread: Medium, off: 65 }, + Cfg { len: 64, spread: Medium, off: 65 }, + Cfg { len: 512, spread: Medium, off: 65 }, + Cfg { len: 4096, spread: Medium, off: 65 }, + Cfg { len: MEG1, spread: Medium, off: 65 }, + ], + setup = setup_backward + )] + #[benches::large_spread_off( + args = [ + Cfg { len: 16, spread: Large, off: 65 }, + Cfg { len: 32, spread: Large, off: 65 }, + Cfg { len: 64, spread: Large, off: 65 }, + Cfg { len: 512, spread: Large, off: 65 }, + Cfg { len: 4096, spread: Large, off: 65 }, + Cfg { len: MEG1, spread: Large, off: 65 }, + ], + setup = setup_backward + )] + fn backward((len, spread, mut buf): (usize, usize, AlignedSlice)) { + // Test moving from the end of the buffer toward the start + unsafe { + black_box(memmove( + black_box(buf.as_mut_ptr()), + black_box(buf[spread..].as_ptr()), + black_box(len), + )); + } + } + + library_benchmark_group!(name = memmove; benchmarks = forward, backward); +} + +use mcmp::memcmp; +use mcpy::memcpy; +use mmove::memmove; +use mset::memset; + +main!(library_benchmark_groups = memcpy, memset, memcmp, memmove); diff --git a/library/compiler-builtins/builtins-test/build.rs b/library/compiler-builtins/builtins-test/build.rs new 
file mode 100644 index 0000000000000..e8f4eb4dd22eb --- /dev/null +++ b/library/compiler-builtins/builtins-test/build.rs @@ -0,0 +1,120 @@ +use std::collections::HashSet; + +mod builtins_configure { + include!("../compiler-builtins/configure.rs"); +} + +/// Features to enable +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +enum Feature { + NoSysF128, + NoSysF128IntConvert, + NoSysF16, + NoSysF16F64Convert, + NoSysF16F128Convert, +} + +impl Feature { + fn implies(self) -> &'static [Self] { + match self { + Self::NoSysF128 => [Self::NoSysF128IntConvert, Self::NoSysF16F128Convert].as_slice(), + Self::NoSysF128IntConvert => [].as_slice(), + Self::NoSysF16 => [Self::NoSysF16F64Convert, Self::NoSysF16F128Convert].as_slice(), + Self::NoSysF16F64Convert => [].as_slice(), + Self::NoSysF16F128Convert => [].as_slice(), + } + } +} + +fn main() { + println!("cargo::rerun-if-changed=../configure.rs"); + + let target = builtins_configure::Target::from_env(); + let mut features = HashSet::new(); + + // These platforms do not have f128 symbols available in their system libraries, so + // skip related tests. + if target.arch == "arm" + || target.vendor == "apple" + || target.env == "msvc" + // GCC and LLVM disagree on the ABI of `f16` and `f128` with MinGW. See + // . + || (target.os == "windows" && target.env == "gnu") + // FIXME(llvm): There is an ABI incompatibility between GCC and Clang on 32-bit x86. + // See . + || target.arch == "x86" + // 32-bit PowerPC and 64-bit LE gets code generated that Qemu cannot handle. See + // . + || target.arch == "powerpc" + || target.arch == "powerpc64le" + // FIXME: We get different results from the builtin functions. See + // . + || target.arch == "powerpc64" + { + features.insert(Feature::NoSysF128); + } + + if target.arch == "x86" { + // 32-bit x86 does not have `__fixunstfti`/`__fixtfti` but does have everything else + features.insert(Feature::NoSysF128IntConvert); + // FIXME: 32-bit x86 has a bug in `f128 -> f16` system libraries + features.insert(Feature::NoSysF16F128Convert); + } + + // These platforms do not have f16 symbols available in their system libraries, so + // skip related tests. Most of these are missing `f16 <-> f32` conversion routines. + if (target.arch == "aarch64" && target.os == "linux") + || target.arch.starts_with("arm") + || target.arch == "powerpc" + || target.arch == "powerpc64" + || target.arch == "powerpc64le" + || target.arch == "loongarch64" + || (target.arch == "x86" && !target.has_feature("sse")) + || target.os == "windows" + // Linking says "error: function signature mismatch: __extendhfsf2" and seems to + // think the signature is either `(i32) -> f32` or `(f32) -> f32`. See + // . + || target.arch == "wasm32" + || target.arch == "wasm64" + { + features.insert(Feature::NoSysF16); + } + + // These platforms are missing either `__extendhfdf2` or `__truncdfhf2`. + if target.vendor == "apple" || target.os == "windows" { + features.insert(Feature::NoSysF16F64Convert); + } + + // Add implied features. Collection is required for borrows. 
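+ // Collecting into a temporary `Vec` avoids mutating `features` while the iterator still borrows it.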
+ features.extend( + features + .iter() + .flat_map(|x| x.implies()) + .copied() + .collect::>(), + ); + + for feature in features { + let (name, warning) = match feature { + Feature::NoSysF128 => ("no-sys-f128", "using apfloat fallback for f128"), + Feature::NoSysF128IntConvert => ( + "no-sys-f128-int-convert", + "using apfloat fallback for f128 <-> int conversions", + ), + Feature::NoSysF16F64Convert => ( + "no-sys-f16-f64-convert", + "using apfloat fallback for f16 <-> f64 conversions", + ), + Feature::NoSysF16F128Convert => ( + "no-sys-f16-f128-convert", + "using apfloat fallback for f16 <-> f128 conversions", + ), + Feature::NoSysF16 => ("no-sys-f16", "using apfloat fallback for f16"), + }; + println!("cargo:warning={warning}"); + println!("cargo:rustc-cfg=feature=\"{name}\""); + } + + builtins_configure::configure_aliases(&target); + builtins_configure::configure_f16_f128(&target); +} diff --git a/library/compiler-builtins/builtins-test/src/bench.rs b/library/compiler-builtins/builtins-test/src/bench.rs new file mode 100644 index 0000000000000..2348f6bc97379 --- /dev/null +++ b/library/compiler-builtins/builtins-test/src/bench.rs @@ -0,0 +1,366 @@ +use alloc::vec::Vec; +use core::cell::RefCell; + +use compiler_builtins::float::Float; + +/// Fuzz with these many items to ensure equal functions +pub const CHECK_ITER_ITEMS: u32 = 10_000; +/// Benchmark with this many items to get a variety +pub const BENCH_ITER_ITEMS: u32 = 500; + +/// Still run benchmarks/tests but don't check correctness between compiler-builtins and +/// builtin system functions functions +pub fn skip_sys_checks(test_name: &str) -> bool { + const ALWAYS_SKIPPED: &[&str] = &[ + // FIXME(f16_f128): system symbols have incorrect results + // + "extend_f16_f32", + "trunc_f32_f16", + "trunc_f64_f16", + // FIXME(#616): re-enable once fix is in nightly + // + "mul_f32", + "mul_f64", + ]; + + // FIXME(f16_f128): error on LE ppc64. There are more tests that are cfg-ed out completely + // in their benchmark modules due to runtime panics. + // + const PPC64LE_SKIPPED: &[&str] = &["extend_f32_f128"]; + + // FIXME(f16_f128): system symbols have incorrect results + // + const X86_NO_SSE_SKIPPED: &[&str] = &[ + "add_f128", "sub_f128", "mul_f128", "div_f128", "powi_f32", "powi_f64", + ]; + + // FIXME(f16_f128): Wide multiply carry bug in `compiler-rt`, re-enable when nightly no longer + // uses `compiler-rt` version. 
+ // + const AARCH64_SKIPPED: &[&str] = &["mul_f128", "div_f128"]; + + // FIXME(llvm): system symbols have incorrect results on Windows + // + const WINDOWS_SKIPPED: &[&str] = &[ + "conv_f32_u128", + "conv_f32_i128", + "conv_f64_u128", + "conv_f64_i128", + ]; + + if cfg!(target_arch = "arm") { + // The Arm symbols need a different ABI that our macro doesn't handle, just skip it + return true; + } + + if ALWAYS_SKIPPED.contains(&test_name) { + return true; + } + + if cfg!(all(target_arch = "powerpc64", target_endian = "little")) + && PPC64LE_SKIPPED.contains(&test_name) + { + return true; + } + + if cfg!(all(target_arch = "x86", not(target_feature = "sse"))) + && X86_NO_SSE_SKIPPED.contains(&test_name) + { + return true; + } + + if cfg!(target_arch = "aarch64") && AARCH64_SKIPPED.contains(&test_name) { + return true; + } + + if cfg!(target_family = "windows") && WINDOWS_SKIPPED.contains(&test_name) { + return true; + } + + false +} + +/// Still run benchmarks/tests but don't check correctness between compiler-builtins and +/// assembly functions +pub fn skip_asm_checks(_test_name: &str) -> bool { + // Nothing to skip at this time + false +} + +/// Create a comparison of the system symbol, compiler_builtins, and optionally handwritten +/// assembly. +/// +/// # Safety +/// +/// The signature must be correct and any assembly must be sound. +#[macro_export] +macro_rules! float_bench { + ( + // Name of this benchmark + name: $name:ident, + // The function signature to be tested + sig: ($($arg:ident: $arg_ty:ty),*) -> $ret_ty:ty, + // Path to the crate in compiler_builtins + crate_fn: $crate_fn:path, + // Optional alias on ppc + $( crate_fn_ppc: $crate_fn_ppc:path, )? + // Name of the system symbol + sys_fn: $sys_fn:ident, + // Optional alias on ppc + $( sys_fn_ppc: $sys_fn_ppc:path, )? + // Meta saying whether the system symbol is available + sys_available: $sys_available:meta, + // An optional function to validate the results of two functions are equal, if not + // just `$ret_ty::check_eq` + $( output_eq: $output_eq:expr, )? + // Assembly implementations, if any. + asm: [ + $( + #[cfg($asm_meta:meta)] { + $($asm_tt:tt)* + } + );* + $(;)? + ] + $(,)? + ) => {paste::paste! { + // SAFETY: macro invocation must use the correct signature + #[cfg($sys_available)] + unsafe extern "C" { + /// Binding for the system function + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + fn $sys_fn($($arg: $arg_ty),*) -> $ret_ty; + + + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + float_bench! { @coalesce_fn $($sys_fn_ppc)? 
=> + fn $sys_fn($($arg: $arg_ty),*) -> $ret_ty; + } + } + + fn $name(c: &mut Criterion) { + use core::hint::black_box; + use compiler_builtins::float::Float; + use $crate::bench::TestIO; + + #[inline(never)] // equalize with external calls + fn crate_fn($($arg: $arg_ty),*) -> $ret_ty { + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + let target_crate_fn = $crate_fn; + + // On PPC, use an alias if specified + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + let target_crate_fn = float_bench!(@coalesce $($crate_fn_ppc)?, $crate_fn); + + target_crate_fn( $($arg),* ) + } + + #[inline(always)] // already a branch + #[cfg($sys_available)] + fn sys_fn($($arg: $arg_ty),*) -> $ret_ty { + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + let target_sys_fn = $sys_fn; + + // On PPC, use an alias if specified + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + let target_sys_fn = float_bench!(@coalesce $($sys_fn_ppc)?, $sys_fn); + + unsafe { target_sys_fn( $($arg),* ) } + } + + #[inline(never)] // equalize with external calls + #[cfg(any( $($asm_meta),* ))] + fn asm_fn($(mut $arg: $arg_ty),*) -> $ret_ty { + use core::arch::asm; + $( + #[cfg($asm_meta)] + unsafe { $($asm_tt)* } + )* + } + + let testvec = <($($arg_ty),*)>::make_testvec($crate::bench::CHECK_ITER_ITEMS); + let benchvec = <($($arg_ty),*)>::make_testvec($crate::bench::BENCH_ITER_ITEMS); + let test_name = stringify!($name); + let check_eq = float_bench!(@coalesce $($output_eq)?, $ret_ty::check_eq); + + // Verify math lines up. We run the crate functions even if we don't validate the + // output here to make sure there are no panics or crashes. + + #[cfg($sys_available)] + for ($($arg),*) in testvec.iter().copied() { + let crate_res = crate_fn($($arg),*); + let sys_res = sys_fn($($arg),*); + + if $crate::bench::skip_sys_checks(test_name) { + continue; + } + + assert!( + check_eq(crate_res, sys_res), + "{test_name}{:?}: crate: {crate_res:?}, sys: {sys_res:?}", + ($($arg),* ,) + ); + } + + #[cfg(any( $($asm_meta),* ))] + { + for ($($arg),*) in testvec.iter().copied() { + let crate_res = crate_fn($($arg),*); + let asm_res = asm_fn($($arg),*); + + if $crate::bench::skip_asm_checks(test_name) { + continue; + } + + assert!( + check_eq(crate_res, asm_res), + "{test_name}{:?}: crate: {crate_res:?}, asm: {asm_res:?}", + ($($arg),* ,) + ); + } + } + + let mut group = c.benchmark_group(test_name); + group.bench_function("compiler-builtins", |b| b.iter(|| { + for ($($arg),*) in benchvec.iter().copied() { + black_box(crate_fn( $(black_box($arg)),* )); + } + })); + + #[cfg($sys_available)] + group.bench_function("system", |b| b.iter(|| { + for ($($arg),*) in benchvec.iter().copied() { + black_box(sys_fn( $(black_box($arg)),* )); + } + })); + + #[cfg(any( $($asm_meta),* ))] + group.bench_function(&format!( + "assembly ({} {})", std::env::consts::ARCH, std::env::consts::FAMILY + ), |b| b.iter(|| { + for ($($arg),*) in benchvec.iter().copied() { + black_box(asm_fn( $(black_box($arg)),* )); + } + })); + + group.finish(); + } + }}; + + // Allow overriding a default + (@coalesce $specified:expr, $default:expr) => { $specified }; + (@coalesce, $default:expr) => { $default }; + + // Allow overriding a function name + (@coalesce_fn $specified:ident => fn $default_name:ident $($tt:tt)+) => { + fn $specified $($tt)+ + }; + (@coalesce_fn => fn $default_name:ident $($tt:tt)+) => { + fn $default_name $($tt)+ + }; +} + +/// A type used as either an input or output to/from a benchmark function. 
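+/// Implementations provide `make_testvec` to build a vector of fuzzed input values and `check_eq` to decide whether two results agree.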
+pub trait TestIO: Sized { + fn make_testvec(len: u32) -> Vec; + fn check_eq(a: Self, b: Self) -> bool; +} + +macro_rules! impl_testio { + (float $($f_ty:ty),+) => {$( + impl TestIO for $f_ty { + fn make_testvec(len: u32) -> Vec { + // refcell because fuzz_* takes a `Fn` + let ret = RefCell::new(Vec::new()); + crate::fuzz_float(len, |a| ret.borrow_mut().push(a)); + ret.into_inner() + } + + fn check_eq(a: Self, b: Self) -> bool { + Float::eq_repr(a, b) + } + } + + impl TestIO for ($f_ty, $f_ty) { + fn make_testvec(len: u32) -> Vec { + // refcell because fuzz_* takes a `Fn` + let ret = RefCell::new(Vec::new()); + crate::fuzz_float_2(len, |a, b| ret.borrow_mut().push((a, b))); + ret.into_inner() + } + + fn check_eq(_a: Self, _b: Self) -> bool { + unimplemented!() + } + } + )*}; + + (int $($i_ty:ty),+) => {$( + impl TestIO for $i_ty { + fn make_testvec(len: u32) -> Vec { + // refcell because fuzz_* takes a `Fn` + let ret = RefCell::new(Vec::new()); + crate::fuzz(len, |a| ret.borrow_mut().push(a)); + ret.into_inner() + } + + fn check_eq(a: Self, b: Self) -> bool { + a == b + } + } + + impl TestIO for ($i_ty, $i_ty) { + fn make_testvec(len: u32) -> Vec { + // refcell because fuzz_* takes a `Fn` + let ret = RefCell::new(Vec::new()); + crate::fuzz_2(len, |a, b| ret.borrow_mut().push((a, b))); + ret.into_inner() + } + + fn check_eq(_a: Self, _b: Self) -> bool { + unimplemented!() + } + } + )*}; + + ((float, int) ($f_ty:ty, $i_ty:ty)) => { + impl TestIO for ($f_ty, $i_ty) { + fn make_testvec(len: u32) -> Vec { + // refcell because fuzz_* takes a `Fn` + let ivec = RefCell::new(Vec::new()); + let fvec = RefCell::new(Vec::new()); + + crate::fuzz(len.isqrt(), |a| ivec.borrow_mut().push(a)); + crate::fuzz_float(len.isqrt(), |a| fvec.borrow_mut().push(a)); + + let mut ret = Vec::new(); + let ivec = ivec.into_inner(); + let fvec = fvec.into_inner(); + + for f in fvec { + for i in &ivec { + ret.push((f, *i)); + } + } + + ret + } + + fn check_eq(_a: Self, _b: Self) -> bool { + unimplemented!() + } + } + } +} + +#[cfg(f16_enabled)] +impl_testio!(float f16); +impl_testio!(float f32, f64); +#[cfg(f128_enabled)] +impl_testio!(float f128); +impl_testio!(int i16, i32, i64, i128); +impl_testio!(int u16, u32, u64, u128); +impl_testio!((float, int)(f32, i32)); +impl_testio!((float, int)(f64, i32)); +#[cfg(f128_enabled)] +impl_testio!((float, int)(f128, i32)); diff --git a/library/compiler-builtins/builtins-test/src/lib.rs b/library/compiler-builtins/builtins-test/src/lib.rs new file mode 100644 index 0000000000000..c596ac2138076 --- /dev/null +++ b/library/compiler-builtins/builtins-test/src/lib.rs @@ -0,0 +1,337 @@ +//! This crate is for integration testing and fuzz testing of functions in `compiler-builtins`. This +//! includes publicly documented intrinsics and some internal alternative implementation functions +//! such as `usize_leading_zeros_riscv` (which are tested because they are configured for +//! architectures not tested by the CI). +//! +//! The general idea is to use a combination of edge case testing and randomized fuzz testing. The +//! edge case testing is crucial for checking cases like where both inputs are equal or equal to +//! special values such as `i128::MIN`, which is unlikely for the random fuzzer by itself to +//! encounter. The randomized fuzz testing is specially designed to cover wide swaths of search +//! space in as few iterations as possible. See `fuzz_values` in `builtins-test/tests/misc.rs` for +//! an example. +//! +//! 
Some floating point tests are disabled for specific architectures, because they do not have +//! correct rounding. +#![no_std] +#![cfg_attr(f128_enabled, feature(f128))] +#![cfg_attr(f16_enabled, feature(f16))] + +pub mod bench; +extern crate alloc; + +use compiler_builtins::float::Float; +use compiler_builtins::int::{Int, MinInt}; +use rand_xoshiro::Xoshiro128StarStar; +use rand_xoshiro::rand_core::{RngCore, SeedableRng}; + +/// Sets the number of fuzz iterations run for most tests. In practice, the vast majority of bugs +/// are caught by the edge case testers. Most of the remaining bugs triggered by more complex +/// sequences are caught well within 10_000 fuzz iterations. For classes of algorithms like division +/// that are vulnerable to rare edge cases, we want 1_000_000 iterations to be more confident. In +/// practical CI, however, we only want to run the more strenuous test once to catch algorithmic +/// level bugs, and run the 10_000 iteration test on most targets. Target-dependent bugs are likely +/// to involve miscompilation and misconfiguration that is likely to break algorithms in quickly +/// caught ways. We choose to configure `N = 1_000_000` iterations for `x86_64` targets (and if +/// debug assertions are disabled. Tests without `--release` would take too long) which are likely +/// to have fast hardware, and run `N = 10_000` for all other targets. +pub const N: u32 = if cfg!(target_arch = "x86_64") && !cfg!(debug_assertions) { + 1_000_000 +} else { + 10_000 +}; + +/// Random fuzzing step. When run several times, it results in excellent fuzzing entropy such as: +/// 11110101010101011110111110011111 +/// 10110101010100001011101011001010 +/// 1000000000000000 +/// 10000000000000110111110000001010 +/// 1111011111111101010101111110101 +/// 101111111110100000000101000000 +/// 10000000110100000000100010101 +/// 1010101010101000 +fn fuzz_step(rng: &mut Xoshiro128StarStar, x: &mut I) { + let ones = !I::ZERO; + let bit_indexing_mask: u32 = I::BITS - 1; + // It happens that all the RNG we need can come from one call. 7 bits are needed to index a + // worst case 128 bit integer, and there are 4 indexes that need to be made plus 4 bits for + // selecting operations + let rng32 = rng.next_u32(); + + // Randomly OR, AND, and XOR randomly sized and shifted continuous strings of + // ones with `lhs` and `rhs`. + let r0 = bit_indexing_mask & rng32; + let r1 = bit_indexing_mask & (rng32 >> 7); + let mask = ones.wrapping_shl(r0).rotate_left(r1); + match (rng32 >> 14) % 4 { + 0 => *x |= mask, + 1 => *x &= mask, + // both 2 and 3 to make XORs as common as ORs and ANDs combined + _ => *x ^= mask, + } + + // Alternating ones and zeros (e.x. 0b1010101010101010). This catches second-order + // problems that might occur for algorithms with two modes of operation (potentially + // there is some invariant that can be broken and maintained via alternating between modes, + // breaking the algorithm when it reaches the end). + let mut alt_ones = I::ONE; + for _ in 0..(I::BITS / 2) { + alt_ones <<= 2; + alt_ones |= I::ONE; + } + let r0 = bit_indexing_mask & (rng32 >> 16); + let r1 = bit_indexing_mask & (rng32 >> 23); + let mask = alt_ones.wrapping_shl(r0).rotate_left(r1); + match rng32 >> 30 { + 0 => *x |= mask, + 1 => *x &= mask, + _ => *x ^= mask, + } +} + +// We need macros like this, because `#![no_std]` prevents us from using iterators +macro_rules! 
edge_cases { + ($I:ident, $case:ident, $inner:block) => { + for i0 in 0..$I::FUZZ_NUM { + let mask_lo = (!$I::UnsignedInt::ZERO).wrapping_shr($I::FUZZ_LENGTHS[i0] as u32); + for i1 in i0..I::FUZZ_NUM { + let mask_hi = + (!$I::UnsignedInt::ZERO).wrapping_shl($I::FUZZ_LENGTHS[i1 - i0] as u32); + let $case = I::from_unsigned(mask_lo & mask_hi); + $inner + } + } + }; +} + +/// Feeds a series of fuzzing inputs to `f`. The fuzzer first uses an algorithm designed to find +/// edge cases, followed by a more random fuzzer that runs `n` times. +pub fn fuzz(n: u32, mut f: F) +where + ::UnsignedInt: Int, +{ + // edge case tester. Calls `f` 210 times for u128. + // zero gets skipped by the loop + f(I::ZERO); + edge_cases!(I, case, { + f(case); + }); + + // random fuzzer + let mut rng = Xoshiro128StarStar::seed_from_u64(0); + let mut x: I = MinInt::ZERO; + for _ in 0..n { + fuzz_step(&mut rng, &mut x); + f(x) + } +} + +/// The same as `fuzz`, except `f` has two inputs. +pub fn fuzz_2(n: u32, f: F) +where + ::UnsignedInt: Int, +{ + // Check cases where the first and second inputs are zero. Both call `f` 210 times for `u128`. + edge_cases!(I, case, { + f(I::ZERO, case); + }); + edge_cases!(I, case, { + f(case, I::ZERO); + }); + // Nested edge tester. Calls `f` 44100 times for `u128`. + edge_cases!(I, case0, { + edge_cases!(I, case1, { + f(case0, case1); + }) + }); + + // random fuzzer + let mut rng = Xoshiro128StarStar::seed_from_u64(0); + let mut x: I = I::ZERO; + let mut y: I = I::ZERO; + for _ in 0..n { + fuzz_step(&mut rng, &mut x); + fuzz_step(&mut rng, &mut y); + f(x, y) + } +} + +/// Tester for shift functions +pub fn fuzz_shift(f: F) { + // Shift functions are very simple and do not need anything other than shifting a small + // set of random patterns for every fuzz length. + let mut rng = Xoshiro128StarStar::seed_from_u64(0); + let mut x: I = MinInt::ZERO; + for i in 0..I::FUZZ_NUM { + fuzz_step(&mut rng, &mut x); + f(x, MinInt::ZERO); + f(x, I::FUZZ_LENGTHS[i] as u32); + } +} + +fn fuzz_float_step(rng: &mut Xoshiro128StarStar, f: &mut F) { + let rng32 = rng.next_u32(); + // we need to fuzz the different parts of the float separately, because the masking on larger + // significands will tend to set the exponent to all ones or all zeros frequently + + // sign bit fuzzing + let sign = (rng32 & 1) != 0; + + // exponent fuzzing. Only 4 bits for the selector needed. + let ones = (F::Int::ONE << F::EXP_BITS) - F::Int::ONE; + let r0 = (rng32 >> 1) % F::EXP_BITS; + let r1 = (rng32 >> 5) % F::EXP_BITS; + // custom rotate shift. Note that `F::Int` is unsigned, so we can shift right without smearing + // the sign bit. + let mask = if r1 == 0 { + ones.wrapping_shr(r0) + } else { + let tmp = ones.wrapping_shr(r0); + (tmp.wrapping_shl(r1) | tmp.wrapping_shr(F::EXP_BITS - r1)) & ones + }; + let mut exp = (f.to_bits() & F::EXP_MASK) >> F::SIG_BITS; + match (rng32 >> 9) % 4 { + 0 => exp |= mask, + 1 => exp &= mask, + _ => exp ^= mask, + } + + // significand fuzzing + let mut sig = f.to_bits() & F::SIG_MASK; + fuzz_step(rng, &mut sig); + sig &= F::SIG_MASK; + + *f = F::from_parts(sign, exp, sig); +} + +macro_rules! 
float_edge_cases { + ($F:ident, $case:ident, $inner:block) => { + for exponent in [ + F::Int::ZERO, + F::Int::ONE, + F::Int::ONE << (F::EXP_BITS / 2), + (F::Int::ONE << (F::EXP_BITS - 1)) - F::Int::ONE, + F::Int::ONE << (F::EXP_BITS - 1), + (F::Int::ONE << (F::EXP_BITS - 1)) + F::Int::ONE, + (F::Int::ONE << F::EXP_BITS) - F::Int::ONE, + ] + .iter() + { + for significand in [ + F::Int::ZERO, + F::Int::ONE, + F::Int::ONE << (F::SIG_BITS / 2), + (F::Int::ONE << (F::SIG_BITS - 1)) - F::Int::ONE, + F::Int::ONE << (F::SIG_BITS - 1), + (F::Int::ONE << (F::SIG_BITS - 1)) + F::Int::ONE, + (F::Int::ONE << F::SIG_BITS) - F::Int::ONE, + ] + .iter() + { + for sign in [false, true].iter() { + let $case = F::from_parts(*sign, *exponent, *significand); + $inner + } + } + } + }; +} + +pub fn fuzz_float(n: u32, f: E) { + float_edge_cases!(F, case, { + f(case); + }); + + // random fuzzer + let mut rng = Xoshiro128StarStar::seed_from_u64(0); + let mut x = F::ZERO; + for _ in 0..n { + fuzz_float_step(&mut rng, &mut x); + f(x); + } +} + +pub fn fuzz_float_2(n: u32, f: E) { + float_edge_cases!(F, case0, { + float_edge_cases!(F, case1, { + f(case0, case1); + }); + }); + + // random fuzzer + let mut rng = Xoshiro128StarStar::seed_from_u64(0); + let mut x = F::ZERO; + let mut y = F::ZERO; + for _ in 0..n { + fuzz_float_step(&mut rng, &mut x); + fuzz_float_step(&mut rng, &mut y); + f(x, y) + } +} + +/// Perform an operation using builtin types if available, falling back to apfloat if not. +#[macro_export] +macro_rules! apfloat_fallback { + ( + $float_ty:ty, + // Type name in `rustc_apfloat::ieee`. Not a full path, it automatically gets the prefix. + $apfloat_ty:ident, + // Cfg expression for when builtin system operations should be used + $sys_available:meta, + // The expression to run. This expression may use `FloatTy` for its signature. + // Optionally, the final conversion back to a float can be suppressed using + // `=> no_convert` (for e.g. operations that return a bool). + // + // If the apfloat needs a different operation, it can be provided here. + $op:expr $(=> $convert:ident)? $(; $apfloat_op:expr)?, + // Arguments that get passed to `$op` after converting to a float + $($arg:expr),+ + $(,)? + ) => {{ + #[cfg($sys_available)] + let ret = { + type FloatTy = $float_ty; + $op( $($arg),+ ) + }; + + #[cfg(not($sys_available))] + let ret = { + use rustc_apfloat::Float; + type FloatTy = rustc_apfloat::ieee::$apfloat_ty; + + apfloat_fallback!(@inner + fty: $float_ty, + // Apply a conversion to `FloatTy` to each arg, then pass all args to `$op` + op_res: $op( $(FloatTy::from_bits($arg.to_bits().into())),+ ), + $(apfloat_op: $apfloat_op, )? + $(conv_opts: $convert,)? + args: $($arg),+ + ) + }; + + ret + }}; + + // Operations that do not need converting back to a float + (@inner fty: $float_ty:ty, op_res: $val:expr, conv_opts: no_convert, args: $($_arg:expr),+) => { + $val + }; + + // Some apfloat operations return a `StatusAnd` that we need to extract the value from. This + // is the default. + (@inner fty: $float_ty:ty, op_res: $val:expr, args: $($_arg:expr),+) => {{ + // ignore the status, just get the value + let unwrapped = $val.value; + + <$float_ty>::from_bits(FloatTy::to_bits(unwrapped).try_into().unwrap()) + }}; + + // This is the case where we can't use the same expression for the default builtin and + // nonstandard apfloat fallback (e.g. `as` casts in std are normal functions in apfloat, so + // two separate expressions must be specified. 
+ (@inner + fty: $float_ty:ty, op_res: $_val:expr, + apfloat_op: $apfloat_op:expr, args: $($arg:expr),+ + ) => {{ + $apfloat_op($($arg),+) + }}; +} diff --git a/library/compiler-builtins/builtins-test/tests/addsub.rs b/library/compiler-builtins/builtins-test/tests/addsub.rs new file mode 100644 index 0000000000000..865b9e472ab4c --- /dev/null +++ b/library/compiler-builtins/builtins-test/tests/addsub.rs @@ -0,0 +1,143 @@ +#![allow(unused_macros)] +#![cfg_attr(f128_enabled, feature(f128))] + +use builtins_test::*; + +mod int_addsub { + use super::*; + + macro_rules! sum { + ($($i:ty, $fn_add:ident, $fn_sub:ident);*;) => { + $( + #[test] + fn $fn_add() { + use compiler_builtins::int::addsub::{$fn_add, $fn_sub}; + + fuzz_2(N, |x: $i, y: $i| { + let add0 = x.wrapping_add(y); + let sub0 = x.wrapping_sub(y); + let add1: $i = $fn_add(x, y); + let sub1: $i = $fn_sub(x, y); + if add0 != add1 { + panic!( + "{}({}, {}): std: {}, builtins: {}", + stringify!($fn_add), x, y, add0, add1 + ); + } + if sub0 != sub1 { + panic!( + "{}({}, {}): std: {}, builtins: {}", + stringify!($fn_sub), x, y, sub0, sub1 + ); + } + }); + } + )* + }; + } + + macro_rules! overflowing_sum { + ($($i:ty, $fn_add:ident, $fn_sub:ident);*;) => { + $( + #[test] + fn $fn_add() { + use compiler_builtins::int::addsub::{$fn_add, $fn_sub}; + + fuzz_2(N, |x: $i, y: $i| { + let (add0, add_o0)= x.overflowing_add(y); + let (sub0, sub_o0)= x.overflowing_sub(y); + let mut add_o1 = 0; + let mut sub_o1 = 0; + let add1: $i = $fn_add(x, y, &mut add_o1); + let sub1: $i = $fn_sub(x, y, &mut sub_o1); + if add0 != add1 || i32::from(add_o0) != add_o1 { + panic!( + "{}({}, {}): std: {:?}, builtins: {:?}", + stringify!($fn_add), x, y, (add0, add_o0) , (add1, add_o1) + ); + } + if sub0 != sub1 || i32::from(sub_o0) != sub_o1 { + panic!( + "{}({}, {}): std: {:?}, builtins: {:?}", + stringify!($fn_sub), x, y, (sub0, sub_o0) , (sub1, sub_o1) + ); + } + }); + } + )* + }; + } + + // Integer addition and subtraction is very simple, so 100 fuzzing passes should be plenty. + sum! { + u128, __rust_u128_add, __rust_u128_sub; + i128, __rust_i128_add, __rust_i128_sub; + } + + overflowing_sum! { + u128, __rust_u128_addo, __rust_u128_subo; + i128, __rust_i128_addo, __rust_i128_subo; + } +} + +macro_rules! float_sum { + ($($f:ty, $fn_add:ident, $fn_sub:ident, $apfloat_ty:ident, $sys_available:meta);*;) => { + $( + #[test] + fn $fn_add() { + use core::ops::{Add, Sub}; + use compiler_builtins::float::{{add::$fn_add, sub::$fn_sub}, Float}; + + fuzz_float_2(N, |x: $f, y: $f| { + let add0 = apfloat_fallback!($f, $apfloat_ty, $sys_available, Add::add, x, y); + let sub0 = apfloat_fallback!($f, $apfloat_ty, $sys_available, Sub::sub, x, y); + let add1: $f = $fn_add(x, y); + let sub1: $f = $fn_sub(x, y); + if !Float::eq_repr(add0, add1) { + panic!( + "{}({:?}, {:?}): std: {:?}, builtins: {:?}", + stringify!($fn_add), x, y, add0, add1 + ); + } + if !Float::eq_repr(sub0, sub1) { + panic!( + "{}({:?}, {:?}): std: {:?}, builtins: {:?}", + stringify!($fn_sub), x, y, sub0, sub1 + ); + } + }); + } + )* + } +} + +#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))] +mod float_addsub { + use super::*; + + float_sum! { + f32, __addsf3, __subsf3, Single, all(); + f64, __adddf3, __subdf3, Double, all(); + } +} + +#[cfg(f128_enabled)] +#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))] +#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] +mod float_addsub_f128 { + use super::*; + + float_sum! 
{ + f128, __addtf3, __subtf3, Quad, not(feature = "no-sys-f128"); + } +} + +#[cfg(f128_enabled)] +#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] +mod float_addsub_f128_ppc { + use super::*; + + float_sum! { + f128, __addkf3, __subkf3, Quad, not(feature = "no-sys-f128"); + } +} diff --git a/library/compiler-builtins/builtins-test/tests/aeabi_memclr.rs b/library/compiler-builtins/builtins-test/tests/aeabi_memclr.rs new file mode 100644 index 0000000000000..bfd15a391aab2 --- /dev/null +++ b/library/compiler-builtins/builtins-test/tests/aeabi_memclr.rs @@ -0,0 +1,60 @@ +#![cfg(all( + target_arch = "arm", + not(any(target_env = "gnu", target_env = "musl")), + target_os = "linux", + feature = "mem" +))] +#![feature(compiler_builtins_lib)] +#![no_std] + +extern crate compiler_builtins; + +// test runner +extern crate utest_cortex_m_qemu; + +// overrides `panic!` +#[macro_use] +extern crate utest_macros; + +use core::mem; + +macro_rules! panic { + ($($tt:tt)*) => { + upanic!($($tt)*); + }; +} + +extern "C" { + fn __aeabi_memclr4(dest: *mut u8, n: usize); + fn __aeabi_memset4(dest: *mut u8, n: usize, c: u32); +} + +struct Aligned { + array: [u8; 8], + _alignment: [u32; 0], +} + +impl Aligned { + fn new() -> Self { + Aligned { + array: [0; 8], + _alignment: [], + } + } +} + +#[test] +fn memclr4() { + let mut aligned = Aligned::new(); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + + for n in 0..9 { + unsafe { + __aeabi_memset4(xs.as_mut_ptr(), n, 0xff); + __aeabi_memclr4(xs.as_mut_ptr(), n); + } + + assert!(xs[0..n].iter().all(|x| *x == 0)); + } +} diff --git a/library/compiler-builtins/builtins-test/tests/aeabi_memcpy.rs b/library/compiler-builtins/builtins-test/tests/aeabi_memcpy.rs new file mode 100644 index 0000000000000..c892c5aba0f74 --- /dev/null +++ b/library/compiler-builtins/builtins-test/tests/aeabi_memcpy.rs @@ -0,0 +1,71 @@ +#![cfg(all( + target_arch = "arm", + not(any(target_env = "gnu", target_env = "musl")), + target_os = "linux", + feature = "mem" +))] +#![feature(compiler_builtins_lib)] +#![no_std] + +extern crate compiler_builtins; + +// test runner +extern crate utest_cortex_m_qemu; + +// overrides `panic!` +#[macro_use] +extern crate utest_macros; + +macro_rules! 
panic { + ($($tt:tt)*) => { + upanic!($($tt)*); + }; +} + +extern "C" { + fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize); + fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize); +} + +struct Aligned { + array: [u8; 8], + _alignment: [u32; 0], +} + +impl Aligned { + fn new(array: [u8; 8]) -> Self { + Aligned { + array: array, + _alignment: [], + } + } +} + +#[test] +fn memcpy() { + let mut dest = [0; 4]; + let src = [0xde, 0xad, 0xbe, 0xef]; + + for n in 0..dest.len() { + dest.copy_from_slice(&[0; 4]); + + unsafe { __aeabi_memcpy(dest.as_mut_ptr(), src.as_ptr(), n) } + + assert_eq!(&dest[0..n], &src[0..n]) + } +} + +#[test] +fn memcpy4() { + let mut aligned = Aligned::new([0; 8]); + let dest = &mut aligned.array; + let src = [0xde, 0xad, 0xbe, 0xef, 0xba, 0xad, 0xf0, 0x0d]; + + for n in 0..dest.len() { + dest.copy_from_slice(&[0; 8]); + + unsafe { __aeabi_memcpy4(dest.as_mut_ptr(), src.as_ptr(), n) } + + assert_eq!(&dest[0..n], &src[0..n]) + } +} diff --git a/library/compiler-builtins/builtins-test/tests/aeabi_memset.rs b/library/compiler-builtins/builtins-test/tests/aeabi_memset.rs new file mode 100644 index 0000000000000..34ab3acc78c16 --- /dev/null +++ b/library/compiler-builtins/builtins-test/tests/aeabi_memset.rs @@ -0,0 +1,240 @@ +#![cfg(all( + target_arch = "arm", + not(any(target_env = "gnu", target_env = "musl")), + target_os = "linux", + feature = "mem" +))] +#![feature(compiler_builtins_lib)] +#![no_std] + +extern crate compiler_builtins; + +// test runner +extern crate utest_cortex_m_qemu; + +// overrides `panic!` +#[macro_use] +extern crate utest_macros; + +use core::mem; + +macro_rules! panic { + ($($tt:tt)*) => { + upanic!($($tt)*); + }; +} + +extern "C" { + fn __aeabi_memset4(dest: *mut u8, n: usize, c: u32); +} + +struct Aligned { + array: [u8; 8], + _alignment: [u32; 0], +} + +impl Aligned { + fn new(array: [u8; 8]) -> Self { + Aligned { + array: array, + _alignment: [], + } + } +} + +#[test] +fn zero() { + let mut aligned = Aligned::new([0u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), 0, c) } + + assert_eq!(*xs, [0; 8]); + + let mut aligned = Aligned::new([1u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), 0, c) } + + assert_eq!(*xs, [1; 8]); +} + +#[test] +fn one() { + let mut aligned = Aligned::new([0u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let n = 1; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } + + assert_eq!(*xs, [0xef, 0, 0, 0, 0, 0, 0, 0]); + + let mut aligned = Aligned::new([1u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } + + assert_eq!(*xs, [0xef, 1, 1, 1, 1, 1, 1, 1]); +} + +#[test] +fn two() { + let mut aligned = Aligned::new([0u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let n = 2; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } + + assert_eq!(*xs, [0xef, 0xef, 0, 0, 0, 0, 0, 0]); + + let mut aligned = Aligned::new([1u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } + + assert_eq!(*xs, [0xef, 0xef, 1, 1, 1, 1, 1, 1]); +} + +#[test] +fn three() { + let mut aligned = 
Aligned::new([0u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let n = 3; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 0, 0, 0, 0, 0]); + + let mut aligned = Aligned::new([1u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 1, 1, 1, 1, 1]); +} + +#[test] +fn four() { + let mut aligned = Aligned::new([0u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let n = 4; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0, 0, 0, 0]); + + let mut aligned = Aligned::new([1u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 1, 1, 1, 1]); +} + +#[test] +fn five() { + let mut aligned = Aligned::new([0u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let n = 5; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0, 0, 0]); + + let mut aligned = Aligned::new([1u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 1, 1, 1]); +} + +#[test] +fn six() { + let mut aligned = Aligned::new([0u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let n = 6; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0, 0]); + + let mut aligned = Aligned::new([1u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 1, 1]); +} + +#[test] +fn seven() { + let mut aligned = Aligned::new([0u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let n = 7; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0]); + + let mut aligned = Aligned::new([1u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 1]); +} + +#[test] +fn eight() { + let mut aligned = Aligned::new([0u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let n = 8; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef]); + + let mut aligned = Aligned::new([1u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef]); +} diff --git a/library/compiler-builtins/builtins-test/tests/big.rs b/library/compiler-builtins/builtins-test/tests/big.rs new file mode 100644 index 0000000000000..d1ae88bd16485 --- /dev/null +++ 
b/library/compiler-builtins/builtins-test/tests/big.rs @@ -0,0 +1,134 @@ +use compiler_builtins::int::{HInt, MinInt, i256, u256}; + +const LOHI_SPLIT: u128 = 0xaaaaaaaaaaaaaaaaffffffffffffffff; + +/// Print a `u256` as hex since we can't add format implementations +fn hexu(v: u256) -> String { + format!( + "0x{:016x}{:016x}{:016x}{:016x}", + v.0[3], v.0[2], v.0[1], v.0[0] + ) +} + +#[test] +fn widen_u128() { + assert_eq!(u128::MAX.widen(), u256([u64::MAX, u64::MAX, 0, 0])); + assert_eq!( + LOHI_SPLIT.widen(), + u256([u64::MAX, 0xaaaaaaaaaaaaaaaa, 0, 0]) + ); +} + +#[test] +fn widen_i128() { + assert_eq!((-1i128).widen(), u256::MAX.signed()); + assert_eq!( + (LOHI_SPLIT as i128).widen(), + i256([u64::MAX, 0xaaaaaaaaaaaaaaaa, u64::MAX, u64::MAX]) + ); + assert_eq!((-1i128).zero_widen().unsigned(), (u128::MAX).widen()); +} + +#[test] +fn widen_mul_u128() { + let tests = [ + (u128::MAX / 2, 2_u128, u256([u64::MAX - 1, u64::MAX, 0, 0])), + (u128::MAX, 2_u128, u256([u64::MAX - 1, u64::MAX, 1, 0])), + (u128::MAX, u128::MAX, u256([1, 0, u64::MAX - 1, u64::MAX])), + (u128::MIN, u128::MIN, u256::ZERO), + (1234, 0, u256::ZERO), + (0, 1234, u256::ZERO), + ]; + + let mut errors = Vec::new(); + for (i, (a, b, exp)) in tests.iter().copied().enumerate() { + let res = a.widen_mul(b); + let res_z = a.zero_widen_mul(b); + assert_eq!(res, res_z); + if res != exp { + errors.push((i, a, b, exp, res)); + } + } + + for (i, a, b, exp, res) in &errors { + eprintln!( + "FAILURE ({i}): {a:#034x} * {b:#034x} = {} got {}", + hexu(*exp), + hexu(*res) + ); + } + assert!(errors.is_empty()); +} + +#[test] +fn not_u128() { + assert_eq!(!u256::ZERO, u256::MAX); +} + +#[test] +fn shr_u128() { + let only_low = [ + 1, + u16::MAX.into(), + u32::MAX.into(), + u64::MAX.into(), + u128::MAX, + ]; + + let mut errors = Vec::new(); + + for a in only_low { + for perturb in 0..10 { + let a = a.saturating_add(perturb); + for shift in 0..128 { + let res = a.widen() >> shift; + let expected = (a >> shift).widen(); + if res != expected { + errors.push((a.widen(), shift, res, expected)); + } + } + } + } + + let check = [ + ( + u256::MAX, + 1, + u256([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 1]), + ), + ( + u256::MAX, + 5, + u256([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 5]), + ), + (u256::MAX, 63, u256([u64::MAX, u64::MAX, u64::MAX, 1])), + (u256::MAX, 64, u256([u64::MAX, u64::MAX, u64::MAX, 0])), + (u256::MAX, 65, u256([u64::MAX, u64::MAX, u64::MAX >> 1, 0])), + (u256::MAX, 127, u256([u64::MAX, u64::MAX, 1, 0])), + (u256::MAX, 128, u256([u64::MAX, u64::MAX, 0, 0])), + (u256::MAX, 129, u256([u64::MAX, u64::MAX >> 1, 0, 0])), + (u256::MAX, 191, u256([u64::MAX, 1, 0, 0])), + (u256::MAX, 192, u256([u64::MAX, 0, 0, 0])), + (u256::MAX, 193, u256([u64::MAX >> 1, 0, 0, 0])), + (u256::MAX, 191, u256([u64::MAX, 1, 0, 0])), + (u256::MAX, 254, u256([0b11, 0, 0, 0])), + (u256::MAX, 255, u256([1, 0, 0, 0])), + ]; + + for (input, shift, expected) in check { + let res = input >> shift; + if res != expected { + errors.push((input, shift, res, expected)); + } + } + + for (a, b, res, expected) in &errors { + eprintln!( + "FAILURE: {} >> {b} = {} got {}", + hexu(*a), + hexu(*expected), + hexu(*res), + ); + } + assert!(errors.is_empty()); +} diff --git a/library/compiler-builtins/builtins-test/tests/cmp.rs b/library/compiler-builtins/builtins-test/tests/cmp.rs new file mode 100644 index 0000000000000..a904dc5f7de4d --- /dev/null +++ b/library/compiler-builtins/builtins-test/tests/cmp.rs @@ -0,0 +1,184 @@ +#![allow(unused_macros)] +#![allow(unreachable_code)] 
+#![cfg_attr(f128_enabled, feature(f128))] + +use builtins_test::*; + +mod float_comparisons { + use super::*; + + macro_rules! cmp { + ( + $f:ty, $x:ident, $y:ident, $apfloat_ty:ident, $sys_available:meta, + $($unordered_val:expr, $fn:ident);*; + ) => { + $( + let cmp0 = if apfloat_fallback!( + $f, $apfloat_ty, $sys_available, + |x: FloatTy| x.is_nan() => no_convert, + $x + ) || apfloat_fallback!( + $f, $apfloat_ty, $sys_available, + |y: FloatTy| y.is_nan() => no_convert, + $y + ) + { + $unordered_val + } else if apfloat_fallback!( + $f, $apfloat_ty, $sys_available, + |x, y| x < y => no_convert, + $x, $y + ) { + -1 + } else if apfloat_fallback!( + $f, $apfloat_ty, $sys_available, + |x, y| x == y => no_convert, + $x, $y + ) { + 0 + } else { + 1 + }; + + let cmp1 = $fn($x, $y); + if cmp0 != cmp1 { + panic!( + "{}({:?}, {:?}): std: {:?}, builtins: {:?}", + stringify!($fn), $x, $y, cmp0, cmp1 + ); + } + )* + }; + } + + #[test] + fn cmp_f32() { + use compiler_builtins::float::cmp::{ + __eqsf2, __gesf2, __gtsf2, __lesf2, __ltsf2, __nesf2, __unordsf2, + }; + + fuzz_float_2(N, |x: f32, y: f32| { + assert_eq!(__unordsf2(x, y) != 0, x.is_nan() || y.is_nan()); + cmp!(f32, x, y, Single, all(), + 1, __ltsf2; + 1, __lesf2; + 1, __eqsf2; + -1, __gesf2; + -1, __gtsf2; + 1, __nesf2; + ); + }); + } + + #[test] + fn cmp_f64() { + use compiler_builtins::float::cmp::{ + __eqdf2, __gedf2, __gtdf2, __ledf2, __ltdf2, __nedf2, __unorddf2, + }; + + fuzz_float_2(N, |x: f64, y: f64| { + assert_eq!(__unorddf2(x, y) != 0, x.is_nan() || y.is_nan()); + cmp!(f64, x, y, Double, all(), + 1, __ltdf2; + 1, __ledf2; + 1, __eqdf2; + -1, __gedf2; + -1, __gtdf2; + 1, __nedf2; + ); + }); + } + + #[test] + #[cfg(f128_enabled)] + fn cmp_f128() { + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + use compiler_builtins::float::cmp::{ + __eqkf2 as __eqtf2, __gekf2 as __getf2, __gtkf2 as __gttf2, __lekf2 as __letf2, + __ltkf2 as __lttf2, __nekf2 as __netf2, __unordkf2 as __unordtf2, + }; + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + use compiler_builtins::float::cmp::{ + __eqtf2, __getf2, __gttf2, __letf2, __lttf2, __netf2, __unordtf2, + }; + + fuzz_float_2(N, |x: f128, y: f128| { + let x_is_nan = apfloat_fallback!( + f128, Quad, not(feature = "no-sys-f128"), + |x: FloatTy| x.is_nan() => no_convert, + x + ); + let y_is_nan = apfloat_fallback!( + f128, Quad, not(feature = "no-sys-f128"), + |x: FloatTy| x.is_nan() => no_convert, + y + ); + + assert_eq!(__unordtf2(x, y) != 0, x_is_nan || y_is_nan); + + cmp!(f128, x, y, Quad, not(feature = "no-sys-f128"), + 1, __lttf2; + 1, __letf2; + 1, __eqtf2; + -1, __getf2; + -1, __gttf2; + 1, __netf2; + ); + }); + } +} + +#[cfg(target_arch = "arm")] +mod float_comparisons_arm { + use super::*; + + macro_rules! 
cmp2 { + ($x:ident, $y:ident, $($unordered_val:expr, $fn_std:expr, $fn_builtins:ident);*;) => { + $( + let cmp0: i32 = if $x.is_nan() || $y.is_nan() { + $unordered_val + } else { + $fn_std as i32 + }; + let cmp1: i32 = $fn_builtins($x, $y); + if cmp0 != cmp1 { + panic!("{}({}, {}): std: {}, builtins: {}", stringify!($fn_builtins), $x, $y, cmp0, cmp1); + } + )* + }; + } + + #[test] + fn cmp_f32() { + use compiler_builtins::float::cmp::{ + __aeabi_fcmpeq, __aeabi_fcmpge, __aeabi_fcmpgt, __aeabi_fcmple, __aeabi_fcmplt, + }; + + fuzz_float_2(N, |x: f32, y: f32| { + cmp2!(x, y, + 0, x < y, __aeabi_fcmplt; + 0, x <= y, __aeabi_fcmple; + 0, x == y, __aeabi_fcmpeq; + 0, x >= y, __aeabi_fcmpge; + 0, x > y, __aeabi_fcmpgt; + ); + }); + } + + #[test] + fn cmp_f64() { + use compiler_builtins::float::cmp::{ + __aeabi_dcmpeq, __aeabi_dcmpge, __aeabi_dcmpgt, __aeabi_dcmple, __aeabi_dcmplt, + }; + + fuzz_float_2(N, |x: f64, y: f64| { + cmp2!(x, y, + 0, x < y, __aeabi_dcmplt; + 0, x <= y, __aeabi_dcmple; + 0, x == y, __aeabi_dcmpeq; + 0, x >= y, __aeabi_dcmpge; + 0, x > y, __aeabi_dcmpgt; + ); + }); + } +} diff --git a/library/compiler-builtins/builtins-test/tests/conv.rs b/library/compiler-builtins/builtins-test/tests/conv.rs new file mode 100644 index 0000000000000..491915d9bb172 --- /dev/null +++ b/library/compiler-builtins/builtins-test/tests/conv.rs @@ -0,0 +1,364 @@ +#![cfg_attr(f128_enabled, feature(f128))] +#![cfg_attr(f16_enabled, feature(f16))] +// makes configuration easier +#![allow(unused_macros)] +#![allow(unused_imports)] + +use builtins_test::*; +use compiler_builtins::float::Float; +use rustc_apfloat::{Float as _, FloatConvert as _}; + +mod i_to_f { + use super::*; + + macro_rules! i_to_f { + ($f_ty:ty, $apfloat_ty:ident, $sys_available:meta, $($i_ty:ty, $fn:ident);*;) => { + $( + #[test] + fn $fn() { + use compiler_builtins::float::conv::$fn; + use compiler_builtins::int::Int; + + fuzz(N, |x: $i_ty| { + let f0 = apfloat_fallback!( + $f_ty, $apfloat_ty, $sys_available, + |x| x as $f_ty; + // When the builtin is not available, we need to use a different conversion + // method (since apfloat doesn't support `as` casting). + |x: $i_ty| { + use compiler_builtins::int::MinInt; + + let apf = if <$i_ty>::SIGNED { + FloatTy::from_i128(x.try_into().unwrap()).value + } else { + FloatTy::from_u128(x.try_into().unwrap()).value + }; + + <$f_ty>::from_bits(apf.to_bits()) + }, + x + ); + let f1: $f_ty = $fn(x); + + #[cfg($sys_available)] { + // This makes sure that the conversion produced the best rounding possible, and does + // this independent of `x as $into` rounding correctly. + // This assumes that float to integer conversion is correct. + let y_minus_ulp = <$f_ty>::from_bits(f1.to_bits().wrapping_sub(1)) as $i_ty; + let y = f1 as $i_ty; + let y_plus_ulp = <$f_ty>::from_bits(f1.to_bits().wrapping_add(1)) as $i_ty; + let error_minus = <$i_ty as Int>::abs_diff(y_minus_ulp, x); + let error = <$i_ty as Int>::abs_diff(y, x); + let error_plus = <$i_ty as Int>::abs_diff(y_plus_ulp, x); + + // The first two conditions check that none of the two closest float values are + // strictly closer in representation to `x`. The second makes sure that rounding is + // towards even significand if two float values are equally close to the integer. 
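+ // For example, 16777217_u32 is exactly halfway between the `f32` values 16777216.0 and 16777218.0; correct rounding picks 16777216.0, whose trailing significand bit is zero.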
+ if error_minus < error + || error_plus < error + || ((error_minus == error || error_plus == error) + && ((f0.to_bits() & 1) != 0)) + { + if !cfg!(any( + target_arch = "powerpc", + target_arch = "powerpc64" + )) { + panic!( + "incorrect rounding by {}({}): {}, ({}, {}, {}), errors ({}, {}, {})", + stringify!($fn), + x, + f1.to_bits(), + y_minus_ulp, + y, + y_plus_ulp, + error_minus, + error, + error_plus, + ); + } + } + } + + // Test against native conversion. We disable testing on all `x86` because of + // rounding bugs with `i686`. `powerpc` also has the same rounding bug. + if !Float::eq_repr(f0, f1) && !cfg!(any( + target_arch = "x86", + target_arch = "powerpc", + target_arch = "powerpc64" + )) { + panic!( + "{}({}): std: {:?}, builtins: {:?}", + stringify!($fn), + x, + f0, + f1, + ); + } + }); + } + )* + }; + } + + i_to_f! { f32, Single, all(), + u32, __floatunsisf; + i32, __floatsisf; + u64, __floatundisf; + i64, __floatdisf; + u128, __floatuntisf; + i128, __floattisf; + } + + i_to_f! { f64, Double, all(), + u32, __floatunsidf; + i32, __floatsidf; + u64, __floatundidf; + i64, __floatdidf; + u128, __floatuntidf; + i128, __floattidf; + } + + #[cfg(not(feature = "no-f16-f128"))] + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + i_to_f! { f128, Quad, not(feature = "no-sys-f128-int-convert"), + u32, __floatunsitf; + i32, __floatsitf; + u64, __floatunditf; + i64, __floatditf; + u128, __floatuntitf; + i128, __floattitf; + } + + #[cfg(not(feature = "no-f16-f128"))] + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + i_to_f! { f128, Quad, not(feature = "no-sys-f128-int-convert"), + u32, __floatunsikf; + i32, __floatsikf; + u64, __floatundikf; + i64, __floatdikf; + u128, __floatuntikf; + i128, __floattikf; + } +} + +mod f_to_i { + use super::*; + + macro_rules! f_to_i { + ($x:ident, $f_ty:ty, $apfloat_ty:ident, $sys_available:meta, $($i_ty:ty, $fn:ident);*;) => { + $( + // it is undefined behavior in the first place to do conversions with NaNs + if !apfloat_fallback!( + $f_ty, $apfloat_ty, $sys_available, |x: FloatTy| x.is_nan() => no_convert, $x + ) { + let conv0 = apfloat_fallback!( + $f_ty, $apfloat_ty, $sys_available, + // Use an `as` cast when the builtin is available on the system. + |x| x as $i_ty; + // When the builtin is not available, we need to use a different conversion + // method (since apfloat doesn't support `as` casting). 
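+ // The fallback goes through `to_i128`/`to_u128` at the target type's bit width and panics if the value does not fit.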
+ |x: $f_ty| { + use compiler_builtins::int::MinInt; + + let apf = FloatTy::from_bits(x.to_bits().into()); + let bits: usize = <$i_ty>::BITS.try_into().unwrap(); + + let err_fn = || panic!( + "Unable to convert value {x:?} to type {}:", stringify!($i_ty) + ); + + if <$i_ty>::SIGNED { + <$i_ty>::try_from(apf.to_i128(bits).value).ok().unwrap_or_else(err_fn) + } else { + <$i_ty>::try_from(apf.to_u128(bits).value).ok().unwrap_or_else(err_fn) + } + }, + $x + ); + let conv1: $i_ty = $fn($x); + if conv0 != conv1 { + panic!("{}({:?}): std: {:?}, builtins: {:?}", stringify!($fn), $x, conv0, conv1); + } + } + )* + }; + } + + #[test] + fn f32_to_int() { + use compiler_builtins::float::conv::{ + __fixsfdi, __fixsfsi, __fixsfti, __fixunssfdi, __fixunssfsi, __fixunssfti, + }; + + fuzz_float(N, |x: f32| { + f_to_i!(x, f32, Single, all(), + u32, __fixunssfsi; + u64, __fixunssfdi; + u128, __fixunssfti; + i32, __fixsfsi; + i64, __fixsfdi; + i128, __fixsfti; + ); + }); + } + + #[test] + fn f64_to_int() { + use compiler_builtins::float::conv::{ + __fixdfdi, __fixdfsi, __fixdfti, __fixunsdfdi, __fixunsdfsi, __fixunsdfti, + }; + + fuzz_float(N, |x: f64| { + f_to_i!(x, f64, Double, all(), + u32, __fixunsdfsi; + u64, __fixunsdfdi; + u128, __fixunsdfti; + i32, __fixdfsi; + i64, __fixdfdi; + i128, __fixdfti; + ); + }); + } + + #[test] + #[cfg(f128_enabled)] + fn f128_to_int() { + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + use compiler_builtins::float::conv::{ + __fixkfdi as __fixtfdi, __fixkfsi as __fixtfsi, __fixkfti as __fixtfti, + __fixunskfdi as __fixunstfdi, __fixunskfsi as __fixunstfsi, + __fixunskfti as __fixunstfti, + }; + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + use compiler_builtins::float::conv::{ + __fixtfdi, __fixtfsi, __fixtfti, __fixunstfdi, __fixunstfsi, __fixunstfti, + }; + + fuzz_float(N, |x: f128| { + f_to_i!( + x, + f128, + Quad, + not(feature = "no-sys-f128-int-convert"), + u32, __fixunstfsi; + u64, __fixunstfdi; + u128, __fixunstfti; + i32, __fixtfsi; + i64, __fixtfdi; + i128, __fixtfti; + ); + }); + } +} + +macro_rules! f_to_f { + ( + $mod:ident, + $( + $from_ty:ty => $to_ty:ty, + $from_ap_ty:ident => $to_ap_ty:ident, + $fn:ident, $sys_available:meta + );+; + ) => {$( + #[test] + fn $fn() { + use compiler_builtins::float::{$mod::$fn, Float}; + use rustc_apfloat::ieee::{$from_ap_ty, $to_ap_ty}; + + fuzz_float(N, |x: $from_ty| { + let tmp0: $to_ty = apfloat_fallback!( + $from_ty, + $from_ap_ty, + $sys_available, + |x: $from_ty| x as $to_ty; + |x: $from_ty| { + let from_apf = FloatTy::from_bits(x.to_bits().into()); + // Get `value` directly to ignore INVALID_OP + let to_apf: $to_ap_ty = from_apf.convert(&mut false).value; + <$to_ty>::from_bits(to_apf.to_bits().try_into().unwrap()) + }, + x + ); + let tmp1: $to_ty = $fn(x); + + if !Float::eq_repr(tmp0, tmp1) { + panic!( + "{}({:?}): std: {:?}, builtins: {:?}", + stringify!($fn), + x, + tmp0, + tmp1 + ); + } + }) + } + )+}; +} + +mod extend { + use super::*; + + f_to_f! { + extend, + f32 => f64, Single => Double, __extendsfdf2, all(); + } + + #[cfg(all(f16_enabled, f128_enabled))] + #[cfg(not(any( + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "loongarch64" + )))] + f_to_f! 
{ + extend, + f16 => f32, Half => Single, __extendhfsf2, not(feature = "no-sys-f16"); + f16 => f32, Half => Single, __gnu_h2f_ieee, not(feature = "no-sys-f16"); + f16 => f64, Half => Double, __extendhfdf2, not(feature = "no-sys-f16-f64-convert"); + f16 => f128, Half => Quad, __extendhftf2, not(feature = "no-sys-f16-f128-convert"); + f32 => f128, Single => Quad, __extendsftf2, not(feature = "no-sys-f128"); + f64 => f128, Double => Quad, __extenddftf2, not(feature = "no-sys-f128"); + } + + #[cfg(f128_enabled)] + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + f_to_f! { + extend, + // FIXME(#655): `f16` tests disabled until we can bootstrap symbols + f32 => f128, Single => Quad, __extendsfkf2, not(feature = "no-sys-f128"); + f64 => f128, Double => Quad, __extenddfkf2, not(feature = "no-sys-f128"); + } +} + +mod trunc { + use super::*; + + f_to_f! { + trunc, + f64 => f32, Double => Single, __truncdfsf2, all(); + } + + #[cfg(all(f16_enabled, f128_enabled))] + #[cfg(not(any( + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "loongarch64" + )))] + f_to_f! { + trunc, + f32 => f16, Single => Half, __truncsfhf2, not(feature = "no-sys-f16"); + f32 => f16, Single => Half, __gnu_f2h_ieee, not(feature = "no-sys-f16"); + f64 => f16, Double => Half, __truncdfhf2, not(feature = "no-sys-f16-f64-convert"); + f128 => f16, Quad => Half, __trunctfhf2, not(feature = "no-sys-f16-f128-convert"); + f128 => f32, Quad => Single, __trunctfsf2, not(feature = "no-sys-f128"); + f128 => f64, Quad => Double, __trunctfdf2, not(feature = "no-sys-f128"); + } + + #[cfg(f128_enabled)] + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + f_to_f! { + trunc, + // FIXME(#655): `f16` tests disabled until we can bootstrap symbols + f128 => f32, Quad => Single, __trunckfsf2, not(feature = "no-sys-f128"); + f128 => f64, Quad => Double, __trunckfdf2, not(feature = "no-sys-f128"); + } +} diff --git a/library/compiler-builtins/builtins-test/tests/div_rem.rs b/library/compiler-builtins/builtins-test/tests/div_rem.rs new file mode 100644 index 0000000000000..5ae653cc90cc4 --- /dev/null +++ b/library/compiler-builtins/builtins-test/tests/div_rem.rs @@ -0,0 +1,164 @@ +#![feature(f128)] +#![allow(unused_macros)] + +use builtins_test::*; +use compiler_builtins::int::sdiv::{__divmoddi4, __divmodsi4, __divmodti4}; +use compiler_builtins::int::udiv::{__udivmoddi4, __udivmodsi4, __udivmodti4, u128_divide_sparc}; + +// Division algorithms have by far the nastiest and largest number of edge cases, and experience shows +// that sometimes 100_000 iterations of the random fuzzer is needed. + +/// Creates intensive test functions for division functions of a certain size +macro_rules! 
test { + ( + $n:expr, // the number of bits in a $iX or $uX + $uX:ident, // unsigned integer that will be shifted + $iX:ident, // signed version of $uX + $test_name:ident, // name of the test function + $unsigned_name:ident, // unsigned division function + $signed_name:ident // signed division function + ) => { + #[test] + fn $test_name() { + fuzz_2(N, |lhs, rhs| { + if rhs == 0 { + return; + } + + let mut rem: $uX = 0; + let quo: $uX = $unsigned_name(lhs, rhs, Some(&mut rem)); + if rhs <= rem || (lhs != rhs.wrapping_mul(quo).wrapping_add(rem)) { + panic!( + "unsigned division function failed with lhs:{} rhs:{} \ + std:({}, {}) builtins:({}, {})", + lhs, + rhs, + lhs.wrapping_div(rhs), + lhs.wrapping_rem(rhs), + quo, + rem + ); + } + + // test the signed division function also + let lhs = lhs as $iX; + let rhs = rhs as $iX; + let mut rem: $iX = 0; + let quo: $iX = $signed_name(lhs, rhs, &mut rem); + // We cannot just test that + // `lhs == rhs.wrapping_mul(quo).wrapping_add(rem)`, but also + // need to make sure the remainder isn't larger than the divisor + // and has the correct sign. + let incorrect_rem = if rem == 0 { + false + } else if rhs == $iX::MIN { + // `rhs.wrapping_abs()` would overflow, so handle this case + // separately. + (lhs.is_negative() != rem.is_negative()) || (rem == $iX::MIN) + } else { + (lhs.is_negative() != rem.is_negative()) + || (rhs.wrapping_abs() <= rem.wrapping_abs()) + }; + if incorrect_rem || lhs != rhs.wrapping_mul(quo).wrapping_add(rem) { + panic!( + "signed division function failed with lhs:{} rhs:{} \ + std:({}, {}) builtins:({}, {})", + lhs, + rhs, + lhs.wrapping_div(rhs), + lhs.wrapping_rem(rhs), + quo, + rem + ); + } + }); + } + }; +} + +test!(32, u32, i32, div_rem_si4, __udivmodsi4, __divmodsi4); +test!(64, u64, i64, div_rem_di4, __udivmoddi4, __divmoddi4); +test!(128, u128, i128, div_rem_ti4, __udivmodti4, __divmodti4); + +#[test] +fn divide_sparc() { + fuzz_2(N, |lhs, rhs| { + if rhs == 0 { + return; + } + + let mut rem: u128 = 0; + let quo: u128 = u128_divide_sparc(lhs, rhs, &mut rem); + if rhs <= rem || (lhs != rhs.wrapping_mul(quo).wrapping_add(rem)) { + panic!( + "u128_divide_sparc({}, {}): \ + std:({}, {}), builtins:({}, {})", + lhs, + rhs, + lhs.wrapping_div(rhs), + lhs.wrapping_rem(rhs), + quo, + rem + ); + } + }); +} + +macro_rules! float { + ($($f:ty, $fn:ident, $apfloat_ty:ident, $sys_available:meta);*;) => { + $( + #[test] + fn $fn() { + use compiler_builtins::float::{div::$fn, Float}; + use core::ops::Div; + + fuzz_float_2(N, |x: $f, y: $f| { + let quo0: $f = apfloat_fallback!($f, $apfloat_ty, $sys_available, Div::div, x, y); + let quo1: $f = $fn(x, y); + + // ARM SIMD instructions always flush subnormals to zero + if cfg!(target_arch = "arm") && + ((Float::is_subnormal(quo0)) || Float::is_subnormal(quo1)) { + return; + } + + if !Float::eq_repr(quo0, quo1) { + panic!( + "{}({:?}, {:?}): std: {:?}, builtins: {:?}", + stringify!($fn), + x, + y, + quo0, + quo1 + ); + } + }); + } + )* + }; +} + +#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))] +mod float_div { + use super::*; + + float! { + f32, __divsf3, Single, all(); + f64, __divdf3, Double, all(); + } + + #[cfg(not(feature = "no-f16-f128"))] + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + float! { + f128, __divtf3, Quad, + // FIXME(llvm): there is a bug in LLVM rt. + // See . 
+ not(any(feature = "no-sys-f128", all(target_arch = "aarch64", target_os = "linux"))); + } + + #[cfg(not(feature = "no-f16-f128"))] + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + float! { + f128, __divkf3, Quad, not(feature = "no-sys-f128"); + } +} diff --git a/library/compiler-builtins/builtins-test/tests/float_pow.rs b/library/compiler-builtins/builtins-test/tests/float_pow.rs new file mode 100644 index 0000000000000..8209543e666a0 --- /dev/null +++ b/library/compiler-builtins/builtins-test/tests/float_pow.rs @@ -0,0 +1,72 @@ +#![allow(unused_macros)] +#![cfg_attr(f128_enabled, feature(f128))] +#![cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))] + +use builtins_test::*; + +// This is approximate because of issues related to +// https://github.com/rust-lang/rust/issues/73920. +// TODO how do we resolve this indeterminacy? +macro_rules! pow { + ($($f:ty, $tolerance:expr, $fn:ident, $sys_available:meta);*;) => { + $( + #[test] + // FIXME(apfloat): We skip tests if system symbols aren't available rather + // than providing a fallback, since `rustc_apfloat` does not provide `pow`. + #[cfg($sys_available)] + fn $fn() { + use compiler_builtins::float::pow::$fn; + use compiler_builtins::float::Float; + fuzz_float_2(N, |x: $f, y: $f| { + if !(Float::is_subnormal(x) || Float::is_subnormal(y) || x.is_nan()) { + let n = y.to_bits() & !<$f as Float>::SIG_MASK; + let n = (n as <$f as Float>::SignedInt) >> <$f as Float>::SIG_BITS; + let n = n as i32; + let tmp0: $f = x.powi(n); + let tmp1: $f = $fn(x, n); + let (a, b) = if tmp0 < tmp1 { + (tmp0, tmp1) + } else { + (tmp1, tmp0) + }; + + let good = if a == b { + // handles infinity equality + true + } else if a < $tolerance { + b < $tolerance + } else { + let quo = b / a; + (quo < (1. + $tolerance)) && (quo > (1. - $tolerance)) + }; + + assert!( + good, + "{}({:?}, {:?}): std: {:?}, builtins: {:?}", + stringify!($fn), x, n, tmp0, tmp1 + ); + } + }); + } + )* + }; +} + +pow! { + f32, 1e-4, __powisf2, all(); + f64, 1e-12, __powidf2, all(); +} + +#[cfg(f128_enabled)] +// FIXME(f16_f128): MSVC cannot build these until `__divtf3` is available in nightly. +#[cfg(not(target_env = "msvc"))] +#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] +pow! { + f128, 1e-36, __powitf2, not(feature = "no-sys-f128"); +} + +#[cfg(f128_enabled)] +#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] +pow! { + f128, 1e-36, __powikf2, not(feature = "no-sys-f128"); +} diff --git a/library/compiler-builtins/builtins-test/tests/lse.rs b/library/compiler-builtins/builtins-test/tests/lse.rs new file mode 100644 index 0000000000000..53167d98fc0e3 --- /dev/null +++ b/library/compiler-builtins/builtins-test/tests/lse.rs @@ -0,0 +1,97 @@ +#![feature(decl_macro)] // so we can use pub(super) +#![cfg(all(target_arch = "aarch64", target_os = "linux", not(feature = "no-asm")))] + +/// Translate a byte size to a Rust type. 
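+/// For example, `int_ty!(4)` expands to `i32` and `int_ty!(16)` to `i128`.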
+macro int_ty { + (1) => { i8 }, + (2) => { i16 }, + (4) => { i32 }, + (8) => { i64 }, + (16) => { i128 } +} + +mod cas { + pub(super) macro test($_ordering:ident, $bytes:tt, $name:ident) { + #[test] + fn $name() { + builtins_test::fuzz_2(10000, |expected: super::int_ty!($bytes), new| { + let mut target = expected.wrapping_add(10); + assert_eq!( + unsafe { + compiler_builtins::aarch64_linux::$name::$name(expected, new, &mut target) + }, + expected.wrapping_add(10), + "return value should always be the previous value", + ); + assert_eq!( + target, + expected.wrapping_add(10), + "shouldn't have changed target" + ); + + target = expected; + assert_eq!( + unsafe { + compiler_builtins::aarch64_linux::$name::$name(expected, new, &mut target) + }, + expected + ); + assert_eq!(target, new, "should have updated target"); + }); + } + } +} + +macro test_cas16($_ordering:ident, $name:ident) { + cas::test!($_ordering, 16, $name); +} + +mod swap { + pub(super) macro test($_ordering:ident, $bytes:tt, $name:ident) { + #[test] + fn $name() { + builtins_test::fuzz_2(10000, |left: super::int_ty!($bytes), mut right| { + let orig_right = right; + assert_eq!( + unsafe { compiler_builtins::aarch64_linux::$name::$name(left, &mut right) }, + orig_right + ); + assert_eq!(left, right); + }); + } + } +} + +macro_rules! test_op { + ($mod:ident, $( $op:tt )* ) => { + mod $mod { + pub(super) macro test { + ($_ordering:ident, $bytes:tt, $name:ident) => { + #[test] + fn $name() { + builtins_test::fuzz_2(10000, |old, val| { + let mut target = old; + let op: fn(super::int_ty!($bytes), super::int_ty!($bytes)) -> _ = $($op)*; + let expected = op(old, val); + assert_eq!(old, unsafe { compiler_builtins::aarch64_linux::$name::$name(val, &mut target) }, "{} should return original value", stringify!($name)); + assert_eq!(expected, target, "{} should store to target", stringify!($name)); + }); + } + } + } + } + }; +} + +test_op!(add, |left, right| left.wrapping_add(right)); +test_op!(clr, |left, right| left & !right); +test_op!(xor, std::ops::BitXor::bitxor); +test_op!(or, std::ops::BitOr::bitor); + +compiler_builtins::foreach_cas!(cas::test); +compiler_builtins::foreach_cas16!(test_cas16); +compiler_builtins::foreach_swp!(swap::test); +compiler_builtins::foreach_ldadd!(add::test); +compiler_builtins::foreach_ldclr!(clr::test); +compiler_builtins::foreach_ldeor!(xor::test); +compiler_builtins::foreach_ldset!(or::test); diff --git a/library/compiler-builtins/builtins-test/tests/mem.rs b/library/compiler-builtins/builtins-test/tests/mem.rs new file mode 100644 index 0000000000000..d838ef159a024 --- /dev/null +++ b/library/compiler-builtins/builtins-test/tests/mem.rs @@ -0,0 +1,286 @@ +extern crate compiler_builtins; +use compiler_builtins::mem::{memcmp, memcpy, memmove, memset}; + +const WORD_SIZE: usize = core::mem::size_of::<usize>(); + +#[test] +fn memcpy_3() { + let mut arr: [u8; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]; + unsafe { + let src = arr.as_ptr().offset(9); + let dst = arr.as_mut_ptr().offset(1); + assert_eq!(memcpy(dst, src, 3), dst); + assert_eq!(arr, [0, 9, 10, 11, 4, 5, 6, 7, 8, 9, 10, 11]); + } + arr = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]; + unsafe { + let src = arr.as_ptr().offset(1); + let dst = arr.as_mut_ptr().offset(9); + assert_eq!(memcpy(dst, src, 3), dst); + assert_eq!(arr, [0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3]); + } +} + +#[test] +fn memcpy_10() { + let arr: [u8; 18] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]; + let mut dst: [u8; 12] = [0; 12]; + unsafe { + let src =
arr.as_ptr().offset(1); + assert_eq!(memcpy(dst.as_mut_ptr(), src, 10), dst.as_mut_ptr()); + assert_eq!(dst, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 0]); + } + unsafe { + let src = arr.as_ptr().offset(8); + assert_eq!(memcpy(dst.as_mut_ptr(), src, 10), dst.as_mut_ptr()); + assert_eq!(dst, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 0, 0]); + } +} + +#[test] +fn memcpy_big() { + // Make the arrays cross 3 pages + const SIZE: usize = 8193; + let src: [u8; SIZE] = [22; SIZE]; + struct Dst { + start: usize, + buf: [u8; SIZE], + end: usize, + } + + let mut dst = Dst { + start: 0, + buf: [0; SIZE], + end: 0, + }; + unsafe { + assert_eq!( + memcpy(dst.buf.as_mut_ptr(), src.as_ptr(), SIZE), + dst.buf.as_mut_ptr() + ); + assert_eq!(dst.start, 0); + assert_eq!(dst.buf, [22; SIZE]); + assert_eq!(dst.end, 0); + } +} + +#[test] +fn memmove_forward() { + let mut arr: [u8; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]; + unsafe { + let src = arr.as_ptr().offset(6); + let dst = arr.as_mut_ptr().offset(3); + assert_eq!(memmove(dst, src, 5), dst); + assert_eq!(arr, [0, 1, 2, 6, 7, 8, 9, 10, 8, 9, 10, 11]); + } +} + +#[test] +fn memmove_backward() { + let mut arr: [u8; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]; + unsafe { + let src = arr.as_ptr().offset(3); + let dst = arr.as_mut_ptr().offset(6); + assert_eq!(memmove(dst, src, 5), dst); + assert_eq!(arr, [0, 1, 2, 3, 4, 5, 3, 4, 5, 6, 7, 11]); + } +} + +#[test] +fn memset_zero() { + let mut arr: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; + unsafe { + let ptr = arr.as_mut_ptr().offset(5); + assert_eq!(memset(ptr, 0, 2), ptr); + assert_eq!(arr, [0, 1, 2, 3, 4, 0, 0, 7]); + + // Only the LSB matters for a memset + assert_eq!(memset(arr.as_mut_ptr(), 0x2000, 8), arr.as_mut_ptr()); + assert_eq!(arr, [0, 0, 0, 0, 0, 0, 0, 0]); + } +} + +#[test] +fn memset_nonzero() { + let mut arr: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; + unsafe { + let ptr = arr.as_mut_ptr().offset(2); + assert_eq!(memset(ptr, 22, 3), ptr); + assert_eq!(arr, [0, 1, 22, 22, 22, 5, 6, 7]); + + // Only the LSB matters for a memset + assert_eq!(memset(arr.as_mut_ptr(), 0x2009, 8), arr.as_mut_ptr()); + assert_eq!(arr, [9, 9, 9, 9, 9, 9, 9, 9]); + } +} + +#[test] +fn memcmp_eq() { + let arr1 @ arr2 = gen_arr::<256>(); + for i in 0..256 { + unsafe { + assert_eq!(memcmp(arr1.0.as_ptr(), arr2.0.as_ptr(), i), 0); + assert_eq!(memcmp(arr2.0.as_ptr(), arr1.0.as_ptr(), i), 0); + } + } +} + +#[test] +fn memcmp_ne() { + let arr1 @ arr2 = gen_arr::<256>(); + // Reduce iteration count in Miri as it is too slow otherwise. 
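+        // With the nested comparison loops below this makes on the order of `limit^2` calls to `memcmp`, hence the reduced bound under Miri.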
+ let limit = if cfg!(miri) { 64 } else { 256 }; + for i in 0..limit { + let mut diff_arr = arr1; + diff_arr.0[i] = 127; + let expect = diff_arr.0[i].cmp(&arr2.0[i]); + for k in i + 1..limit { + let result = unsafe { memcmp(diff_arr.0.as_ptr(), arr2.0.as_ptr(), k) }; + assert_eq!(expect, result.cmp(&0)); + } + } +} + +#[derive(Clone, Copy)] +struct AlignedStorage<const N: usize>([u8; N], [usize; 0]); + +fn gen_arr<const N: usize>() -> AlignedStorage<N> { + let mut ret = AlignedStorage::<N>([0; N], []); + for i in 0..N { + ret.0[i] = i as u8; + } + ret +} + +#[test] +fn memmove_forward_misaligned_nonaligned_start() { + let mut arr = gen_arr::<32>(); + let mut reference = arr; + unsafe { + let src = arr.0.as_ptr().offset(6); + let dst = arr.0.as_mut_ptr().offset(3); + assert_eq!(memmove(dst, src, 17), dst); + reference.0.copy_within(6..6 + 17, 3); + assert_eq!(arr.0, reference.0); + } +} + +#[test] +fn memmove_forward_misaligned_aligned_start() { + let mut arr = gen_arr::<32>(); + let mut reference = arr; + unsafe { + let src = arr.0.as_ptr().offset(6); + let dst = arr.0.as_mut_ptr().add(0); + assert_eq!(memmove(dst, src, 17), dst); + reference.0.copy_within(6..6 + 17, 0); + assert_eq!(arr.0, reference.0); + } +} + +#[test] +fn memmove_forward_aligned() { + let mut arr = gen_arr::<32>(); + let mut reference = arr; + unsafe { + let src = arr.0.as_ptr().add(3 + WORD_SIZE); + let dst = arr.0.as_mut_ptr().add(3); + assert_eq!(memmove(dst, src, 17), dst); + reference + .0 + .copy_within(3 + WORD_SIZE..3 + WORD_SIZE + 17, 3); + assert_eq!(arr.0, reference.0); + } +} + +#[test] +fn memmove_backward_misaligned_nonaligned_start() { + let mut arr = gen_arr::<32>(); + let mut reference = arr; + unsafe { + let src = arr.0.as_ptr().offset(3); + let dst = arr.0.as_mut_ptr().offset(6); + assert_eq!(memmove(dst, src, 17), dst); + reference.0.copy_within(3..3 + 17, 6); + assert_eq!(arr.0, reference.0); + } +} + +#[test] +fn memmove_backward_misaligned_aligned_start() { + let mut arr = gen_arr::<32>(); + let mut reference = arr; + unsafe { + let src = arr.0.as_ptr().offset(3); + let dst = arr.0.as_mut_ptr().add(WORD_SIZE); + assert_eq!(memmove(dst, src, 17), dst); + reference.0.copy_within(3..3 + 17, WORD_SIZE); + assert_eq!(arr.0, reference.0); + } +} + +#[test] +fn memmove_backward_aligned() { + let mut arr = gen_arr::<32>(); + let mut reference = arr; + unsafe { + let src = arr.0.as_ptr().add(3); + let dst = arr.0.as_mut_ptr().add(3 + WORD_SIZE); + assert_eq!(memmove(dst, src, 17), dst); + reference.0.copy_within(3..3 + 17, 3 + WORD_SIZE); + assert_eq!(arr.0, reference.0); + } +} + +#[test] +fn memmove_misaligned_bounds() { + // The above tests have the downside that the addresses surrounding the range-to-copy are all + // still in-bounds, so Miri would not actually complain about OOB accesses. So we also test with + // an array that has just the right size. We test a few times to avoid it being accidentally + // aligned. + for _ in 0..8 { + let mut arr1 = [0u8; 17]; + let mut arr2 = [0u8; 17]; + unsafe { + // Copy both ways so we hit both the forward and backward cases.
+ memmove(arr1.as_mut_ptr(), arr2.as_mut_ptr(), 17); + memmove(arr2.as_mut_ptr(), arr1.as_mut_ptr(), 17); + } + } +} + +#[test] +fn memset_backward_misaligned_nonaligned_start() { + let mut arr = gen_arr::<32>(); + let mut reference = arr; + unsafe { + let ptr = arr.0.as_mut_ptr().offset(6); + assert_eq!(memset(ptr, 0xCC, 17), ptr); + core::ptr::write_bytes(reference.0.as_mut_ptr().add(6), 0xCC, 17); + assert_eq!(arr.0, reference.0); + } +} + +#[test] +fn memset_backward_misaligned_aligned_start() { + let mut arr = gen_arr::<32>(); + let mut reference = arr; + unsafe { + let ptr = arr.0.as_mut_ptr().add(WORD_SIZE); + assert_eq!(memset(ptr, 0xCC, 17), ptr); + core::ptr::write_bytes(reference.0.as_mut_ptr().add(WORD_SIZE), 0xCC, 17); + assert_eq!(arr.0, reference.0); + } +} + +#[test] +fn memset_backward_aligned() { + let mut arr = gen_arr::<32>(); + let mut reference = arr; + unsafe { + let ptr = arr.0.as_mut_ptr().add(3 + WORD_SIZE); + assert_eq!(memset(ptr, 0xCC, 17), ptr); + core::ptr::write_bytes(reference.0.as_mut_ptr().add(3 + WORD_SIZE), 0xCC, 17); + assert_eq!(arr.0, reference.0); + } +} diff --git a/library/compiler-builtins/builtins-test/tests/misc.rs b/library/compiler-builtins/builtins-test/tests/misc.rs new file mode 100644 index 0000000000000..64a9d56f36b33 --- /dev/null +++ b/library/compiler-builtins/builtins-test/tests/misc.rs @@ -0,0 +1,202 @@ +// makes configuration easier +#![allow(unused_macros)] + +use builtins_test::*; + +/// Make sure that the the edge case tester and randomized tester don't break, and list examples of +/// fuzz values for documentation purposes. +#[test] +fn fuzz_values() { + const VALS: [u16; 47] = [ + 0b0, // edge cases + 0b1111111111111111, + 0b1111111111111110, + 0b1111111111111100, + 0b1111111110000000, + 0b1111111100000000, + 0b1110000000000000, + 0b1100000000000000, + 0b1000000000000000, + 0b111111111111111, + 0b111111111111110, + 0b111111111111100, + 0b111111110000000, + 0b111111100000000, + 0b110000000000000, + 0b100000000000000, + 0b11111111111111, + 0b11111111111110, + 0b11111111111100, + 0b11111110000000, + 0b11111100000000, + 0b10000000000000, + 0b111111111, + 0b111111110, + 0b111111100, + 0b110000000, + 0b100000000, + 0b11111111, + 0b11111110, + 0b11111100, + 0b10000000, + 0b111, + 0b110, + 0b100, + 0b11, + 0b10, + 0b1, + 0b1010110100000, // beginning of random fuzzing + 0b1100011001011010, + 0b1001100101001111, + 0b1101010100011010, + 0b100010001, + 0b1000000000000000, + 0b1100000000000101, + 0b1100111101010101, + 0b1100010111111111, + 0b1111110101111111, + ]; + let mut i = 0; + fuzz(10, |x: u16| { + assert_eq!(x, VALS[i]); + i += 1; + }); +} + +#[test] +fn leading_zeros() { + use compiler_builtins::int::leading_zeros::{leading_zeros_default, leading_zeros_riscv}; + { + use compiler_builtins::int::leading_zeros::__clzsi2; + fuzz(N, |x: u32| { + if x == 0 { + return; // undefined value for an intrinsic + } + let lz = x.leading_zeros() as usize; + let lz0 = __clzsi2(x); + let lz1 = leading_zeros_default(x); + let lz2 = leading_zeros_riscv(x); + if lz0 != lz { + panic!("__clzsi2({x}): std: {lz}, builtins: {lz0}"); + } + if lz1 != lz { + panic!("leading_zeros_default({x}): std: {lz}, builtins: {lz1}"); + } + if lz2 != lz { + panic!("leading_zeros_riscv({x}): std: {lz}, builtins: {lz2}"); + } + }); + } + + { + use compiler_builtins::int::leading_zeros::__clzdi2; + fuzz(N, |x: u64| { + if x == 0 { + return; // undefined value for an intrinsic + } + let lz = x.leading_zeros() as usize; + let lz0 = __clzdi2(x); + let lz1 = 
leading_zeros_default(x); + let lz2 = leading_zeros_riscv(x); + if lz0 != lz { + panic!("__clzdi2({x}): std: {lz}, builtins: {lz0}"); + } + if lz1 != lz { + panic!("leading_zeros_default({x}): std: {lz}, builtins: {lz1}"); + } + if lz2 != lz { + panic!("leading_zeros_riscv({x}): std: {lz}, builtins: {lz2}"); + } + }); + } + + { + use compiler_builtins::int::leading_zeros::__clzti2; + fuzz(N, |x: u128| { + if x == 0 { + return; // undefined value for an intrinsic + } + let lz = x.leading_zeros() as usize; + let lz0 = __clzti2(x); + if lz0 != lz { + panic!("__clzti2({x}): std: {lz}, builtins: {lz0}"); + } + }); + } +} + +#[test] +fn trailing_zeros() { + use compiler_builtins::int::trailing_zeros::{__ctzdi2, __ctzsi2, __ctzti2, trailing_zeros}; + fuzz(N, |x: u32| { + if x == 0 { + return; // undefined value for an intrinsic + } + let tz = x.trailing_zeros() as usize; + let tz0 = __ctzsi2(x); + let tz1 = trailing_zeros(x); + if tz0 != tz { + panic!("__ctzsi2({x}): std: {tz}, builtins: {tz0}"); + } + if tz1 != tz { + panic!("trailing_zeros({x}): std: {tz}, builtins: {tz1}"); + } + }); + fuzz(N, |x: u64| { + if x == 0 { + return; // undefined value for an intrinsic + } + let tz = x.trailing_zeros() as usize; + let tz0 = __ctzdi2(x); + let tz1 = trailing_zeros(x); + if tz0 != tz { + panic!("__ctzdi2({x}): std: {tz}, builtins: {tz0}"); + } + if tz1 != tz { + panic!("trailing_zeros({x}): std: {tz}, builtins: {tz1}"); + } + }); + fuzz(N, |x: u128| { + if x == 0 { + return; // undefined value for an intrinsic + } + let tz = x.trailing_zeros() as usize; + let tz0 = __ctzti2(x); + if tz0 != tz { + panic!("__ctzti2({x}): std: {tz}, builtins: {tz0}"); + } + }); +} + +#[test] +fn bswap() { + use compiler_builtins::int::bswap::{__bswapdi2, __bswapsi2}; + fuzz(N, |x: u32| { + assert_eq!(x.swap_bytes(), __bswapsi2(x)); + }); + fuzz(N, |x: u64| { + assert_eq!(x.swap_bytes(), __bswapdi2(x)); + }); + + assert_eq!(__bswapsi2(0x12345678u32), 0x78563412u32); + assert_eq!(__bswapsi2(0x00000001u32), 0x01000000u32); + assert_eq!(__bswapdi2(0x123456789ABCDEF0u64), 0xF0DEBC9A78563412u64); + assert_eq!(__bswapdi2(0x0200000001000000u64), 0x0000000100000002u64); + + #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] + { + use compiler_builtins::int::bswap::__bswapti2; + fuzz(N, |x: u128| { + assert_eq!(x.swap_bytes(), __bswapti2(x)); + }); + + assert_eq!( + __bswapti2(0x123456789ABCDEF013579BDF02468ACEu128), + 0xCE8A4602DF9B5713F0DEBC9A78563412u128 + ); + assert_eq!( + __bswapti2(0x04000000030000000200000001000000u128), + 0x00000001000000020000000300000004u128 + ); + } +} diff --git a/library/compiler-builtins/builtins-test/tests/mul.rs b/library/compiler-builtins/builtins-test/tests/mul.rs new file mode 100644 index 0000000000000..58bc9ab4ac95c --- /dev/null +++ b/library/compiler-builtins/builtins-test/tests/mul.rs @@ -0,0 +1,150 @@ +#![allow(unused_macros)] +#![cfg_attr(f128_enabled, feature(f128))] + +use builtins_test::*; + +mod int_mul { + use super::*; + + macro_rules! mul { + ($($i:ty, $fn:ident);*;) => { + $( + #[test] + fn $fn() { + use compiler_builtins::int::mul::$fn; + + fuzz_2(N, |x: $i, y: $i| { + let mul0 = x.wrapping_mul(y); + let mul1: $i = $fn(x, y); + if mul0 != mul1 { + panic!( + "{func}({x}, {y}): std: {mul0}, builtins: {mul1}", + func = stringify!($fn), + ); + } + }); + + } + )* + }; + } + + mul! { + u64, __muldi3; + i128, __multi3; + } +} + +mod int_overflowing_mul { + use super::*; + + macro_rules! 
overflowing_mul { + ($($i:ty, $fn:ident);*;) => { + $( + #[test] + fn $fn() { + use compiler_builtins::int::mul::$fn; + + fuzz_2(N, |x: $i, y: $i| { + let (mul0, o0) = x.overflowing_mul(y); + let mut o1 = 0i32; + let mul1: $i = $fn(x, y, &mut o1); + let o1 = o1 != 0; + if mul0 != mul1 || o0 != o1 { + panic!( + "{func}({x}, {y}): std: ({mul0}, {o0}), builtins: ({mul1}, {o1})", + func = stringify!($fn), + ); + } + }); + } + )* + }; + } + + overflowing_mul! { + i32, __mulosi4; + i64, __mulodi4; + i128, __muloti4; + } + + #[test] + fn overflowing_mul_u128() { + use compiler_builtins::int::mul::{__rust_i128_mulo, __rust_u128_mulo}; + + fuzz_2(N, |x: u128, y: u128| { + let mut o1 = 0; + let (mul0, o0) = x.overflowing_mul(y); + let mul1 = __rust_u128_mulo(x, y, &mut o1); + if mul0 != mul1 || i32::from(o0) != o1 { + panic!("__rust_u128_mulo({x}, {y}): std: ({mul0}, {o0}), builtins: ({mul1}, {o1})",); + } + let x = x as i128; + let y = y as i128; + let (mul0, o0) = x.overflowing_mul(y); + let mul1 = __rust_i128_mulo(x, y, &mut o1); + if mul0 != mul1 || i32::from(o0) != o1 { + panic!("__rust_i128_mulo({x}, {y}): std: ({mul0}, {o0}), builtins: ({mul1}, {o1})",); + } + }); + } +} + +macro_rules! float_mul { + ($($f:ty, $fn:ident, $apfloat_ty:ident, $sys_available:meta);*;) => { + $( + #[test] + fn $fn() { + use compiler_builtins::float::{mul::$fn, Float}; + use core::ops::Mul; + + fuzz_float_2(N, |x: $f, y: $f| { + let mul0 = apfloat_fallback!($f, $apfloat_ty, $sys_available, Mul::mul, x, y); + let mul1: $f = $fn(x, y); + if !Float::eq_repr(mul0, mul1) { + panic!( + "{func}({x:?}, {y:?}): std: {mul0:?}, builtins: {mul1:?}", + func = stringify!($fn), + ); + } + }); + } + )* + }; +} + +#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))] +mod float_mul { + use super::*; + + // FIXME(#616): Stop ignoring arches that don't have native support once fix for builtins is in + // nightly. + float_mul! { + f32, __mulsf3, Single, not(target_arch = "arm"); + f64, __muldf3, Double, not(target_arch = "arm"); + } +} + +#[cfg(f128_enabled)] +#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))] +#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] +mod float_mul_f128 { + use super::*; + + float_mul! { + f128, __multf3, Quad, + // FIXME(llvm): there is a bug in LLVM rt. + // See . + not(any(feature = "no-sys-f128", all(target_arch = "aarch64", target_os = "linux"))); + } +} + +#[cfg(f128_enabled)] +#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] +mod float_mul_f128_ppc { + use super::*; + + float_mul! { + f128, __mulkf3, Quad, not(feature = "no-sys-f128"); + } +} diff --git a/library/compiler-builtins/builtins-test/tests/shift.rs b/library/compiler-builtins/builtins-test/tests/shift.rs new file mode 100644 index 0000000000000..0f2483855e591 --- /dev/null +++ b/library/compiler-builtins/builtins-test/tests/shift.rs @@ -0,0 +1,35 @@ +use builtins_test::*; + +macro_rules! shift { + ($($i:ty, $fn_std:ident, $fn_builtins:ident);*;) => { + $( + #[test] + fn $fn_builtins() { + use compiler_builtins::int::shift::$fn_builtins; + + fuzz_shift(|x: $i, s: u32| { + let tmp0: $i = x.$fn_std(s); + let tmp1: $i = $fn_builtins(x, s); + if tmp0 != tmp1 { + panic!( + "{}({}, {}): std: {}, builtins: {}", + stringify!($fn_builtins), x, s, tmp0, tmp1 + ); + } + }); + } + )* + }; +} + +shift! 
{ + u32, wrapping_shl, __ashlsi3; + u64, wrapping_shl, __ashldi3; + u128, wrapping_shl, __ashlti3; + i32, wrapping_shr, __ashrsi3; + i64, wrapping_shr, __ashrdi3; + i128, wrapping_shr, __ashrti3; + u32, wrapping_shr, __lshrsi3; + u64, wrapping_shr, __lshrdi3; + u128, wrapping_shr, __lshrti3; +} diff --git a/library/compiler-builtins/ci/bench-icount.sh b/library/compiler-builtins/ci/bench-icount.sh new file mode 100755 index 0000000000000..4d93e257a6cb0 --- /dev/null +++ b/library/compiler-builtins/ci/bench-icount.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +set -eux + +iai_home="iai-home" + +# Download the baseline from master +./ci/ci-util.py locate-baseline --download --extract + +# Run benchmarks once +function run_icount_benchmarks() { + cargo_args=( + "--bench" "icount" + "--no-default-features" + "--features" "unstable,unstable-float,icount" + ) + + iai_args=( + "--home" "$(pwd)/$iai_home" + "--regression=ir=5.0" + "--save-summary" + ) + + # Parse `cargo_arg0 cargo_arg1 -- iai_arg0 iai_arg1` syntax + parsing_iai_args=0 + while [ "$#" -gt 0 ]; do + if [ "$parsing_iai_args" == "1" ]; then + iai_args+=("$1") + elif [ "$1" == "--" ]; then + parsing_iai_args=1 + else + cargo_args+=("$1") + fi + + shift + done + + # Run iai-callgrind benchmarks + cargo bench "${cargo_args[@]}" -- "${iai_args[@]}" + + # NB: iai-callgrind should exit on error but does not, so we inspect the sumary + # for errors. See https://github.com/iai-callgrind/iai-callgrind/issues/337 + if [ -n "${PR_NUMBER:-}" ]; then + # If this is for a pull request, ignore regressions if specified. + ./ci/ci-util.py check-regressions --home "$iai_home" --allow-pr-override "$PR_NUMBER" + else + ./ci/ci-util.py check-regressions --home "$iai_home" || true + fi +} + +# Run once with softfloats, once with arch instructions enabled +run_icount_benchmarks --features force-soft-floats -- --save-baseline=softfloat +run_icount_benchmarks -- --save-baseline=hardfloat + +# Name and tar the new baseline +name="baseline-icount-$(date -u +'%Y%m%d%H%M')-${GITHUB_SHA:0:12}" +echo "BASELINE_NAME=$name" >>"$GITHUB_ENV" +tar cJf "$name.tar.xz" "$iai_home" diff --git a/library/compiler-builtins/ci/ci-util.py b/library/compiler-builtins/ci/ci-util.py new file mode 100755 index 0000000000000..d785b2e9e1d80 --- /dev/null +++ b/library/compiler-builtins/ci/ci-util.py @@ -0,0 +1,438 @@ +#!/usr/bin/env python3 +"""Utilities for CI. + +This dynamically prepares a list of routines that had a source file change based on +git history. +""" + +import json +import os +import re +import subprocess as sp +import sys +from dataclasses import dataclass +from glob import glob, iglob +from inspect import cleandoc +from os import getenv +from pathlib import Path +from typing import TypedDict, Self + +USAGE = cleandoc( + """ + usage: + + ./ci/ci-util.py [flags] + + COMMAND: + generate-matrix + Calculate a matrix of which functions had source change, print that as + a JSON object. + + locate-baseline [--download] [--extract] + Locate the most recent benchmark baseline available in CI and, if flags + specify, download and extract it. Never exits with nonzero status if + downloading fails. + + Note that `--extract` will overwrite files in `iai-home`. + + check-regressions [--home iai-home] [--allow-pr-override pr_number] + Check `iai-home` (or `iai-home` if unspecified) for `summary.json` + files and see if there are any regressions. This is used as a workaround + for `iai-callgrind` not exiting with error status; see + . 
+ + If `--allow-pr-override` is specified, the regression check will not exit + with failure if any line in the PR starts with `allow-regressions`. + """ +) + +REPO_ROOT = Path(__file__).parent.parent +GIT = ["git", "-C", REPO_ROOT] +DEFAULT_BRANCH = "master" +WORKFLOW_NAME = "CI" # Workflow that generates the benchmark artifacts +ARTIFACT_GLOB = "baseline-icount*" +# Place this in a PR body to skip regression checks (must be at the start of a line). +REGRESSION_DIRECTIVE = "ci: allow-regressions" +# Place this in a PR body to skip extensive tests +SKIP_EXTENSIVE_DIRECTIVE = "ci: skip-extensive" +# Place this in a PR body to allow running a large number of extensive tests. If not +# set, this script will error out if a threshold is exceeded in order to avoid +# accidentally spending huge amounts of CI time. +ALLOW_MANY_EXTENSIVE_DIRECTIVE = "ci: allow-many-extensive" +MANY_EXTENSIVE_THRESHOLD = 20 + +# Don't run exhaustive tests if these files change, even if they contaiin a function +# definition. +IGNORE_FILES = [ + "libm/src/math/support/", + "libm/src/libm_helper.rs", + "libm/src/math/arch/intrinsics.rs", +] + +# libm PR CI takes a long time and doesn't need to run unless relevant files have been +# changed. Anything matching this regex pattern will trigger a run. +TRIGGER_LIBM_PR_CI = ".*(libm|musl).*" + +TYPES = ["f16", "f32", "f64", "f128"] + + +def eprint(*args, **kwargs): + """Print to stderr.""" + print(*args, file=sys.stderr, **kwargs) + + +@dataclass +class PrInfo: + """GitHub response for PR query""" + + body: str + commits: list[str] + created_at: str + number: int + + @classmethod + def load(cls, pr_number: int | str) -> Self: + """For a given PR number, query the body and commit list""" + pr_info = sp.check_output( + [ + "gh", + "pr", + "view", + str(pr_number), + "--json=number,commits,body,createdAt", + # Flatten the commit list to only hashes, change a key to snake naming + "--jq=.commits |= map(.oid) | .created_at = .createdAt | del(.createdAt)", + ], + text=True, + ) + eprint("PR info:", json.dumps(pr_info, indent=4)) + return cls(**json.loads(pr_info)) + + def contains_directive(self, directive: str) -> bool: + """Return true if the provided directive is on a line in the PR body""" + lines = self.body.splitlines() + return any(line.startswith(directive) for line in lines) + + +class FunctionDef(TypedDict): + """Type for an entry in `function-definitions.json`""" + + sources: list[str] + type: str + + +class Context: + gh_ref: str | None + changed: list[Path] + defs: dict[str, FunctionDef] + + def __init__(self) -> None: + self.gh_ref = getenv("GITHUB_REF") + self.changed = [] + self._init_change_list() + + with open(REPO_ROOT.joinpath("etc/function-definitions.json")) as f: + defs = json.load(f) + + defs.pop("__comment", None) + self.defs = defs + + def _init_change_list(self): + """Create a list of files that have been changed. This uses GITHUB_REF if + available, otherwise a diff between `HEAD` and `master`. + """ + + # For pull requests, GitHub creates a ref `refs/pull/1234/merge` (1234 being + # the PR number), and sets this as `GITHUB_REF`. + ref = self.gh_ref + eprint(f"using ref `{ref}`") + if not self.is_pr(): + # If the ref is not for `merge` then we are not in PR CI + eprint("No diff available for ref") + return + + # The ref is for a dummy merge commit. We can extract the merge base by + # inspecting all parents (`^@`). 
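+        # For example, `git rev-parse $merge_sha^@` prints both parent hashes; the code below + # treats the first as the target branch tip and the second as the incoming PR head.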
+ merge_sha = sp.check_output( + GIT + ["show-ref", "--hash", ref], text=True + ).strip() + merge_log = sp.check_output(GIT + ["log", "-1", merge_sha], text=True) + eprint(f"Merge:\n{merge_log}\n") + + parents = ( + sp.check_output(GIT + ["rev-parse", f"{merge_sha}^@"], text=True) + .strip() + .splitlines() + ) + assert len(parents) == 2, f"expected two-parent merge but got:\n{parents}" + base = parents[0].strip() + incoming = parents[1].strip() + + eprint(f"base: {base}, incoming: {incoming}") + textlist = sp.check_output( + GIT + ["diff", base, incoming, "--name-only"], text=True + ) + self.changed = [Path(p) for p in textlist.splitlines()] + + def is_pr(self) -> bool: + """Check if we are looking at a PR rather than a push.""" + return self.gh_ref is not None and "merge" in self.gh_ref + + @staticmethod + def _ignore_file(fname: str) -> bool: + return any(fname.startswith(pfx) for pfx in IGNORE_FILES) + + def changed_routines(self) -> dict[str, list[str]]: + """Create a list of routines for which one or more files have been updated, + separated by type. + """ + routines = set() + for name, meta in self.defs.items(): + # Don't update if changes to the file should be ignored + sources = (f for f in meta["sources"] if not self._ignore_file(f)) + + # Select changed files + changed = [f for f in sources if Path(f) in self.changed] + + if len(changed) > 0: + eprint(f"changed files for {name}: {changed}") + routines.add(name) + + ret: dict[str, list[str]] = {} + for r in sorted(routines): + ret.setdefault(self.defs[r]["type"], []).append(r) + + return ret + + def may_skip_libm_ci(self) -> bool: + """If this is a PR and no libm files were changed, allow skipping libm + jobs.""" + + if self.is_pr(): + return all(not re.match(TRIGGER_LIBM_PR_CI, str(f)) for f in self.changed) + + return False + + def emit_workflow_output(self): + """Create a JSON object a list items for each type's changed files, if any + did change, and the routines that were affected by the change. + """ + + pr_number = os.environ.get("PR_NUMBER") + skip_tests = False + error_on_many_tests = False + + if pr_number is not None and len(pr_number) > 0: + pr = PrInfo.load(pr_number) + skip_tests = pr.contains_directive(SKIP_EXTENSIVE_DIRECTIVE) + error_on_many_tests = not pr.contains_directive( + ALLOW_MANY_EXTENSIVE_DIRECTIVE + ) + + if skip_tests: + eprint("Skipping all extensive tests") + + changed = self.changed_routines() + matrix = [] + total_to_test = 0 + + # Figure out which extensive tests need to run + for ty in TYPES: + ty_changed = changed.get(ty, []) + ty_to_test = [] if skip_tests else ty_changed + total_to_test += len(ty_to_test) + + item = { + "ty": ty, + "changed": ",".join(ty_changed), + "to_test": ",".join(ty_to_test), + } + + matrix.append(item) + + ext_matrix = json.dumps({"extensive_matrix": matrix}, separators=(",", ":")) + may_skip = str(self.may_skip_libm_ci()).lower() + print(f"extensive_matrix={ext_matrix}") + print(f"may_skip_libm_ci={may_skip}") + eprint(f"extensive_matrix={ext_matrix}") + eprint(f"may_skip_libm_ci={may_skip}") + eprint(f"total extensive tests: {total_to_test}") + + if error_on_many_tests and total_to_test > MANY_EXTENSIVE_THRESHOLD: + eprint( + f"More than {MANY_EXTENSIVE_THRESHOLD} tests would be run; add" + f" `{ALLOW_MANY_EXTENSIVE_DIRECTIVE}` to the PR body if this is" + " intentional. If this is refactoring that happens to touch a lot of" + f" files, `{SKIP_EXTENSIVE_DIRECTIVE}` can be used instead." 
+ ) + exit(1) + + +def locate_baseline(flags: list[str]) -> None: + """Find the most recent baseline from CI, download it if specified. + + This returns rather than erroring, even if the `gh` commands fail. This is to avoid + erroring in CI if the baseline is unavailable (artifact time limit exceeded, first + run on the branch, etc). + """ + + download = False + extract = False + + while len(flags) > 0: + match flags[0]: + case "--download": + download = True + case "--extract": + extract = True + case _: + eprint(USAGE) + exit(1) + flags = flags[1:] + + if extract and not download: + eprint("cannot extract without downloading") + exit(1) + + try: + # Locate the most recent job to complete with success on our branch + latest_job = sp.check_output( + [ + "gh", + "run", + "list", + "--status=success", + f"--branch={DEFAULT_BRANCH}", + "--json=databaseId,url,headSha,conclusion,createdAt," + "status,workflowDatabaseId,workflowName", + # Return the first array element matching our workflow name. NB: cannot + # just use `--limit=1`, jq filtering happens after limiting. We also + # cannot just use `--workflow` because GH gets confused from + # different file names in history. + f'--jq=[.[] | select(.workflowName == "{WORKFLOW_NAME}")][0]', + ], + text=True, + ) + except sp.CalledProcessError as e: + eprint(f"failed to run github command: {e}") + return + + try: + latest = json.loads(latest_job) + eprint("latest job: ", json.dumps(latest, indent=4)) + except json.JSONDecodeError as e: + eprint(f"failed to decode json '{latest_job}', {e}") + return + + if not download: + eprint("--download not specified, returning") + return + + job_id = latest.get("databaseId") + if job_id is None: + eprint("skipping download step") + return + + sp.run( + ["gh", "run", "download", str(job_id), f"--pattern={ARTIFACT_GLOB}"], + check=False, + ) + + if not extract: + eprint("skipping extraction step") + return + + # Find the baseline with the most recent timestamp. GH downloads the files to e.g. + # `some-dirname/some-dirname.tar.xz`, so just glob the whole thing together. + candidate_baselines = glob(f"{ARTIFACT_GLOB}/{ARTIFACT_GLOB}") + if len(candidate_baselines) == 0: + eprint("no possible baseline directories found") + return + + candidate_baselines.sort(reverse=True) + baseline_archive = candidate_baselines[0] + eprint(f"extracting {baseline_archive}") + sp.run(["tar", "xJvf", baseline_archive], check=True) + eprint("baseline extracted successfully") + + +def check_iai_regressions(args: list[str]): + """Find regressions in iai summary.json files, exit with failure if any are + found. 
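+ + Summary files are produced by the `--save-summary` flag passed in ci/bench-icount.sh.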
+ """ + + iai_home_str = "iai-home" + pr_number = None + + while len(args) > 0: + match args: + case ["--home", home, *rest]: + iai_home_str = home + args = rest + case ["--allow-pr-override", pr_num, *rest]: + pr_number = pr_num + args = rest + case _: + eprint(USAGE) + exit(1) + + iai_home = Path(iai_home_str) + + found_summaries = False + regressions: list[dict] = [] + for summary_path in iglob("**/summary.json", root_dir=iai_home, recursive=True): + found_summaries = True + with open(iai_home / summary_path, "r") as f: + summary = json.load(f) + + summary_regs = [] + run = summary["callgrind_summary"]["callgrind_run"] + fname = summary["function_name"] + id = summary["id"] + name_entry = {"name": f"{fname}.{id}"} + + for segment in run["segments"]: + summary_regs.extend(segment["regressions"]) + + summary_regs.extend(run["total"]["regressions"]) + + regressions.extend(name_entry | reg for reg in summary_regs) + + if not found_summaries: + eprint(f"did not find any summary.json files within {iai_home}") + exit(1) + + if len(regressions) == 0: + eprint("No regressions found") + return + + eprint("Found regressions:", json.dumps(regressions, indent=4)) + + if pr_number is not None: + pr = PrInfo.load(pr_number) + if pr.contains_directive(REGRESSION_DIRECTIVE): + eprint("PR allows regressions, returning") + return + + exit(1) + + +def main(): + match sys.argv[1:]: + case ["generate-matrix"]: + ctx = Context() + ctx.emit_workflow_output() + case ["locate-baseline", *flags]: + locate_baseline(flags) + case ["check-regressions", *args]: + check_iai_regressions(args) + case ["--help" | "-h"]: + print(USAGE) + exit() + case _: + eprint(USAGE) + exit(1) + + +if __name__ == "__main__": + main() diff --git a/library/compiler-builtins/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/library/compiler-builtins/ci/docker/aarch64-unknown-linux-gnu/Dockerfile new file mode 100644 index 0000000000000..df71804ba235e --- /dev/null +++ b/library/compiler-builtins/ci/docker/aarch64-unknown-linux-gnu/Dockerfile @@ -0,0 +1,16 @@ +ARG IMAGE=ubuntu:24.04 +FROM $IMAGE + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev ca-certificates \ + gcc-aarch64-linux-gnu m4 make libc6-dev-arm64-cross \ + qemu-user-static + +ENV TOOLCHAIN_PREFIX=aarch64-linux-gnu- +ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \ + CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER=qemu-aarch64-static \ + AR_aarch64_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \ + CC_aarch64_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \ + QEMU_LD_PREFIX=/usr/aarch64-linux-gnu \ + RUST_TEST_THREADS=1 diff --git a/library/compiler-builtins/ci/docker/arm-unknown-linux-gnueabi/Dockerfile b/library/compiler-builtins/ci/docker/arm-unknown-linux-gnueabi/Dockerfile new file mode 100644 index 0000000000000..38ad1a136236c --- /dev/null +++ b/library/compiler-builtins/ci/docker/arm-unknown-linux-gnueabi/Dockerfile @@ -0,0 +1,15 @@ +ARG IMAGE=ubuntu:24.04 +FROM $IMAGE + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev ca-certificates \ + gcc-arm-linux-gnueabi libc6-dev-armel-cross qemu-user-static + +ENV TOOLCHAIN_PREFIX=arm-linux-gnueabi- +ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABI_LINKER="$TOOLCHAIN_PREFIX"gcc \ + CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABI_RUNNER=qemu-arm-static \ + AR_arm_unknown_linux_gnueabi="$TOOLCHAIN_PREFIX"ar \ + CC_arm_unknown_linux_gnueabi="$TOOLCHAIN_PREFIX"gcc \ + QEMU_LD_PREFIX=/usr/arm-linux-gnueabi \ + RUST_TEST_THREADS=1 diff --git 
a/library/compiler-builtins/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile b/library/compiler-builtins/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile new file mode 100644 index 0000000000000..ffead05d5f22f --- /dev/null +++ b/library/compiler-builtins/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile @@ -0,0 +1,15 @@ +ARG IMAGE=ubuntu:24.04 +FROM $IMAGE + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev ca-certificates \ + gcc-arm-linux-gnueabihf libc6-dev-armhf-cross qemu-user-static + +ENV TOOLCHAIN_PREFIX=arm-linux-gnueabihf- +ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_LINKER="$TOOLCHAIN_PREFIX"gcc \ + CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_RUNNER=qemu-arm-static \ + AR_arm_unknown_linux_gnueabihf="$TOOLCHAIN_PREFIX"ar \ + CC_arm_unknown_linux_gnueabihf="$TOOLCHAIN_PREFIX"gcc \ + QEMU_LD_PREFIX=/usr/arm-linux-gnueabihf \ + RUST_TEST_THREADS=1 diff --git a/library/compiler-builtins/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile b/library/compiler-builtins/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile new file mode 100644 index 0000000000000..9ab49e46ee3aa --- /dev/null +++ b/library/compiler-builtins/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile @@ -0,0 +1,15 @@ +ARG IMAGE=ubuntu:24.04 +FROM $IMAGE + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev ca-certificates \ + gcc-arm-linux-gnueabihf libc6-dev-armhf-cross qemu-user-static + +ENV TOOLCHAIN_PREFIX=arm-linux-gnueabihf- +ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER="$TOOLCHAIN_PREFIX"gcc \ + CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER=qemu-arm-static \ + AR_armv7_unknown_linux_gnueabihf="$TOOLCHAIN_PREFIX"ar \ + CC_armv7_unknown_linux_gnueabihf="$TOOLCHAIN_PREFIX"gcc \ + QEMU_LD_PREFIX=/usr/arm-linux-gnueabihf \ + RUST_TEST_THREADS=1 diff --git a/library/compiler-builtins/ci/docker/i586-unknown-linux-gnu/Dockerfile b/library/compiler-builtins/ci/docker/i586-unknown-linux-gnu/Dockerfile new file mode 100644 index 0000000000000..d12ced3257fe3 --- /dev/null +++ b/library/compiler-builtins/ci/docker/i586-unknown-linux-gnu/Dockerfile @@ -0,0 +1,6 @@ +ARG IMAGE=ubuntu:24.04 +FROM $IMAGE + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc-multilib m4 make libc6-dev ca-certificates diff --git a/library/compiler-builtins/ci/docker/i686-unknown-linux-gnu/Dockerfile b/library/compiler-builtins/ci/docker/i686-unknown-linux-gnu/Dockerfile new file mode 100644 index 0000000000000..d12ced3257fe3 --- /dev/null +++ b/library/compiler-builtins/ci/docker/i686-unknown-linux-gnu/Dockerfile @@ -0,0 +1,6 @@ +ARG IMAGE=ubuntu:24.04 +FROM $IMAGE + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc-multilib m4 make libc6-dev ca-certificates diff --git a/library/compiler-builtins/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile b/library/compiler-builtins/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile new file mode 100644 index 0000000000000..62b43da9e70d6 --- /dev/null +++ b/library/compiler-builtins/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile @@ -0,0 +1,14 @@ +ARG IMAGE=ubuntu:24.04 +FROM $IMAGE + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev qemu-user-static ca-certificates \ + gcc-14-loongarch64-linux-gnu libc6-dev-loong64-cross + +ENV CARGO_TARGET_LOONGARCH64_UNKNOWN_LINUX_GNU_LINKER=loongarch64-linux-gnu-gcc-14 \ + CARGO_TARGET_LOONGARCH64_UNKNOWN_LINUX_GNU_RUNNER=qemu-loongarch64-static \ + 
AR_loongarch64_unknown_linux_gnu=loongarch64-linux-gnu-ar \ + CC_loongarch64_unknown_linux_gnu=loongarch64-linux-gnu-gcc-14 \ + QEMU_LD_PREFIX=/usr/loongarch64-linux-gnu \ + RUST_TEST_THREADS=1 diff --git a/library/compiler-builtins/ci/docker/mips-unknown-linux-gnu/Dockerfile b/library/compiler-builtins/ci/docker/mips-unknown-linux-gnu/Dockerfile new file mode 100644 index 0000000000000..c02a94672340e --- /dev/null +++ b/library/compiler-builtins/ci/docker/mips-unknown-linux-gnu/Dockerfile @@ -0,0 +1,16 @@ +ARG IMAGE=ubuntu:24.04 +FROM $IMAGE + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev ca-certificates \ + gcc-mips-linux-gnu libc6-dev-mips-cross \ + binfmt-support qemu-user-static qemu-system-mips + +ENV TOOLCHAIN_PREFIX=mips-linux-gnu- +ENV CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \ + CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_RUNNER=qemu-mips-static \ + AR_mips_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \ + CC_mips_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \ + QEMU_LD_PREFIX=/usr/mips-linux-gnu \ + RUST_TEST_THREADS=1 diff --git a/library/compiler-builtins/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile b/library/compiler-builtins/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile new file mode 100644 index 0000000000000..6d8b96069bedb --- /dev/null +++ b/library/compiler-builtins/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile @@ -0,0 +1,20 @@ +ARG IMAGE=ubuntu:24.04 +FROM $IMAGE + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + ca-certificates \ + gcc \ + gcc-mips64-linux-gnuabi64 \ + libc6-dev \ + libc6-dev-mips64-cross \ + qemu-user-static \ + qemu-system-mips + +ENV TOOLCHAIN_PREFIX=mips64-linux-gnuabi64- +ENV CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_LINKER="$TOOLCHAIN_PREFIX"gcc \ + CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_RUNNER=qemu-mips64-static \ + AR_mips64_unknown_linux_gnuabi64="$TOOLCHAIN_PREFIX"ar \ + CC_mips64_unknown_linux_gnuabi64="$TOOLCHAIN_PREFIX"gcc \ + QEMU_LD_PREFIX=/usr/mips64-linux-gnuabi64 \ + RUST_TEST_THREADS=1 diff --git a/library/compiler-builtins/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile b/library/compiler-builtins/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile new file mode 100644 index 0000000000000..7e6ac7c3b8aae --- /dev/null +++ b/library/compiler-builtins/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile @@ -0,0 +1,19 @@ +ARG IMAGE=ubuntu:24.04 +FROM $IMAGE + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + ca-certificates \ + gcc \ + gcc-mips64el-linux-gnuabi64 \ + libc6-dev \ + libc6-dev-mips64el-cross \ + qemu-user-static + +ENV TOOLCHAIN_PREFIX=mips64el-linux-gnuabi64- +ENV CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_LINKER="$TOOLCHAIN_PREFIX"gcc \ + CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_RUNNER=qemu-mips64el-static \ + AR_mips64el_unknown_linux_gnuabi64="$TOOLCHAIN_PREFIX"ar \ + CC_mips64el_unknown_linux_gnuabi64="$TOOLCHAIN_PREFIX"gcc \ + QEMU_LD_PREFIX=/usr/mips64el-linux-gnuabi64 \ + RUST_TEST_THREADS=1 diff --git a/library/compiler-builtins/ci/docker/mipsel-unknown-linux-gnu/Dockerfile b/library/compiler-builtins/ci/docker/mipsel-unknown-linux-gnu/Dockerfile new file mode 100644 index 0000000000000..9feadc7b5ce10 --- /dev/null +++ b/library/compiler-builtins/ci/docker/mipsel-unknown-linux-gnu/Dockerfile @@ -0,0 +1,16 @@ +ARG IMAGE=ubuntu:24.04 +FROM $IMAGE + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev ca-certificates \ + gcc-mipsel-linux-gnu 
libc6-dev-mipsel-cross \ + binfmt-support qemu-user-static + +ENV TOOLCHAIN_PREFIX=mipsel-linux-gnu- +ENV CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \ + CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_GNU_RUNNER=qemu-mipsel-static \ + AR_mipsel_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \ + CC_mipsel_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \ + QEMU_LD_PREFIX=/usr/mipsel-linux-gnu \ + RUST_TEST_THREADS=1 diff --git a/library/compiler-builtins/ci/docker/powerpc-unknown-linux-gnu/Dockerfile b/library/compiler-builtins/ci/docker/powerpc-unknown-linux-gnu/Dockerfile new file mode 100644 index 0000000000000..84dcaf47ed5d1 --- /dev/null +++ b/library/compiler-builtins/ci/docker/powerpc-unknown-linux-gnu/Dockerfile @@ -0,0 +1,16 @@ +ARG IMAGE=ubuntu:24.04 +FROM $IMAGE + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev qemu-user-static ca-certificates \ + gcc-powerpc-linux-gnu libc6-dev-powerpc-cross \ + qemu-system-ppc + +ENV TOOLCHAIN_PREFIX=powerpc-linux-gnu- +ENV CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \ + CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc-static \ + AR_powerpc_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \ + CC_powerpc_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \ + QEMU_LD_PREFIX=/usr/powerpc-linux-gnu \ + RUST_TEST_THREADS=1 diff --git a/library/compiler-builtins/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile b/library/compiler-builtins/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile new file mode 100644 index 0000000000000..b90fd5ec54562 --- /dev/null +++ b/library/compiler-builtins/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile @@ -0,0 +1,16 @@ +ARG IMAGE=ubuntu:24.04 +FROM $IMAGE + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev ca-certificates \ + gcc-powerpc64-linux-gnu libc6-dev-ppc64-cross \ + binfmt-support qemu-user-static qemu-system-ppc + +ENV TOOLCHAIN_PREFIX=powerpc64-linux-gnu- +ENV CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \ + CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc64-static \ + AR_powerpc64_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \ + CC_powerpc64_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \ + QEMU_LD_PREFIX=/usr/powerpc64-linux-gnu \ + RUST_TEST_THREADS=1 diff --git a/library/compiler-builtins/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile b/library/compiler-builtins/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile new file mode 100644 index 0000000000000..e6d1d1cd0b53f --- /dev/null +++ b/library/compiler-builtins/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile @@ -0,0 +1,17 @@ +ARG IMAGE=ubuntu:24.04 +FROM $IMAGE + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev qemu-user-static ca-certificates \ + gcc-powerpc64le-linux-gnu libc6-dev-ppc64el-cross \ + qemu-system-ppc + +ENV TOOLCHAIN_PREFIX=powerpc64le-linux-gnu- +ENV CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \ + CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc64le-static \ + AR_powerpc64le_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \ + CC_powerpc64le_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \ + QEMU_CPU=POWER8 \ + QEMU_LD_PREFIX=/usr/powerpc64le-linux-gnu \ + RUST_TEST_THREADS=1 diff --git a/library/compiler-builtins/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile b/library/compiler-builtins/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile new file mode 100644 index 0000000000000..eeb4ed0193e2d --- /dev/null +++ 
b/library/compiler-builtins/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile @@ -0,0 +1,16 @@ +ARG IMAGE=ubuntu:24.04 +FROM $IMAGE + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev qemu-user-static ca-certificates \ + gcc-riscv64-linux-gnu libc6-dev-riscv64-cross \ + qemu-system-riscv64 + +ENV TOOLCHAIN_PREFIX=riscv64-linux-gnu- +ENV CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \ + CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUNNER=qemu-riscv64-static \ + AR_riscv64gc_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \ + CC_riscv64gc_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \ + QEMU_LD_PREFIX=/usr/riscv64-linux-gnu \ + RUST_TEST_THREADS=1 diff --git a/library/compiler-builtins/ci/docker/thumbv6m-none-eabi/Dockerfile b/library/compiler-builtins/ci/docker/thumbv6m-none-eabi/Dockerfile new file mode 100644 index 0000000000000..ad0d4351ea654 --- /dev/null +++ b/library/compiler-builtins/ci/docker/thumbv6m-none-eabi/Dockerfile @@ -0,0 +1,9 @@ +ARG IMAGE=ubuntu:24.04 +FROM $IMAGE + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev ca-certificates \ + gcc-arm-none-eabi \ + libnewlib-arm-none-eabi +ENV BUILD_ONLY=1 diff --git a/library/compiler-builtins/ci/docker/thumbv7em-none-eabi/Dockerfile b/library/compiler-builtins/ci/docker/thumbv7em-none-eabi/Dockerfile new file mode 100644 index 0000000000000..ad0d4351ea654 --- /dev/null +++ b/library/compiler-builtins/ci/docker/thumbv7em-none-eabi/Dockerfile @@ -0,0 +1,9 @@ +ARG IMAGE=ubuntu:24.04 +FROM $IMAGE + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev ca-certificates \ + gcc-arm-none-eabi \ + libnewlib-arm-none-eabi +ENV BUILD_ONLY=1 diff --git a/library/compiler-builtins/ci/docker/thumbv7em-none-eabihf/Dockerfile b/library/compiler-builtins/ci/docker/thumbv7em-none-eabihf/Dockerfile new file mode 100644 index 0000000000000..ad0d4351ea654 --- /dev/null +++ b/library/compiler-builtins/ci/docker/thumbv7em-none-eabihf/Dockerfile @@ -0,0 +1,9 @@ +ARG IMAGE=ubuntu:24.04 +FROM $IMAGE + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev ca-certificates \ + gcc-arm-none-eabi \ + libnewlib-arm-none-eabi +ENV BUILD_ONLY=1 diff --git a/library/compiler-builtins/ci/docker/thumbv7m-none-eabi/Dockerfile b/library/compiler-builtins/ci/docker/thumbv7m-none-eabi/Dockerfile new file mode 100644 index 0000000000000..ad0d4351ea654 --- /dev/null +++ b/library/compiler-builtins/ci/docker/thumbv7m-none-eabi/Dockerfile @@ -0,0 +1,9 @@ +ARG IMAGE=ubuntu:24.04 +FROM $IMAGE + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev ca-certificates \ + gcc-arm-none-eabi \ + libnewlib-arm-none-eabi +ENV BUILD_ONLY=1 diff --git a/library/compiler-builtins/ci/docker/wasm32-unknown-unknown/Dockerfile b/library/compiler-builtins/ci/docker/wasm32-unknown-unknown/Dockerfile new file mode 100644 index 0000000000000..2813d318670ea --- /dev/null +++ b/library/compiler-builtins/ci/docker/wasm32-unknown-unknown/Dockerfile @@ -0,0 +1,8 @@ +ARG IMAGE=ubuntu:20.04 +FROM $IMAGE + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc clang libc6-dev ca-certificates + +ENV CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_RUNNER=true diff --git a/library/compiler-builtins/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/library/compiler-builtins/ci/docker/x86_64-unknown-linux-gnu/Dockerfile new file mode 100644 index 0000000000000..c590adcddf64d --- /dev/null +++ 
b/library/compiler-builtins/ci/docker/x86_64-unknown-linux-gnu/Dockerfile @@ -0,0 +1,6 @@ +ARG IMAGE=ubuntu:24.04 +FROM $IMAGE + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc m4 make libc6-dev ca-certificates diff --git a/library/compiler-builtins/ci/download-compiler-rt.sh b/library/compiler-builtins/ci/download-compiler-rt.sh new file mode 100755 index 0000000000000..bf7f8c2489643 --- /dev/null +++ b/library/compiler-builtins/ci/download-compiler-rt.sh @@ -0,0 +1,10 @@ +#!/bin/sh +# Download sources to build C versions of intrinsics. Once being run, +# `RUST_COMPILER_RT_ROOT` must be set. + +set -eux + +rust_llvm_version=20.1-2025-02-13 + +curl -L -o code.tar.gz "https://github.com/rust-lang/llvm-project/archive/rustc/${rust_llvm_version}.tar.gz" +tar xzf code.tar.gz --strip-components 1 llvm-project-rustc-${rust_llvm_version}/compiler-rt diff --git a/library/compiler-builtins/ci/miri.sh b/library/compiler-builtins/ci/miri.sh new file mode 100755 index 0000000000000..7b0ea44c690f3 --- /dev/null +++ b/library/compiler-builtins/ci/miri.sh @@ -0,0 +1,18 @@ +#!/bin/bash +set -eux + +# We need Tree Borrows as some of our raw pointer patterns are not +# compatible with Stacked Borrows. +export MIRIFLAGS="-Zmiri-tree-borrows" + +# One target that sets `mem-unaligned` and one that does not, +# and a big-endian target. +targets=( + x86_64-unknown-linux-gnu + armv7-unknown-linux-gnueabihf + s390x-unknown-linux-gnu +) +for target in "${targets[@]}"; do + # Only run the `mem` tests to avoid this taking too long. + cargo miri test --manifest-path builtins-test/Cargo.toml --features no-asm --target "$target" -- mem +done diff --git a/library/compiler-builtins/ci/run-docker.sh b/library/compiler-builtins/ci/run-docker.sh new file mode 100755 index 0000000000000..d0122dee5c89a --- /dev/null +++ b/library/compiler-builtins/ci/run-docker.sh @@ -0,0 +1,111 @@ +#!/bin/bash + +# Small script to run tests for a target (or all targets) inside all the +# respective docker images. + +set -euxo pipefail + +host_arch="$(uname -m | sed 's/arm64/aarch64/')" + +# Directories and files that do not yet exist need to be created before +# calling docker, otherwise docker will create them but they will be owned +# by root. +mkdir -p target +cargo generate-lockfile +cargo generate-lockfile --manifest-path builtins-test-intrinsics/Cargo.toml + +run() { + local target="$1" + + echo "testing target: $target" + + emulated="" + target_arch="$(echo "$target" | cut -d'-' -f1)" + if [ "$target_arch" != "$host_arch" ]; then + emulated=1 + echo "target is emulated" + fi + + run_cmd="HOME=/tmp" + + if [ "${GITHUB_ACTIONS:-}" = "true" ]; then + # Enable Docker image caching on GHA + build_cmd=("buildx" "build") + build_args=( + "--cache-from" "type=local,src=/tmp/.buildx-cache" + "--cache-to" "type=local,dest=/tmp/.buildx-cache-new" + # This is the beautiful bash syntax for expanding an array but neither + # raising an error nor returning an empty string if the array is empty. + "${build_args[@]:+"${build_args[@]}"}" + "--load" + ) + fi + + if [ "$(uname -s)" = "Linux" ] && [ -z "${DOCKER_BASE_IMAGE:-}" ]; then + # Share the host rustc and target. 
Do this only on Linux and if the image + # isn't overridden + run_args=( + --user "$(id -u):$(id -g)" + -e "CARGO_HOME=/cargo" + -v "${HOME}/.cargo:/cargo" + -v "$(pwd)/target:/builtins-target" + -v "$(rustc --print sysroot):/rust:ro" + ) + run_cmd="$run_cmd PATH=\$PATH:/rust/bin:/cargo/bin" + else + # Use rustc provided by a docker image + docker volume create compiler-builtins-cache + build_args=( + "--build-arg" + "IMAGE=${DOCKER_BASE_IMAGE:-rustlang/rust:nightly}" + ) + run_args=(-v "compiler-builtins-cache:/builtins-target") + run_cmd="$run_cmd HOME=/tmp" "USING_CONTAINER_RUSTC=1" + fi + + if [ -d compiler-rt ]; then + export RUST_COMPILER_RT_ROOT="/checkout/compiler-rt" + fi + + run_cmd="$run_cmd ci/run.sh $target" + + docker "${build_cmd[@]:-build}" \ + -t "builtins-$target" \ + "${build_args[@]:-}" \ + "ci/docker/$target" + docker run \ + --rm \ + -e CI \ + -e CARGO_TARGET_DIR=/builtins-target \ + -e CARGO_TERM_COLOR \ + -e MAY_SKIP_LIBM_CI \ + -e RUSTFLAGS \ + -e RUST_BACKTRACE \ + -e RUST_COMPILER_RT_ROOT \ + -e "EMULATED=$emulated" \ + -v "$(pwd):/checkout:ro" \ + -w /checkout \ + "${run_args[@]:-}" \ + --init \ + "builtins-$target" \ + sh -c "$run_cmd" +} + +if [ "${1:-}" = "--help" ] || [ "$#" -gt 1 ]; then + set +x + echo "\ + usage: ./ci/run-docker.sh [target] + + you can also set DOCKER_BASE_IMAGE to use something other than the default + ubuntu:24.04 (or rustlang/rust:nightly). + " + exit +fi + +if [ -z "${1:-}" ]; then + for d in ci/docker/*; do + run $(basename "$d") + done +else + run "$1" +fi diff --git a/library/compiler-builtins/ci/run-extensive.sh b/library/compiler-builtins/ci/run-extensive.sh new file mode 100755 index 0000000000000..4ba41a026fab6 --- /dev/null +++ b/library/compiler-builtins/ci/run-extensive.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +set -euo pipefail + +echo "Tests to run: '$TO_TEST'" + +if [ -z "$TO_TEST" ]; then + echo "No tests to run, exiting." 
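+    # TO_TEST is empty when ci-util.py found no changed routines for this type (or extensive tests were skipped via the PR directive).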
+ exit +fi + +set -x + +test_cmd=( + cargo test + --package libm-test + --features "build-mpfr,libm/unstable,libm/force-soft-floats" + --profile release-checked +) + +# Run the non-extensive tests first to catch any easy failures +"${test_cmd[@]}" -- "$TO_TEST" + +LIBM_EXTENSIVE_TESTS="$TO_TEST" "${test_cmd[@]}" -- extensive diff --git a/library/compiler-builtins/ci/run.sh b/library/compiler-builtins/ci/run.sh new file mode 100755 index 0000000000000..68d13c130bcce --- /dev/null +++ b/library/compiler-builtins/ci/run.sh @@ -0,0 +1,302 @@ +#!/bin/bash + +set -eux + +export RUST_BACKTRACE="${RUST_BACKTRACE:-full}" +export NEXTEST_STATUS_LEVEL=all + +target="${1:-}" + +if [ -z "$target" ]; then + host_target=$(rustc -vV | awk '/^host/ { print $2 }') + echo "Defaulted to host target $host_target" + target="$host_target" +fi + +if [[ "$target" = *"wasm"* ]]; then + # Enable the random backend + export RUSTFLAGS="${RUSTFLAGS:-} --cfg getrandom_backend=\"wasm_js\"" +fi + +if [ "${USING_CONTAINER_RUSTC:-}" = 1 ]; then + # Install nonstandard components if we have control of the environment + rustup target list --installed | + grep -E "^$target\$" || + rustup target add "$target" +fi + +# Test our implementation +if [ "${BUILD_ONLY:-}" = "1" ]; then + echo "no tests to run for build-only targets" +else + test_builtins=(cargo test --package builtins-test --no-fail-fast --target "$target") + "${test_builtins[@]}" + "${test_builtins[@]}" --release + "${test_builtins[@]}" --features c + "${test_builtins[@]}" --features c --release + "${test_builtins[@]}" --features no-asm + "${test_builtins[@]}" --features no-asm --release + "${test_builtins[@]}" --features no-f16-f128 + "${test_builtins[@]}" --features no-f16-f128 --release + "${test_builtins[@]}" --benches + "${test_builtins[@]}" --benches --release + + if [ "${TEST_VERBATIM:-}" = "1" ]; then + verb_path=$(cmd.exe //C echo \\\\?\\%cd%\\builtins-test\\target2) + "${test_builtins[@]}" --target-dir "$verb_path" --features c + fi +fi + + +declare -a rlib_paths + +# Set the `rlib_paths` global array to a list of all compiler-builtins rlibs +update_rlib_paths() { + if [ -d /builtins-target ]; then + rlib_paths=( /builtins-target/"${target}"/debug/deps/libcompiler_builtins-*.rlib ) + else + rlib_paths=( target/"${target}"/debug/deps/libcompiler_builtins-*.rlib ) + fi +} + +# Remove any existing artifacts from previous tests that don't set #![compiler_builtins] +update_rlib_paths +rm -f "${rlib_paths[@]}" + +cargo build -p compiler_builtins --target "$target" +cargo build -p compiler_builtins --target "$target" --release +cargo build -p compiler_builtins --target "$target" --features c +cargo build -p compiler_builtins --target "$target" --features c --release +cargo build -p compiler_builtins --target "$target" --features no-asm +cargo build -p compiler_builtins --target "$target" --features no-asm --release +cargo build -p compiler_builtins --target "$target" --features no-f16-f128 +cargo build -p compiler_builtins --target "$target" --features no-f16-f128 --release + +PREFIX=${target//unknown-/}- +case "$target" in + armv7-*) + PREFIX=arm-linux-gnueabihf- + ;; + thumb*) + PREFIX=arm-none-eabi- + ;; + *86*-*) + PREFIX= + ;; +esac + +NM=$(find "$(rustc --print sysroot)" \( -name llvm-nm -o -name llvm-nm.exe \) ) +if [ "$NM" = "" ]; then + NM="${PREFIX}nm" +fi + +# i686-pc-windows-gnu tools have a dependency on some DLLs, so run it with +# rustup run to ensure that those are in PATH. 
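+# (`rustup run` puts the selected toolchain's own bin directory on PATH, which is typically where those DLLs live.)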
+TOOLCHAIN="$(rustup show active-toolchain | sed 's/ (default)//')" +if [[ "$TOOLCHAIN" == *i686-pc-windows-gnu ]]; then + NM="rustup run $TOOLCHAIN $NM" +fi + +# Look out for duplicated symbols when we include the compiler-rt (C) implementation +update_rlib_paths +for rlib in "${rlib_paths[@]}"; do + set +x + echo "================================================================" + echo "checking $rlib for duplicate symbols" + echo "================================================================" + set -x + + duplicates_found=0 + + # NOTE On i586, It's normal that the get_pc_thunk symbol appears several + # times so ignore it + $NM -g --defined-only "$rlib" 2>&1 | + sort | + uniq -d | + grep -v __x86.get_pc_thunk --quiet | + grep 'T __' && duplicates_found=1 + + if [ "$duplicates_found" != 0 ]; then + echo "error: found duplicate symbols" + exit 1 + else + echo "success; no duplicate symbols found" + fi +done + +rm -f "${rlib_paths[@]}" + +build_intrinsics_test() { + cargo build \ + --target "$target" --verbose \ + --manifest-path builtins-test-intrinsics/Cargo.toml "$@" +} + +# Verify that we haven't dropped any intrinsics/symbols +build_intrinsics_test +build_intrinsics_test --release +build_intrinsics_test --features c +build_intrinsics_test --features c --release + +# Verify that there are no undefined symbols to `panic` within our +# implementations +CARGO_PROFILE_DEV_LTO=true build_intrinsics_test +CARGO_PROFILE_RELEASE_LTO=true build_intrinsics_test --release + +# Ensure no references to any symbols from core +update_rlib_paths +for rlib in "${rlib_paths[@]}"; do + set +x + echo "================================================================" + echo "checking $rlib for references to core" + echo "================================================================" + set -x + + tmpdir="${CARGO_TARGET_DIR:-target}/tmp" + test -d "$tmpdir" || mkdir "$tmpdir" + defined="$tmpdir/defined_symbols.txt" + undefined="$tmpdir/defined_symbols.txt" + + $NM --quiet -U "$rlib" | grep 'T _ZN4core' | awk '{print $3}' | sort | uniq > "$defined" + $NM --quiet -u "$rlib" | grep 'U _ZN4core' | awk '{print $2}' | sort | uniq > "$undefined" + grep_has_results=0 + grep -v -F -x -f "$defined" "$undefined" && grep_has_results=1 + + if [ "$target" = "powerpc64-unknown-linux-gnu" ]; then + echo "FIXME: powerpc64 fails these tests" + elif [ "$grep_has_results" != 0 ]; then + echo "error: found unexpected references to core" + exit 1 + else + echo "success; no references to core found" + fi +done + +# Test libm + +# Make sure a simple build works +cargo check -p libm --no-default-features --target "$target" + +if [ "${MAY_SKIP_LIBM_CI:-}" = "true" ]; then + echo "skipping libm PR CI" + exit +fi + +mflags=() + +# We enumerate features manually. +mflags+=(--no-default-features) + +# Enable arch-specific routines when available. +mflags+=(--features arch) + +# Always enable `unstable-float` since it expands available API but does not +# change any implementations. +mflags+=(--features unstable-float) + +# We need to specifically skip tests for musl-math-sys on systems that can't +# build musl since otherwise `--all` will activate it. +case "$target" in + # Can't build at all on MSVC, WASM, or thumb + *windows-msvc*) mflags+=(--exclude musl-math-sys) ;; + *wasm*) mflags+=(--exclude musl-math-sys) ;; + *thumb*) mflags+=(--exclude musl-math-sys) ;; + + # We can build musl on MinGW but running tests gets a stack overflow + *windows-gnu*) ;; + # FIXME(#309): LE PPC crashes calling the musl version of some functions. 
It + # seems like a qemu bug but should be investigated further at some point. + # See . + *powerpc64le*) ;; + + # Everything else gets musl enabled + *) mflags+=(--features libm-test/build-musl) ;; +esac + + +# Configure which targets test against MPFR +case "$target" in + # MSVC cannot link MPFR + *windows-msvc*) ;; + # FIXME: MinGW should be able to build MPFR, but setup in CI is nontrivial. + *windows-gnu*) ;; + # Targets that aren't cross compiled in CI work fine + aarch64*apple*) mflags+=(--features libm-test/build-mpfr) ;; + aarch64*linux*) mflags+=(--features libm-test/build-mpfr) ;; + i586*) mflags+=(--features libm-test/build-mpfr --features gmp-mpfr-sys/force-cross) ;; + i686*) mflags+=(--features libm-test/build-mpfr) ;; + x86_64*) mflags+=(--features libm-test/build-mpfr) ;; +esac + +# FIXME: `STATUS_DLL_NOT_FOUND` testing macros on CI. +# +case "$target" in + *windows-gnu) mflags+=(--exclude libm-macros) ;; +esac + +if [ "${BUILD_ONLY:-}" = "1" ]; then + # If we are on targets that can't run tests, verify that we can build. + cmd=(cargo build --target "$target" --package libm) + "${cmd[@]}" + "${cmd[@]}" --features unstable-intrinsics + + echo "can't run tests on $target; skipping" +else + mflags+=(--workspace --target "$target") + cmd=(cargo test "${mflags[@]}") + profile_flag="--profile" + + # If nextest is available, use that + command -v cargo-nextest && nextest=1 || nextest=0 + if [ "$nextest" = "1" ]; then + cmd=(cargo nextest run --max-fail=10) + + # Workaround for https://github.com/nextest-rs/nextest/issues/2066 + if [ -f /.dockerenv ]; then + cfg_file="/tmp/nextest-config.toml" + echo "[store]" >> "$cfg_file" + echo "dir = \"$CARGO_TARGET_DIR/nextest\"" >> "$cfg_file" + cmd+=(--config-file "$cfg_file") + fi + + # Not all configurations have tests to run on wasm + [[ "$target" = *"wasm"* ]] && cmd+=(--no-tests=warn) + + cmd+=("${mflags[@]}") + profile_flag="--cargo-profile" + fi + + # Test once without intrinsics + "${cmd[@]}" + + # Run doctests if they were excluded by nextest + [ "$nextest" = "1" ] && cargo test --doc --exclude compiler_builtins "${mflags[@]}" + + # Exclude the macros and utile crates from the rest of the tests to save CI + # runtime, they shouldn't have anything feature- or opt-level-dependent. + cmd+=(--exclude util --exclude libm-macros) + + # Test once with intrinsics enabled + "${cmd[@]}" --features unstable-intrinsics + "${cmd[@]}" --features unstable-intrinsics --benches + + # Test the same in release mode, which also increases coverage. Also ensure + # the soft float routines are checked. + "${cmd[@]}" "$profile_flag" release-checked + "${cmd[@]}" "$profile_flag" release-checked --features force-soft-floats + "${cmd[@]}" "$profile_flag" release-checked --features unstable-intrinsics + "${cmd[@]}" "$profile_flag" release-checked --features unstable-intrinsics --benches + + # Ensure that the routines do not panic. + # + # `--tests` must be passed because no-panic is only enabled as a dev + # dependency. The `release-opt` profile must be used to enable LTO and a + # single CGU. 
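+    # (Sketch of the expectation, not an extra check: the `no-panic`-style
+    # attribute turns any panic path that survives optimization into a link
+    # error, so this build succeeding implies the routines are panic-free.)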
+ ENSURE_NO_PANIC=1 cargo build \ + -p libm \ + --target "$target" \ + --no-default-features \ + --features unstable-float \ + --tests \ + --profile release-opt +fi diff --git a/library/compiler-builtins/compiler-builtins/CHANGELOG.md b/library/compiler-builtins/compiler-builtins/CHANGELOG.md new file mode 100644 index 0000000000000..a7c01c463ca68 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/CHANGELOG.md @@ -0,0 +1,168 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +## [0.1.159](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.158...compiler_builtins-v0.1.159) - 2025-05-12 + +### Other + +- Remove cfg(bootstrap) + +## [0.1.158](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.157...compiler_builtins-v0.1.158) - 2025-05-06 + +### Other + +- Require `target_has_atomic = "ptr"` for runtime feature detection + +## [0.1.157](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.156...compiler_builtins-v0.1.157) - 2025-05-03 + +### Other + +- Use runtime feature detection for fma routines on x86 + +## [0.1.156](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.155...compiler_builtins-v0.1.156) - 2025-04-21 + +### Other + +- avr: Provide `abort()` +- Remove `unsafe` from `naked_asm!` blocks +- Enable icount benchmarks in CI +- Move builtins-test-intrinsics out of the workspace +- Run `cargo fmt` on all projects +- Flatten the `libm/libm` directory +- Update path to libm after the merge + +## [0.1.155](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.154...compiler_builtins-v0.1.155) - 2025-04-17 + +### Other + +- use `#[cfg(bootstrap)]` for rustc sync +- Replace the `bl!` macro with `asm_sym` +- __udivmod(h|q)i4 + +## [0.1.154](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.153...compiler_builtins-v0.1.154) - 2025-04-16 + +### Other + +- turn #[naked] into an unsafe attribute + +## [0.1.153](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.152...compiler_builtins-v0.1.153) - 2025-04-09 + +### Other + +- Remove a mention of `force-soft-float` in `build.rs` +- Revert "Disable `f16` on AArch64 without the `neon` feature" +- Skip No More! 
+- avoid out-of-bounds accesses ([#799](https://github.com/rust-lang/compiler-builtins/pull/799)) + +## [0.1.152](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.151...compiler_builtins-v0.1.152) - 2025-03-20 + +### Other + +- Remove use of `atomic_load_unordered` and undefined behaviour from `arm_linux.rs` +- Switch repository layout to use a virtual manifest + +## [0.1.151](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.150...compiler_builtins-v0.1.151) - 2025-03-05 + +### Other + +- Add cygwin support +- Enable `f16` for LoongArch ([#770](https://github.com/rust-lang/compiler-builtins/pull/770)) +- Add __extendhfdf2 and add __truncdfhf2 test +- Remove outdated information from the readme + +## [0.1.150](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.149...compiler_builtins-v0.1.150) - 2025-03-01 + +### Other + +- Disable `f16` on AArch64 without the `neon` feature +- Update LLVM downloads to 20.1-2025-02-13 + +## [0.1.149](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.148...compiler_builtins-v0.1.149) - 2025-02-25 + +### Other + +- Make a subset of `libm` symbols weakly available on all platforms + +## [0.1.148](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.147...compiler_builtins-v0.1.148) - 2025-02-24 + +### Other + +- Update the `libm` submodule +- Enable `f16` for MIPS +- Eliminate the use of `public_test_dep!` for a third time + +## [0.1.147](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.146...compiler_builtins-v0.1.147) - 2025-02-19 + +### Other + +- remove win64_128bit_abi_hack + +## [0.1.146](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.145...compiler_builtins-v0.1.146) - 2025-02-06 + +### Other + +- Expose erf{,c}{,f} from libm + +## [0.1.145](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.144...compiler_builtins-v0.1.145) - 2025-02-04 + +### Other + +- Revert "Eliminate the use of `public_test_dep!`" +- Indentation fix to please clippy +- Don't build out of line atomics support code for uefi +- Add a version to some FIXMEs that will be resolved in LLVM 20 +- Remove use of the `start` feature + +## [0.1.144](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.143...compiler_builtins-v0.1.144) - 2025-01-15 + +### Other + +- Eliminate the use of `public_test_dep!` + +## [0.1.143](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.142...compiler_builtins-v0.1.143) - 2025-01-15 + +### Other + +- Use a C-safe return type for `__rust_[ui]128_*` overflowing intrinsics + +## [0.1.142](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.141...compiler_builtins-v0.1.142) - 2025-01-07 + +### Other + +- Account for optimization levels other than numbers + +## [0.1.141](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.140...compiler_builtins-v0.1.141) - 2025-01-07 + +### Other + +- Update the `libm` submodule +- Fix new `clippy::precedence` errors +- Rename `EXP_MAX` to `EXP_SAT` +- Shorten prefixes for float constants + +## 
[0.1.140](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.139...compiler_builtins-v0.1.140) - 2024-12-26 + +### Other + +- Disable f128 for amdgpu ([#737](https://github.com/rust-lang/compiler-builtins/pull/737)) +- Fix a bug in `abs_diff` +- Disable `f16` on platforms that have recursion problems + +## [0.1.139](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.138...compiler_builtins-v0.1.139) - 2024-11-03 + +### Other + +- Remove incorrect `sparcv9` match pattern from `configure_f16_f128` + +## [0.1.138](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.137...compiler_builtins-v0.1.138) - 2024-11-01 + +### Other + +- Use `f16_enabled`/`f128_enabled` in `examples/intrinsics.rs` ([#724](https://github.com/rust-lang/compiler-builtins/pull/724)) +- Disable `f16` for LoongArch64 ([#722](https://github.com/rust-lang/compiler-builtins/pull/722)) diff --git a/library/compiler-builtins/compiler-builtins/Cargo.toml b/library/compiler-builtins/compiler-builtins/Cargo.toml new file mode 100644 index 0000000000000..d65a22152ef94 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/Cargo.toml @@ -0,0 +1,64 @@ +[package] +authors = ["Jorge Aparicio "] +name = "compiler_builtins" +version = "0.1.159" +license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)" +readme = "README.md" +repository = "https://github.com/rust-lang/compiler-builtins" +homepage = "https://github.com/rust-lang/compiler-builtins" +documentation = "https://docs.rs/compiler_builtins" +edition = "2021" +description = "Compiler intrinsics used by the Rust compiler." +links = "compiler-rt" + +[lib] +bench = false +doctest = false +test = false + +[dependencies] +# For more information on this dependency see +# https://github.com/rust-lang/rust/tree/master/library/rustc-std-workspace-core +core = { version = "1.0.0", optional = true, package = "rustc-std-workspace-core" } + +[build-dependencies] +cc = { optional = true, version = "1.0" } + +[dev-dependencies] +panic-handler = { path = "../crates/panic-handler" } + +[features] +default = ["compiler-builtins"] + +# Enable compilation of C code in compiler-rt, filling in some more optimized +# implementations and also filling in unimplemented intrinsics +c = ["dep:cc"] + +# Workaround for the Cranelift codegen backend. Disables any implementations +# which use inline assembly and fall back to pure Rust versions (if available). +no-asm = [] + +# Workaround for codegen backends which haven't yet implemented `f16` and +# `f128` support. Disabled any intrinsics which use those types. +no-f16-f128 = [] + +# Flag this library as the unstable compiler-builtins lib +compiler-builtins = [] + +# Generate memory-related intrinsics like memcpy +mem = [] + +# Mangle all names so this can be linked in with other versions or other +# compiler-rt implementations. 
Also used for testing +mangled-names = [] + +# Only used in the compiler's build system +rustc-dep-of-std = ["compiler-builtins", "dep:core"] + +# This makes certain traits and function specializations public that +# are not normally public but are required by the `builtins-test` +unstable-public-internals = [] + +[lints.rust] +# The cygwin config can be dropped after our benchmark toolchain is bumped +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(bootstrap)', 'cfg(target_os, values("cygwin"))'] } diff --git a/library/compiler-builtins/compiler-builtins/README.md b/library/compiler-builtins/compiler-builtins/README.md new file mode 100644 index 0000000000000..387b70c0499a6 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/README.md @@ -0,0 +1,436 @@ +# `compiler-builtins` + +This crate provides external symbols that the compiler expects to be available +when building Rust projects, typically software routines for basic operations +that do not have hardware support. It is largely a port of LLVM's +[`compiler-rt`]. + +It is distributed as part of Rust's sysroot. `compiler-builtins` does not need +to be added as an explicit dependency in `Cargo.toml`. + +[`compiler-rt`]: https://github.com/llvm/llvm-project/tree/1b1dc505057322f4fa1110ef4f53c44347f52986/compiler-rt + +## Contributing + +See [CONTRIBUTING.md](CONTRIBUTING.md). + +## Progress + +- [x] aarch64/chkstk.S +- [x] adddf3.c +- [x] addsf3.c +- [x] arm/addsf3.S +- [x] arm/aeabi_dcmp.S +- [x] arm/aeabi_fcmp.S +- [x] arm/aeabi_idivmod.S +- [x] arm/aeabi_ldivmod.S +- [x] arm/aeabi_memcpy.S +- [x] arm/aeabi_memmove.S +- [x] arm/aeabi_memset.S +- [x] arm/aeabi_uidivmod.S +- [x] arm/aeabi_uldivmod.S +- [ ] arm/chkstk.S +- [ ] arm/divmodsi4.S (generic version is done) +- [ ] arm/divsi3.S (generic version is done) +- [ ] arm/modsi3.S (generic version is done) +- [x] arm/softfloat-alias.list +- [ ] arm/udivmodsi4.S (generic version is done) +- [ ] arm/udivsi3.S (generic version is done) +- [ ] arm/umodsi3.S (generic version is done) +- [x] ashldi3.c +- [x] ashrdi3.c +- [ ] avr/divmodhi4.S +- [ ] avr/divmodqi4.S +- [ ] avr/mulhi3.S +- [ ] avr/mulqi3.S +- [ ] avr/udivmodhi4.S +- [ ] avr/udivmodqi4.S +- [x] bswapdi2.c +- [x] bswapsi2.c +- [x] bswapti2.c +- [x] clzdi2.c +- [x] clzsi2.c +- [x] clzti2.c +- [x] comparedf2.c +- [x] comparesf2.c +- [x] ctzdi2.c +- [x] ctzsi2.c +- [x] ctzti2.c +- [x] divdf3.c +- [x] divdi3.c +- [x] divmoddi4.c +- [x] divmodsi4.c +- [x] divmodti4.c +- [x] divsf3.c +- [x] divsi3.c +- [x] extendsfdf2.c +- [x] fixdfdi.c +- [x] fixdfsi.c +- [x] fixsfdi.c +- [x] fixsfsi.c +- [x] fixunsdfdi.c +- [x] fixunsdfsi.c +- [x] fixunssfdi.c +- [x] fixunssfsi.c +- [x] floatdidf.c +- [x] floatdisf.c +- [x] floatsidf.c +- [x] floatsisf.c +- [x] floatundidf.c +- [x] floatundisf.c +- [x] floatunsidf.c +- [x] floatunsisf.c +- [ ] i386/ashldi3.S +- [ ] i386/ashrdi3.S +- [x] i386/chkstk.S +- [ ] i386/divdi3.S +- [ ] i386/lshrdi3.S +- [ ] i386/moddi3.S +- [ ] i386/muldi3.S +- [ ] i386/udivdi3.S +- [ ] i386/umoddi3.S +- [x] lshrdi3.c +- [x] moddi3.c +- [x] modsi3.c +- [x] muldf3.c +- [x] muldi3.c +- [x] mulodi4.c +- [x] mulosi4.c +- [x] mulsf3.c +- [x] powidf2.c +- [x] powisf2.c +- [ ] riscv/muldi3.S +- [ ] riscv/mulsi3.S +- [x] subdf3.c +- [x] subsf3.c +- [x] truncdfsf2.c +- [x] udivdi3.c +- [x] udivmoddi4.c +- [x] udivmodsi4.c +- [x] udivsi3.c +- [x] umoddi3.c +- [x] umodsi3.c +- [x] x86_64/chkstk.S + +These builtins are needed to support 128-bit integers. 
+ +- [x] ashlti3.c +- [x] ashrti3.c +- [x] divti3.c +- [x] fixdfti.c +- [x] fixsfti.c +- [x] fixunsdfti.c +- [x] fixunssfti.c +- [x] floattidf.c +- [x] floattisf.c +- [x] floatuntidf.c +- [x] floatuntisf.c +- [x] lshrti3.c +- [x] modti3.c +- [x] muloti4.c +- [x] multi3.c +- [x] udivmodti4.c +- [x] udivti3.c +- [x] umodti3.c + +These builtins are needed to support `f16` and `f128`, which are in the process +of being added to Rust. + +- [x] addtf3.c +- [x] comparetf2.c +- [x] divtf3.c +- [x] extenddftf2.c +- [x] extendhfsf2.c +- [x] extendhftf2.c +- [x] extendsftf2.c +- [x] fixtfdi.c +- [x] fixtfsi.c +- [x] fixtfti.c +- [x] fixunstfdi.c +- [x] fixunstfsi.c +- [x] fixunstfti.c +- [x] floatditf.c +- [x] floatsitf.c +- [x] floattitf.c +- [x] floatunditf.c +- [x] floatunsitf.c +- [x] floatuntitf.c +- [x] multf3.c +- [x] powitf2.c +- [x] subtf3.c +- [x] truncdfhf2.c +- [x] truncsfhf2.c +- [x] trunctfdf2.c +- [x] trunctfhf2.c +- [x] trunctfsf2.c + + +These builtins are used by the Hexagon DSP + +- [ ] hexagon/common_entry_exit_abi1.S +- [ ] hexagon/common_entry_exit_abi2.S +- [ ] hexagon/common_entry_exit_legacy.S +- [x] hexagon/dfaddsub.S~~ +- [x] hexagon/dfdiv.S~~ +- [x] hexagon/dffma.S~~ +- [x] hexagon/dfminmax.S~~ +- [x] hexagon/dfmul.S~~ +- [x] hexagon/dfsqrt.S~~ +- [x] hexagon/divdi3.S~~ +- [x] hexagon/divsi3.S~~ +- [x] hexagon/fastmath2_dlib_asm.S~~ +- [x] hexagon/fastmath2_ldlib_asm.S~~ +- [x] hexagon/fastmath_dlib_asm.S~~ +- [x] hexagon/memcpy_forward_vp4cp4n2.S~~ +- [x] hexagon/memcpy_likely_aligned.S~~ +- [x] hexagon/moddi3.S~~ +- [x] hexagon/modsi3.S~~ +- [x] hexagon/sfdiv_opt.S~~ +- [x] hexagon/sfsqrt_opt.S~~ +- [x] hexagon/udivdi3.S~~ +- [x] hexagon/udivmoddi4.S~~ +- [x] hexagon/udivmodsi4.S~~ +- [x] hexagon/udivsi3.S~~ +- [x] hexagon/umoddi3.S~~ +- [x] hexagon/umodsi3.S~~ + +## Unimplemented functions + +These builtins are for x87 `f80` floating-point numbers that are not supported +by Rust. + +- ~~extendxftf2.c~~ +- ~~fixunsxfdi.c~~ +- ~~fixunsxfsi.c~~ +- ~~fixunsxfti.c~~ +- ~~fixxfdi.c~~ +- ~~fixxfti.c~~ +- ~~floatdixf.c~~ +- ~~floattixf.c~~ +- ~~floatundixf.c~~ +- ~~floatuntixf.c~~ +- ~~i386/floatdixf.S~~ +- ~~i386/floatundixf.S~~ +- ~~x86_64/floatdixf.c~~ +- ~~x86_64/floatundixf.S~~ + +These builtins are for IBM "extended double" non-IEEE 128-bit floating-point +numbers. + +- ~~ppc/divtc3.c~~ +- ~~ppc/fixtfdi.c~~ +- ~~ppc/fixtfti.c~~ +- ~~ppc/fixunstfdi.c~~ +- ~~ppc/fixunstfti.c~~ +- ~~ppc/floatditf.c~~ +- ~~ppc/floattitf.c~~ +- ~~ppc/floatunditf.c~~ +- ~~ppc/gcc_qadd.c~~ +- ~~ppc/gcc_qdiv.c~~ +- ~~ppc/gcc_qmul.c~~ +- ~~ppc/gcc_qsub.c~~ +- ~~ppc/multc3.c~~ + +These builtins are for 16-bit brain floating-point numbers that are not +supported by Rust. + +- ~~truncdfbf2.c~~ +- ~~truncsfbf2.c~~ +- ~~trunctfxf2.c~~ + +These builtins involve complex floating-point types that are not supported by +Rust. + +- ~~divdc3.c~~ +- ~~divsc3.c~~ +- ~~divtc3.c~~ +- ~~divxc3.c~~ +- ~~muldc3.c~~ +- ~~mulsc3.c~~ +- ~~multc3.c~~ +- ~~mulxc3.c~~ +- ~~powixf2.c~~ + +These builtins are never called by LLVM. 
+ +- ~~absvdi2.c~~ +- ~~absvsi2.c~~ +- ~~absvti2.c~~ +- ~~addvdi3.c~~ +- ~~addvsi3.c~~ +- ~~addvti3.c~~ +- ~~arm/aeabi_cdcmp.S~~ +- ~~arm/aeabi_cdcmpeq_check_nan.c~~ +- ~~arm/aeabi_cfcmp.S~~ +- ~~arm/aeabi_cfcmpeq_check_nan.c~~ +- ~~arm/aeabi_div0.c~~ +- ~~arm/aeabi_drsub.c~~ +- ~~arm/aeabi_frsub.c~~ +- ~~arm/aeabi_memcmp.S~~ +- ~~arm/bswapdi2.S~~ +- ~~arm/bswapsi2.S~~ +- ~~arm/clzdi2.S~~ +- ~~arm/clzsi2.S~~ +- ~~arm/comparesf2.S~~ +- ~~arm/restore_vfp_d8_d15_regs.S~~ +- ~~arm/save_vfp_d8_d15_regs.S~~ +- ~~arm/switch16.S~~ +- ~~arm/switch32.S~~ +- ~~arm/switch8.S~~ +- ~~arm/switchu8.S~~ +- ~~cmpdi2.c~~ +- ~~cmpti2.c~~ +- ~~ffssi2.c~~ +- ~~ffsdi2.c~~ - this is [called by gcc][jemalloc-fail] though! +- ~~ffsti2.c~~ +- ~~mulvdi3.c~~ +- ~~mulvsi3.c~~ +- ~~mulvti3.c~~ +- ~~negdf2.c~~ +- ~~negdi2.c~~ +- ~~negsf2.c~~ +- ~~negti2.c~~ +- ~~negvdi2.c~~ +- ~~negvsi2.c~~ +- ~~negvti2.c~~ +- ~~paritydi2.c~~ +- ~~paritysi2.c~~ +- ~~parityti2.c~~ +- ~~popcountdi2.c~~ +- ~~popcountsi2.c~~ +- ~~popcountti2.c~~ +- ~~ppc/restFP.S~~ +- ~~ppc/saveFP.S~~ +- ~~subvdi3.c~~ +- ~~subvsi3.c~~ +- ~~subvti3.c~~ +- ~~ucmpdi2.c~~ +- ~~ucmpti2.c~~ +- ~~udivmodti4.c~~ + +[jemalloc-fail]: https://travis-ci.org/rust-lang/rust/jobs/249772758 + +Rust only exposes atomic types on platforms that support them, and therefore does not need to fall back to software implementations. + +- ~~arm/sync_fetch_and_add_4.S~~ +- ~~arm/sync_fetch_and_add_8.S~~ +- ~~arm/sync_fetch_and_and_4.S~~ +- ~~arm/sync_fetch_and_and_8.S~~ +- ~~arm/sync_fetch_and_max_4.S~~ +- ~~arm/sync_fetch_and_max_8.S~~ +- ~~arm/sync_fetch_and_min_4.S~~ +- ~~arm/sync_fetch_and_min_8.S~~ +- ~~arm/sync_fetch_and_nand_4.S~~ +- ~~arm/sync_fetch_and_nand_8.S~~ +- ~~arm/sync_fetch_and_or_4.S~~ +- ~~arm/sync_fetch_and_or_8.S~~ +- ~~arm/sync_fetch_and_sub_4.S~~ +- ~~arm/sync_fetch_and_sub_8.S~~ +- ~~arm/sync_fetch_and_umax_4.S~~ +- ~~arm/sync_fetch_and_umax_8.S~~ +- ~~arm/sync_fetch_and_umin_4.S~~ +- ~~arm/sync_fetch_and_umin_8.S~~ +- ~~arm/sync_fetch_and_xor_4.S~~ +- ~~arm/sync_fetch_and_xor_8.S~~ +- ~~arm/sync_synchronize.S~~ +- ~~atomic.c~~ +- ~~atomic_flag_clear.c~~ +- ~~atomic_flag_clear_explicit.c~~ +- ~~atomic_flag_test_and_set.c~~ +- ~~atomic_flag_test_and_set_explicit.c~~ +- ~~atomic_signal_fence.c~~ +- ~~atomic_thread_fence.c~~ + +Miscellaneous functionality that is not used by Rust. + +- ~~aarch64/fp_mode.c~~ +- ~~aarch64/lse.S~~ (LSE atomics) +- ~~aarch64/sme-abi-init.c~~ (matrix extension) +- ~~aarch64/sme-abi.S~~ (matrix extension) +- ~~aarch64/sme-libc-routines.c~~ (matrix extension) +- ~~apple_versioning.c~~ +- ~~arm/fp_mode.c~~ +- ~~avr/exit.S~~ +- ~~clear_cache.c~~ +- ~~cpu_model/aarch64.c~~ +- ~~cpu_model/x86.c~~ +- ~~crtbegin.c~~ +- ~~crtend.c~~ +- ~~emutls.c~~ +- ~~enable_execute_stack.c~~ +- ~~eprintf.c~~ +- ~~fp_mode.c~~ (float exception handling) +- ~~gcc_personality_v0.c~~ +- ~~i386/fp_mode.c~~ +- ~~int_util.c~~ +- ~~loongarch/fp_mode.c~~ +- ~~os_version_check.c~~ +- ~~riscv/fp_mode.c~~ +- ~~riscv/restore.S~~ (callee-saved registers) +- ~~riscv/save.S~~ (callee-saved registers) +- ~~trampoline_setup.c~~ +- ~~ve/grow_stack.S~~ +- ~~ve/grow_stack_align.S~~ + +Floating-point implementations of builtins that are only called from soft-float code. It would be better to simply use the generic soft-float versions in this case. 
+ +- ~~i386/floatdidf.S~~ +- ~~i386/floatdisf.S~~ +- ~~i386/floatundidf.S~~ +- ~~i386/floatundisf.S~~ +- ~~x86_64/floatundidf.S~~ +- ~~x86_64/floatundisf.S~~ +- ~~x86_64/floatdidf.c~~ +- ~~x86_64/floatdisf.c~~ + +Unsupported in any current target: used on old versions of 32-bit iOS with ARMv5. + +- ~~arm/adddf3vfp.S~~ +- ~~arm/addsf3vfp.S~~ +- ~~arm/divdf3vfp.S~~ +- ~~arm/divsf3vfp.S~~ +- ~~arm/eqdf2vfp.S~~ +- ~~arm/eqsf2vfp.S~~ +- ~~arm/extendsfdf2vfp.S~~ +- ~~arm/fixdfsivfp.S~~ +- ~~arm/fixsfsivfp.S~~ +- ~~arm/fixunsdfsivfp.S~~ +- ~~arm/fixunssfsivfp.S~~ +- ~~arm/floatsidfvfp.S~~ +- ~~arm/floatsisfvfp.S~~ +- ~~arm/floatunssidfvfp.S~~ +- ~~arm/floatunssisfvfp.S~~ +- ~~arm/gedf2vfp.S~~ +- ~~arm/gesf2vfp.S~~ +- ~~arm/gtdf2vfp.S~~ +- ~~arm/gtsf2vfp.S~~ +- ~~arm/ledf2vfp.S~~ +- ~~arm/lesf2vfp.S~~ +- ~~arm/ltdf2vfp.S~~ +- ~~arm/ltsf2vfp.S~~ +- ~~arm/muldf3vfp.S~~ +- ~~arm/mulsf3vfp.S~~ +- ~~arm/nedf2vfp.S~~ +- ~~arm/negdf2vfp.S~~ +- ~~arm/negsf2vfp.S~~ +- ~~arm/nesf2vfp.S~~ +- ~~arm/subdf3vfp.S~~ +- ~~arm/subsf3vfp.S~~ +- ~~arm/truncdfsf2vfp.S~~ +- ~~arm/unorddf2vfp.S~~ +- ~~arm/unordsf2vfp.S~~ + +## License + +Usage is allowed under the [MIT License] and the [Apache License, Version 2.0] +with the LLVM exception. + +[MIT License]: https://opensource.org/license/mit +[Apache License, Version 2.0]: htps://www.apache.org/licenses/LICENSE-2.0 + +### Contribution + +Contributions are licensed under the MIT License, the Apache License, +Version 2.0, and the Apache-2.0 license with the LLVM exception. + +See [LICENSE.txt](../LICENSE.txt) for full details. diff --git a/library/compiler-builtins/compiler-builtins/build.rs b/library/compiler-builtins/compiler-builtins/build.rs new file mode 100644 index 0000000000000..90d98ec7ce940 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/build.rs @@ -0,0 +1,712 @@ +mod configure; + +use std::collections::BTreeMap; +use std::env; +use std::path::PathBuf; +use std::sync::atomic::Ordering; + +use configure::{Target, configure_aliases, configure_f16_f128}; + +fn main() { + println!("cargo::rerun-if-changed=build.rs"); + println!("cargo::rerun-if-changed=configure.rs"); + + let target = Target::from_env(); + let cwd = env::current_dir().unwrap(); + + configure_check_cfg(); + configure_f16_f128(&target); + configure_aliases(&target); + + configure_libm(&target); + + println!("cargo:compiler-rt={}", cwd.join("compiler-rt").display()); + + // Emscripten's runtime includes all the builtins + if target.os == "emscripten" { + return; + } + + // OpenBSD provides compiler_rt by default, use it instead of rebuilding it from source + if target.os == "openbsd" { + println!("cargo:rustc-link-search=native=/usr/lib"); + println!("cargo:rustc-link-lib=compiler_rt"); + return; + } + + // Forcibly enable memory intrinsics on wasm & SGX as we don't have a libc to + // provide them. + if (target.triple.contains("wasm") && !target.triple.contains("wasi")) + || (target.triple.contains("sgx") && target.triple.contains("fortanix")) + || target.triple.contains("-none") + || target.triple.contains("nvptx") + || target.triple.contains("uefi") + || target.triple.contains("xous") + { + println!("cargo:rustc-cfg=feature=\"mem\""); + } + + // These targets have hardware unaligned access support. 
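+    // (Illustrative sketch of what this enables in the `mem` routines, assuming
+    // the usual word-at-a-time copy: a misaligned `src.cast::<usize>().read_unaligned()`
+    // is cheap on these targets, so copies need not fall back to byte-by-byte loops.)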
+ println!("cargo::rustc-check-cfg=cfg(feature, values(\"mem-unaligned\"))"); + if target.arch.contains("x86_64") + || target.arch.contains("x86") + || target.arch.contains("aarch64") + || target.arch.contains("bpf") + { + println!("cargo:rustc-cfg=feature=\"mem-unaligned\""); + } + + // NOTE we are going to assume that llvm-target, what determines our codegen option, matches the + // target triple. This is usually correct for our built-in targets but can break in presence of + // custom targets, which can have arbitrary names. + let llvm_target = target.triple.split('-').collect::>(); + + // Build missing intrinsics from compiler-rt C source code. If we're + // mangling names though we assume that we're also in test mode so we don't + // build anything and we rely on the upstream implementation of compiler-rt + // functions + if !cfg!(feature = "mangled-names") && cfg!(feature = "c") { + // Don't use a C compiler for these targets: + // + // * nvptx - everything is bitcode, not compatible with mixed C/Rust + if !target.arch.contains("nvptx") { + #[cfg(feature = "c")] + c::compile(&llvm_target, &target); + } + } + + // Only emit the ARM Linux atomic emulation on pre-ARMv6 architectures. This + // includes the old androideabi. It is deprecated but it is available as a + // rustc target (arm-linux-androideabi). + println!("cargo::rustc-check-cfg=cfg(kernel_user_helpers)"); + if llvm_target[0] == "armv4t" + || llvm_target[0] == "armv5te" + || target.triple == "arm-linux-androideabi" + { + println!("cargo:rustc-cfg=kernel_user_helpers") + } + + if llvm_target[0].starts_with("aarch64") { + generate_aarch64_outlined_atomics(); + } +} + +/// Run configuration for `libm` since it is included directly. +/// +/// Much of this is copied from `libm/configure.rs`. +fn configure_libm(target: &Target) { + println!("cargo:rustc-check-cfg=cfg(intrinsics_enabled)"); + println!("cargo:rustc-check-cfg=cfg(arch_enabled)"); + println!("cargo:rustc-check-cfg=cfg(optimizations_enabled)"); + println!("cargo:rustc-check-cfg=cfg(feature, values(\"unstable-public-internals\"))"); + + // Always use intrinsics + println!("cargo:rustc-cfg=intrinsics_enabled"); + + // The arch module may contain assembly. + if !cfg!(feature = "no-asm") { + println!("cargo:rustc-cfg=arch_enabled"); + } + + println!("cargo:rustc-check-cfg=cfg(optimizations_enabled)"); + if !matches!(target.opt_level.as_str(), "0" | "1") { + println!("cargo:rustc-cfg=optimizations_enabled"); + } + + // Config shorthands + println!("cargo:rustc-check-cfg=cfg(x86_no_sse)"); + if target.arch == "x86" && !target.features.iter().any(|f| f == "sse") { + // Shorthand to detect i586 targets + println!("cargo:rustc-cfg=x86_no_sse"); + } + + println!( + "cargo:rustc-env=CFG_CARGO_FEATURES={:?}", + target.cargo_features + ); + println!("cargo:rustc-env=CFG_OPT_LEVEL={}", target.opt_level); + println!("cargo:rustc-env=CFG_TARGET_FEATURES={:?}", target.features); + + // Activate libm's unstable features to make full use of Nightly. + println!("cargo:rustc-cfg=feature=\"unstable-intrinsics\""); +} + +fn aarch64_symbol(ordering: Ordering) -> &'static str { + match ordering { + Ordering::Relaxed => "relax", + Ordering::Acquire => "acq", + Ordering::Release => "rel", + Ordering::AcqRel => "acq_rel", + _ => panic!("unknown symbol for {ordering:?}"), + } +} + +/// The `concat_idents` macro is extremely annoying and doesn't allow us to define new items. +/// Define them from the build script instead. 
+/// Note that the majority of the code is still defined in `aarch64.rs` through inline macros. +fn generate_aarch64_outlined_atomics() { + use std::fmt::Write; + // #[macro_export] so that we can use this in tests + let gen_macro = + |name| format!("#[macro_export] macro_rules! foreach_{name} {{ ($macro:path) => {{\n"); + + // Generate different macros for add/clr/eor/set so that we can test them separately. + let sym_names = ["cas", "ldadd", "ldclr", "ldeor", "ldset", "swp"]; + let mut macros = BTreeMap::new(); + for sym in sym_names { + macros.insert(sym, gen_macro(sym)); + } + + // Only CAS supports 16 bytes, and it has a different implementation that uses a different macro. + let mut cas16 = gen_macro("cas16"); + + for ordering in [ + Ordering::Relaxed, + Ordering::Acquire, + Ordering::Release, + Ordering::AcqRel, + ] { + let sym_ordering = aarch64_symbol(ordering); + for size in [1, 2, 4, 8] { + for (sym, macro_) in &mut macros { + let name = format!("__aarch64_{sym}{size}_{sym_ordering}"); + writeln!(macro_, "$macro!( {ordering:?}, {size}, {name} );").unwrap(); + } + } + let name = format!("__aarch64_cas16_{sym_ordering}"); + writeln!(cas16, "$macro!( {ordering:?}, {name} );").unwrap(); + } + + let mut buf = String::new(); + for macro_def in macros.values().chain(std::iter::once(&cas16)) { + buf += macro_def; + buf += "}; }\n"; + } + let out_dir = PathBuf::from(std::env::var("OUT_DIR").unwrap()); + std::fs::write(out_dir.join("outlined_atomics.rs"), buf).unwrap(); +} + +/// Emit directives for features we expect to support that aren't in `Cargo.toml`. +/// +/// These are mostly cfg elements emitted by this `build.rs`. +fn configure_check_cfg() { + // Functions where we can set the "optimized-c" flag + const HAS_OPTIMIZED_C: &[&str] = &[ + "__ashldi3", + "__ashlsi3", + "__ashrdi3", + "__ashrsi3", + "__bswapsi2", + "__bswapdi2", + "__bswapti2", + "__divdi3", + "__divsi3", + "__divmoddi4", + "__divmodsi4", + "__divmodsi4", + "__divmodti4", + "__lshrdi3", + "__lshrsi3", + "__moddi3", + "__modsi3", + "__muldi3", + "__udivdi3", + "__udivmoddi4", + "__udivmodsi4", + "__udivsi3", + "__umoddi3", + "__umodsi3", + ]; + + // Build a list of all aarch64 atomic operation functions + let mut aarch_atomic = Vec::new(); + for aarch_op in ["cas", "ldadd", "ldclr", "ldeor", "ldset", "swp"] { + let op_sizes = if aarch_op == "cas" { + [1, 2, 4, 8, 16].as_slice() + } else { + [1, 2, 4, 8].as_slice() + }; + + for op_size in op_sizes { + for ordering in ["relax", "acq", "rel", "acq_rel"] { + aarch_atomic.push(format!("__aarch64_{aarch_op}{op_size}_{ordering}")); + } + } + } + + for fn_name in HAS_OPTIMIZED_C + .iter() + .copied() + .chain(aarch_atomic.iter().map(|s| s.as_str())) + { + println!("cargo::rustc-check-cfg=cfg({fn_name}, values(\"optimized-c\"))",); + } + + // Rustc is unaware of sparc target features, but this does show up from + // `rustc --print target-features --target sparc64-unknown-linux-gnu`. 
+ println!("cargo::rustc-check-cfg=cfg(target_feature, values(\"vis3\"))"); + + // FIXME: these come from libm and should be changed there + println!("cargo::rustc-check-cfg=cfg(feature, values(\"checked\"))"); + println!("cargo::rustc-check-cfg=cfg(assert_no_panic)"); +} + +#[cfg(feature = "c")] +mod c { + use std::collections::{BTreeMap, HashSet}; + use std::env; + use std::fs::{self, File}; + use std::io::Write; + use std::path::{Path, PathBuf}; + + use super::Target; + + struct Sources { + // SYMBOL -> PATH TO SOURCE + map: BTreeMap<&'static str, &'static str>, + } + + impl Sources { + fn new() -> Sources { + Sources { + map: BTreeMap::new(), + } + } + + fn extend(&mut self, sources: &[(&'static str, &'static str)]) { + // NOTE Some intrinsics have both a generic implementation (e.g. + // `floatdidf.c`) and an arch optimized implementation + // (`x86_64/floatdidf.c`). In those cases, we keep the arch optimized + // implementation and discard the generic implementation. If we don't + // and keep both implementations, the linker will yell at us about + // duplicate symbols! + for (symbol, src) in sources { + if src.contains("/") { + // Arch-optimized implementation (preferred) + self.map.insert(symbol, src); + } else { + // Generic implementation + if !self.map.contains_key(symbol) { + self.map.insert(symbol, src); + } + } + } + } + + fn remove(&mut self, symbols: &[&str]) { + for symbol in symbols { + self.map.remove(*symbol).unwrap(); + } + } + } + + /// Compile intrinsics from the compiler-rt C source code + pub fn compile(llvm_target: &[&str], target: &Target) { + let mut consider_float_intrinsics = true; + let cfg = &mut cc::Build::new(); + + // AArch64 GCCs exit with an error condition when they encounter any kind of floating point + // code if the `nofp` and/or `nosimd` compiler flags have been set. + // + // Therefore, evaluate if those flags are present and set a boolean that causes any + // compiler-rt intrinsics that contain floating point source to be excluded for this target. + if target.arch == "aarch64" { + let cflags_key = String::from("CFLAGS_") + &(target.triple.replace("-", "_")); + if let Ok(cflags_value) = env::var(cflags_key) { + if cflags_value.contains("+nofp") || cflags_value.contains("+nosimd") { + consider_float_intrinsics = false; + } + } + } + + // `compiler-rt` requires `COMPILER_RT_HAS_FLOAT16` to be defined to make it use the + // `_Float16` type for `f16` intrinsics. This shouldn't matter as all existing `f16` + // intrinsics have been ported to Rust in `compiler-builtins` as C compilers don't + // support `_Float16` on all targets (whereas Rust does). However, define the macro + // anyway to prevent issues like rust#118813 and rust#123885 silently reoccuring if more + // `f16` intrinsics get accidentally added here in the future. 
+ cfg.define("COMPILER_RT_HAS_FLOAT16", None); + + cfg.warnings(false); + + if target.env == "msvc" { + // Don't pull in extra libraries on MSVC + cfg.flag("/Zl"); + + // Emulate C99 and C++11's __func__ for MSVC prior to 2013 CTP + cfg.define("__func__", Some("__FUNCTION__")); + } else { + // Turn off various features of gcc and such, mostly copying + // compiler-rt's build system already + cfg.flag("-fno-builtin"); + cfg.flag("-fvisibility=hidden"); + cfg.flag("-ffreestanding"); + // Avoid the following warning appearing once **per file**: + // clang: warning: optimization flag '-fomit-frame-pointer' is not supported for target 'armv7' [-Wignored-optimization-argument] + // + // Note that compiler-rt's build system also checks + // + // `check_cxx_compiler_flag(-fomit-frame-pointer COMPILER_RT_HAS_FOMIT_FRAME_POINTER_FLAG)` + // + // in https://github.com/rust-lang/compiler-rt/blob/c8fbcb3/cmake/config-ix.cmake#L19. + cfg.flag_if_supported("-fomit-frame-pointer"); + cfg.define("VISIBILITY_HIDDEN", None); + + if let "aarch64" | "arm64ec" = target.arch.as_str() { + // FIXME(llvm20): Older GCCs on A64 fail to build with + // -Werror=implicit-function-declaration due to a compiler-rt bug. + // With a newer LLVM we should be able to enable the flag everywhere. + // https://github.com/llvm/llvm-project/commit/8aa9d6206ce55bdaaf422839c351fbd63f033b89 + } else { + // Avoid implicitly creating references to undefined functions + cfg.flag("-Werror=implicit-function-declaration"); + } + } + + // int_util.c tries to include stdlib.h if `_WIN32` is defined, + // which it is when compiling UEFI targets with clang. This is + // at odds with compiling with `-ffreestanding`, as the header + // may be incompatible or not present. Create a minimal stub + // header to use instead. + if target.os == "uefi" { + let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap()); + let include_dir = out_dir.join("include"); + if !include_dir.exists() { + fs::create_dir(&include_dir).unwrap(); + } + fs::write(include_dir.join("stdlib.h"), "#include ").unwrap(); + cfg.flag(&format!("-I{}", include_dir.to_str().unwrap())); + } + + let mut sources = Sources::new(); + sources.extend(&[ + ("__absvdi2", "absvdi2.c"), + ("__absvsi2", "absvsi2.c"), + ("__addvdi3", "addvdi3.c"), + ("__addvsi3", "addvsi3.c"), + ("__cmpdi2", "cmpdi2.c"), + ("__int_util", "int_util.c"), + ("__mulvdi3", "mulvdi3.c"), + ("__mulvsi3", "mulvsi3.c"), + ("__negdi2", "negdi2.c"), + ("__negvdi2", "negvdi2.c"), + ("__negvsi2", "negvsi2.c"), + ("__paritydi2", "paritydi2.c"), + ("__paritysi2", "paritysi2.c"), + ("__popcountdi2", "popcountdi2.c"), + ("__popcountsi2", "popcountsi2.c"), + ("__subvdi3", "subvdi3.c"), + ("__subvsi3", "subvsi3.c"), + ("__ucmpdi2", "ucmpdi2.c"), + ]); + + if consider_float_intrinsics { + sources.extend(&[ + ("__divdc3", "divdc3.c"), + ("__divsc3", "divsc3.c"), + ("__muldc3", "muldc3.c"), + ("__mulsc3", "mulsc3.c"), + ("__negdf2", "negdf2.c"), + ("__negsf2", "negsf2.c"), + ]); + } + + // On iOS and 32-bit OSX these are all just empty intrinsics, no need to + // include them. 
+ if target.vendor != "apple" || target.arch != "x86" { + sources.extend(&[ + ("__absvti2", "absvti2.c"), + ("__addvti3", "addvti3.c"), + ("__cmpti2", "cmpti2.c"), + ("__ffsti2", "ffsti2.c"), + ("__mulvti3", "mulvti3.c"), + ("__negti2", "negti2.c"), + ("__parityti2", "parityti2.c"), + ("__popcountti2", "popcountti2.c"), + ("__subvti3", "subvti3.c"), + ("__ucmpti2", "ucmpti2.c"), + ]); + + if consider_float_intrinsics { + sources.extend(&[("__negvti2", "negvti2.c")]); + } + } + + if target.vendor == "apple" { + sources.extend(&[ + ("atomic_flag_clear", "atomic_flag_clear.c"), + ("atomic_flag_clear_explicit", "atomic_flag_clear_explicit.c"), + ("atomic_flag_test_and_set", "atomic_flag_test_and_set.c"), + ( + "atomic_flag_test_and_set_explicit", + "atomic_flag_test_and_set_explicit.c", + ), + ("atomic_signal_fence", "atomic_signal_fence.c"), + ("atomic_thread_fence", "atomic_thread_fence.c"), + ]); + } + + if target.env != "msvc" { + if target.arch == "x86" { + sources.extend(&[ + ("__ashldi3", "i386/ashldi3.S"), + ("__ashrdi3", "i386/ashrdi3.S"), + ("__divdi3", "i386/divdi3.S"), + ("__lshrdi3", "i386/lshrdi3.S"), + ("__moddi3", "i386/moddi3.S"), + ("__muldi3", "i386/muldi3.S"), + ("__udivdi3", "i386/udivdi3.S"), + ("__umoddi3", "i386/umoddi3.S"), + ]); + } + } + + if target.arch == "arm" && target.vendor != "apple" && target.env != "msvc" { + sources.extend(&[ + ("__aeabi_div0", "arm/aeabi_div0.c"), + ("__aeabi_drsub", "arm/aeabi_drsub.c"), + ("__aeabi_frsub", "arm/aeabi_frsub.c"), + ("__bswapdi2", "arm/bswapdi2.S"), + ("__bswapsi2", "arm/bswapsi2.S"), + ("__divmodsi4", "arm/divmodsi4.S"), + ("__divsi3", "arm/divsi3.S"), + ("__modsi3", "arm/modsi3.S"), + ("__switch16", "arm/switch16.S"), + ("__switch32", "arm/switch32.S"), + ("__switch8", "arm/switch8.S"), + ("__switchu8", "arm/switchu8.S"), + ("__sync_synchronize", "arm/sync_synchronize.S"), + ("__udivmodsi4", "arm/udivmodsi4.S"), + ("__udivsi3", "arm/udivsi3.S"), + ("__umodsi3", "arm/umodsi3.S"), + ]); + + if target.os == "freebsd" { + sources.extend(&[("__clear_cache", "clear_cache.c")]); + } + + // First of all aeabi_cdcmp and aeabi_cfcmp are never called by LLVM. + // Second are little-endian only, so build fail on big-endian targets. + // Temporally workaround: exclude these files for big-endian targets. 
+ if !llvm_target[0].starts_with("thumbeb") && !llvm_target[0].starts_with("armeb") { + sources.extend(&[ + ("__aeabi_cdcmp", "arm/aeabi_cdcmp.S"), + ("__aeabi_cdcmpeq_check_nan", "arm/aeabi_cdcmpeq_check_nan.c"), + ("__aeabi_cfcmp", "arm/aeabi_cfcmp.S"), + ("__aeabi_cfcmpeq_check_nan", "arm/aeabi_cfcmpeq_check_nan.c"), + ]); + } + } + + if llvm_target[0] == "armv7" { + sources.extend(&[ + ("__sync_fetch_and_add_4", "arm/sync_fetch_and_add_4.S"), + ("__sync_fetch_and_add_8", "arm/sync_fetch_and_add_8.S"), + ("__sync_fetch_and_and_4", "arm/sync_fetch_and_and_4.S"), + ("__sync_fetch_and_and_8", "arm/sync_fetch_and_and_8.S"), + ("__sync_fetch_and_max_4", "arm/sync_fetch_and_max_4.S"), + ("__sync_fetch_and_max_8", "arm/sync_fetch_and_max_8.S"), + ("__sync_fetch_and_min_4", "arm/sync_fetch_and_min_4.S"), + ("__sync_fetch_and_min_8", "arm/sync_fetch_and_min_8.S"), + ("__sync_fetch_and_nand_4", "arm/sync_fetch_and_nand_4.S"), + ("__sync_fetch_and_nand_8", "arm/sync_fetch_and_nand_8.S"), + ("__sync_fetch_and_or_4", "arm/sync_fetch_and_or_4.S"), + ("__sync_fetch_and_or_8", "arm/sync_fetch_and_or_8.S"), + ("__sync_fetch_and_sub_4", "arm/sync_fetch_and_sub_4.S"), + ("__sync_fetch_and_sub_8", "arm/sync_fetch_and_sub_8.S"), + ("__sync_fetch_and_umax_4", "arm/sync_fetch_and_umax_4.S"), + ("__sync_fetch_and_umax_8", "arm/sync_fetch_and_umax_8.S"), + ("__sync_fetch_and_umin_4", "arm/sync_fetch_and_umin_4.S"), + ("__sync_fetch_and_umin_8", "arm/sync_fetch_and_umin_8.S"), + ("__sync_fetch_and_xor_4", "arm/sync_fetch_and_xor_4.S"), + ("__sync_fetch_and_xor_8", "arm/sync_fetch_and_xor_8.S"), + ]); + } + + if llvm_target.last().unwrap().ends_with("eabihf") { + if !llvm_target[0].starts_with("thumbv7em") + && !llvm_target[0].starts_with("thumbv8m.main") + { + // The FPU option chosen for these architectures in cc-rs, ie: + // -mfpu=fpv4-sp-d16 for thumbv7em + // -mfpu=fpv5-sp-d16 for thumbv8m.main + // do not support double precision floating points conversions so the files + // that include such instructions are not included for these targets. 
+ sources.extend(&[ + ("__fixdfsivfp", "arm/fixdfsivfp.S"), + ("__fixunsdfsivfp", "arm/fixunsdfsivfp.S"), + ("__floatsidfvfp", "arm/floatsidfvfp.S"), + ("__floatunssidfvfp", "arm/floatunssidfvfp.S"), + ]); + } + + sources.extend(&[ + ("__fixsfsivfp", "arm/fixsfsivfp.S"), + ("__fixunssfsivfp", "arm/fixunssfsivfp.S"), + ("__floatsisfvfp", "arm/floatsisfvfp.S"), + ("__floatunssisfvfp", "arm/floatunssisfvfp.S"), + ("__floatunssisfvfp", "arm/floatunssisfvfp.S"), + ("__restore_vfp_d8_d15_regs", "arm/restore_vfp_d8_d15_regs.S"), + ("__save_vfp_d8_d15_regs", "arm/save_vfp_d8_d15_regs.S"), + ("__negdf2vfp", "arm/negdf2vfp.S"), + ("__negsf2vfp", "arm/negsf2vfp.S"), + ]); + } + + if (target.arch == "aarch64" || target.arch == "arm64ec") && consider_float_intrinsics { + sources.extend(&[ + ("__comparetf2", "comparetf2.c"), + ("__fe_getround", "fp_mode.c"), + ("__fe_raise_inexact", "fp_mode.c"), + ]); + + if target.os != "windows" && target.os != "cygwin" { + sources.extend(&[("__multc3", "multc3.c")]); + } + } + + if target.arch == "mips" || target.arch == "riscv32" || target.arch == "riscv64" { + sources.extend(&[("__bswapsi2", "bswapsi2.c")]); + } + + if target.arch == "mips64" { + sources.extend(&[("__netf2", "comparetf2.c"), ("__fe_getround", "fp_mode.c")]); + } + + if target.arch == "loongarch64" { + sources.extend(&[("__netf2", "comparetf2.c"), ("__fe_getround", "fp_mode.c")]); + } + + // Remove the assembly implementations that won't compile for the target + if llvm_target[0] == "thumbv6m" || llvm_target[0] == "thumbv8m.base" || target.os == "uefi" + { + let mut to_remove = Vec::new(); + for (k, v) in sources.map.iter() { + if v.ends_with(".S") { + to_remove.push(*k); + } + } + sources.remove(&to_remove); + } + + if llvm_target[0] == "thumbv7m" || llvm_target[0] == "thumbv7em" { + sources.remove(&["__aeabi_cdcmp", "__aeabi_cfcmp"]); + } + + // Android and Cygwin uses emulated TLS so we need a runtime support function. + if target.os == "android" || target.os == "cygwin" { + sources.extend(&[("__emutls_get_address", "emutls.c")]); + } + + // Work around a bug in the NDK headers (fixed in + // https://r.android.com/2038949 which will be released in a future + // NDK version) by providing a definition of LONG_BIT. + if target.os == "android" { + cfg.define("LONG_BIT", "(8 * sizeof(long))"); + } + + // OpenHarmony also uses emulated TLS. + if target.env == "ohos" { + sources.extend(&[("__emutls_get_address", "emutls.c")]); + } + + // When compiling the C code we require the user to tell us where the + // source code is, and this is largely done so when we're compiling as + // part of rust-lang/rust we can use the same llvm-project repository as + // rust-lang/rust. + let root = match env::var_os("RUST_COMPILER_RT_ROOT") { + Some(s) => PathBuf::from(s), + None => { + panic!( + "RUST_COMPILER_RT_ROOT is not set. You may need to run \ + `ci/download-compiler-rt.sh`." + ); + } + }; + if !root.exists() { + panic!("RUST_COMPILER_RT_ROOT={} does not exist", root.display()); + } + + // Support deterministic builds by remapping the __FILE__ prefix if the + // compiler supports it. This fixes the nondeterminism caused by the + // use of that macro in lib/builtins/int_util.h in compiler-rt. + cfg.flag_if_supported(&format!("-ffile-prefix-map={}=.", root.display())); + + // Include out-of-line atomics for aarch64, which are all generated by supplying different + // sets of flags to the same source file. + // Note: Out-of-line aarch64 atomics are not supported by the msvc toolchain (#430) and + // on uefi. 
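+        // (For reference: every generated helper follows the
+        // `__aarch64_<op><size>_<ordering>` naming scheme, e.g.
+        // `__aarch64_ldadd4_acq`, which matches the symbols LLVM emits when
+        // outline-atomics lowering is enabled.)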
+ let src_dir = root.join("lib/builtins"); + if target.arch == "aarch64" && target.env != "msvc" && target.os != "uefi" { + // See below for why we're building these as separate libraries. + build_aarch64_out_of_line_atomics_libraries(&src_dir, cfg); + + // Some run-time CPU feature detection is necessary, as well. + let cpu_model_src = if src_dir.join("cpu_model.c").exists() { + "cpu_model.c" + } else { + "cpu_model/aarch64.c" + }; + sources.extend(&[("__aarch64_have_lse_atomics", cpu_model_src)]); + } + + let mut added_sources = HashSet::new(); + for (sym, src) in sources.map.iter() { + let src = src_dir.join(src); + if added_sources.insert(src.clone()) { + cfg.file(&src); + println!("cargo:rerun-if-changed={}", src.display()); + } + println!("cargo:rustc-cfg={}=\"optimized-c\"", sym); + } + + cfg.compile("libcompiler-rt.a"); + } + + fn build_aarch64_out_of_line_atomics_libraries(builtins_dir: &Path, cfg: &mut cc::Build) { + let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap()); + let outlined_atomics_file = builtins_dir.join("aarch64").join("lse.S"); + println!("cargo:rerun-if-changed={}", outlined_atomics_file.display()); + + cfg.include(&builtins_dir); + + for instruction_type in &["cas", "swp", "ldadd", "ldclr", "ldeor", "ldset"] { + for size in &[1, 2, 4, 8, 16] { + if *size == 16 && *instruction_type != "cas" { + continue; + } + + for (model_number, model_name) in + &[(1, "relax"), (2, "acq"), (3, "rel"), (4, "acq_rel")] + { + // The original compiler-rt build system compiles the same + // source file multiple times with different compiler + // options. Here we do something slightly different: we + // create multiple .S files with the proper #defines and + // then include the original file. + // + // This is needed because the cc crate doesn't allow us to + // override the name of object files and libtool requires + // all objects in an archive to have unique names. + let path = + out_dir.join(format!("lse_{}{}_{}.S", instruction_type, size, model_name)); + let mut file = File::create(&path).unwrap(); + writeln!(file, "#define L_{}", instruction_type).unwrap(); + writeln!(file, "#define SIZE {}", size).unwrap(); + writeln!(file, "#define MODEL {}", model_number).unwrap(); + writeln!( + file, + "#include \"{}\"", + outlined_atomics_file.canonicalize().unwrap().display() + ) + .unwrap(); + drop(file); + cfg.file(path); + + let sym = format!("__aarch64_{}{}_{}", instruction_type, size, model_name); + println!("cargo:rustc-cfg={}=\"optimized-c\"", sym); + } + } + } + } +} diff --git a/library/compiler-builtins/compiler-builtins/configure.rs b/library/compiler-builtins/compiler-builtins/configure.rs new file mode 100644 index 0000000000000..d825f35a9aa05 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/configure.rs @@ -0,0 +1,136 @@ +// Configuration that is shared between `compiler_builtins` and `builtins_test`. 
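+// (The values below are read from the environment cargo provides to build
+// scripts, e.g. `TARGET`, `OPT_LEVEL`, and the `CARGO_CFG_TARGET_*` family.)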
+ +use std::env; + +#[derive(Debug)] +#[allow(dead_code)] +pub struct Target { + pub triple: String, + pub triple_split: Vec, + pub opt_level: String, + pub cargo_features: Vec, + pub os: String, + pub arch: String, + pub vendor: String, + pub env: String, + pub pointer_width: u8, + pub little_endian: bool, + pub features: Vec, +} + +impl Target { + pub fn from_env() -> Self { + let triple = env::var("TARGET").unwrap(); + let triple_split = triple.split('-').map(ToOwned::to_owned).collect(); + let little_endian = match env::var("CARGO_CFG_TARGET_ENDIAN").unwrap().as_str() { + "little" => true, + "big" => false, + x => panic!("unknown endian {x}"), + }; + let cargo_features = env::vars() + .filter_map(|(name, _value)| name.strip_prefix("CARGO_FEATURE_").map(ToOwned::to_owned)) + .map(|s| s.to_lowercase().replace("_", "-")) + .collect(); + + Self { + triple, + triple_split, + os: env::var("CARGO_CFG_TARGET_OS").unwrap(), + opt_level: env::var("OPT_LEVEL").unwrap(), + cargo_features, + arch: env::var("CARGO_CFG_TARGET_ARCH").unwrap(), + vendor: env::var("CARGO_CFG_TARGET_VENDOR").unwrap(), + env: env::var("CARGO_CFG_TARGET_ENV").unwrap(), + pointer_width: env::var("CARGO_CFG_TARGET_POINTER_WIDTH") + .unwrap() + .parse() + .unwrap(), + little_endian, + features: env::var("CARGO_CFG_TARGET_FEATURE") + .unwrap_or_default() + .split(",") + .map(ToOwned::to_owned) + .collect(), + } + } + + #[allow(dead_code)] + pub fn has_feature(&self, feature: &str) -> bool { + self.features.iter().any(|f| f == feature) + } +} + +pub fn configure_aliases(target: &Target) { + // To compile builtins-test-intrinsics for thumb targets, where there is no libc + println!("cargo::rustc-check-cfg=cfg(thumb)"); + if target.triple_split[0].starts_with("thumb") { + println!("cargo:rustc-cfg=thumb") + } + + // compiler-rt `cfg`s away some intrinsics for thumbv6m and thumbv8m.base because + // these targets do not have full Thumb-2 support but only original Thumb-1. + // We have to cfg our code accordingly. + println!("cargo::rustc-check-cfg=cfg(thumb_1)"); + if target.triple_split[0] == "thumbv6m" || target.triple_split[0] == "thumbv8m.base" { + println!("cargo:rustc-cfg=thumb_1") + } +} + +/// Configure whether or not `f16` and `f128` support should be enabled. +pub fn configure_f16_f128(target: &Target) { + // Set whether or not `f16` and `f128` are supported at a basic level by LLVM. This only means + // that the backend will not crash when using these types and generates code that can be called + // without crashing (no infinite recursion). This does not mean that the platform doesn't have + // ABI or other bugs. + // + // We do this here rather than in `rust-lang/rust` because configuring via cargo features is + // not straightforward. 
+ // + // Original source of this list: + // + let f16_enabled = match target.arch.as_str() { + // Unsupported + "arm64ec" => false, + // Selection failure + "s390x" => false, + // Infinite recursion + "csky" => false, + "hexagon" => false, + "powerpc" | "powerpc64" => false, + "sparc" | "sparc64" => false, + "wasm32" | "wasm64" => false, + // Most everything else works as of LLVM 19 + _ => true, + }; + + let f128_enabled = match target.arch.as_str() { + // Unsupported (libcall is not supported) + "amdgpu" => false, + // Unsupported + "arm64ec" => false, + // FIXME(llvm20): fixed by + "mips64" | "mips64r6" => false, + // Selection failure + "nvptx64" => false, + // Selection failure + "powerpc64" if &target.os == "aix" => false, + // Selection failure + "sparc" => false, + // Most everything else works as of LLVM 19 + _ => true, + }; + + // If the feature is set, disable these types. + let disable_both = env::var_os("CARGO_FEATURE_NO_F16_F128").is_some(); + + println!("cargo::rustc-check-cfg=cfg(f16_enabled)"); + println!("cargo::rustc-check-cfg=cfg(f128_enabled)"); + + if f16_enabled && !disable_both { + println!("cargo::rustc-cfg=f16_enabled"); + } + + if f128_enabled && !disable_both { + println!("cargo::rustc-cfg=f128_enabled"); + } +} diff --git a/library/compiler-builtins/compiler-builtins/src/aarch64.rs b/library/compiler-builtins/compiler-builtins/src/aarch64.rs new file mode 100644 index 0000000000000..80392187c89b4 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/aarch64.rs @@ -0,0 +1,21 @@ +#![allow(unused_imports)] + +use core::intrinsics; + +intrinsics! { + #[unsafe(naked)] + #[cfg(all(target_os = "uefi", not(feature = "no-asm")))] + pub unsafe extern "C" fn __chkstk() { + core::arch::naked_asm!( + ".p2align 2", + "lsl x16, x15, #4", + "mov x17, sp", + "1:", + "sub x17, x17, 4096", + "subs x16, x16, 4096", + "ldr xzr, [x17]", + "b.gt 1b", + "ret", + ); + } +} diff --git a/library/compiler-builtins/compiler-builtins/src/aarch64_linux.rs b/library/compiler-builtins/compiler-builtins/src/aarch64_linux.rs new file mode 100644 index 0000000000000..e238d0237eb31 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/aarch64_linux.rs @@ -0,0 +1,273 @@ +//! Aarch64 targets have two possible implementations for atomics: +//! 1. Load-Locked, Store-Conditional (LL/SC), older and slower. +//! 2. Large System Extensions (LSE), newer and faster. +//! To avoid breaking backwards compat, C toolchains introduced a concept of "outlined atomics", +//! where atomic operations call into the compiler runtime to dispatch between two depending on +//! which is supported on the current CPU. +//! See https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10#:~:text=out%20of%20line%20atomics for more discussion. +//! +//! Currently we only support LL/SC, because LSE requires `getauxval` from libc in order to do runtime detection. +//! Use the `compiler-rt` intrinsics if you want LSE support. +//! +//! Ported from `aarch64/lse.S` in LLVM's compiler-rt. +//! +//! Generate functions for each of the following symbols: +//! __aarch64_casM_ORDER +//! __aarch64_swpN_ORDER +//! __aarch64_ldaddN_ORDER +//! __aarch64_ldclrN_ORDER +//! __aarch64_ldeorN_ORDER +//! __aarch64_ldsetN_ORDER +//! for N = {1, 2, 4, 8}, M = {1, 2, 4, 8, 16}, ORDER = { relax, acq, rel, acq_rel } +//! +//! The original `lse.S` has some truly horrifying code that expects to be compiled multiple times with different constants. +//! 
We do something similar, but with macro arguments. +#![cfg_attr(feature = "c", allow(unused_macros))] // avoid putting the macros into a submodule + +// We don't do runtime dispatch so we don't have to worry about the `__aarch64_have_lse_atomics` global ctor. + +/// Translate a byte size to a Rust type. +#[rustfmt::skip] +macro_rules! int_ty { + (1) => { i8 }; + (2) => { i16 }; + (4) => { i32 }; + (8) => { i64 }; + (16) => { i128 }; +} + +/// Given a byte size and a register number, return a register of the appropriate size. +/// +/// See . +#[rustfmt::skip] +macro_rules! reg { + (1, $num:literal) => { concat!("w", $num) }; + (2, $num:literal) => { concat!("w", $num) }; + (4, $num:literal) => { concat!("w", $num) }; + (8, $num:literal) => { concat!("x", $num) }; +} + +/// Given an atomic ordering, translate it to the acquire suffix for the lxdr aarch64 ASM instruction. +#[rustfmt::skip] +macro_rules! acquire { + (Relaxed) => { "" }; + (Acquire) => { "a" }; + (Release) => { "" }; + (AcqRel) => { "a" }; +} + +/// Given an atomic ordering, translate it to the release suffix for the stxr aarch64 ASM instruction. +#[rustfmt::skip] +macro_rules! release { + (Relaxed) => { "" }; + (Acquire) => { "" }; + (Release) => { "l" }; + (AcqRel) => { "l" }; +} + +/// Given a size in bytes, translate it to the byte suffix for an aarch64 ASM instruction. +#[rustfmt::skip] +macro_rules! size { + (1) => { "b" }; + (2) => { "h" }; + (4) => { "" }; + (8) => { "" }; + (16) => { "" }; +} + +/// Given a byte size, translate it to an Unsigned eXTend instruction +/// with the correct semantics. +/// +/// See +#[rustfmt::skip] +macro_rules! uxt { + (1) => { "uxtb" }; + (2) => { "uxth" }; + ($_:tt) => { "mov" }; +} + +/// Given an atomic ordering and byte size, translate it to a LoaD eXclusive Register instruction +/// with the correct semantics. +/// +/// See . +macro_rules! ldxr { + ($ordering:ident, $bytes:tt) => { + concat!("ld", acquire!($ordering), "xr", size!($bytes)) + }; +} + +/// Given an atomic ordering and byte size, translate it to a STore eXclusive Register instruction +/// with the correct semantics. +/// +/// See . +macro_rules! stxr { + ($ordering:ident, $bytes:tt) => { + concat!("st", release!($ordering), "xr", size!($bytes)) + }; +} + +/// Given an atomic ordering and byte size, translate it to a LoaD eXclusive Pair of registers instruction +/// with the correct semantics. +/// +/// See +macro_rules! ldxp { + ($ordering:ident) => { + concat!("ld", acquire!($ordering), "xp") + }; +} + +/// Given an atomic ordering and byte size, translate it to a STore eXclusive Pair of registers instruction +/// with the correct semantics. +/// +/// See . +macro_rules! stxp { + ($ordering:ident) => { + concat!("st", release!($ordering), "xp") + }; +} + +/// See . +macro_rules! compare_and_swap { + ($ordering:ident, $bytes:tt, $name:ident) => { + intrinsics! { + #[maybe_use_optimized_c_shim] + #[unsafe(naked)] + pub unsafe extern "C" fn $name ( + expected: int_ty!($bytes), desired: int_ty!($bytes), ptr: *mut int_ty!($bytes) + ) -> int_ty!($bytes) { + // We can't use `AtomicI8::compare_and_swap`; we *are* compare_and_swap. + core::arch::naked_asm! 
{ + // UXT s(tmp0), s(0) + concat!(uxt!($bytes), " ", reg!($bytes, 16), ", ", reg!($bytes, 0)), + "0:", + // LDXR s(0), [x2] + concat!(ldxr!($ordering, $bytes), " ", reg!($bytes, 0), ", [x2]"), + // cmp s(0), s(tmp0) + concat!("cmp ", reg!($bytes, 0), ", ", reg!($bytes, 16)), + "bne 1f", + // STXR w(tmp1), s(1), [x2] + concat!(stxr!($ordering, $bytes), " w17, ", reg!($bytes, 1), ", [x2]"), + "cbnz w17, 0b", + "1:", + "ret", + } + } + } + }; +} + +// i128 uses a completely different impl, so it has its own macro. +macro_rules! compare_and_swap_i128 { + ($ordering:ident, $name:ident) => { + intrinsics! { + #[maybe_use_optimized_c_shim] + #[unsafe(naked)] + pub unsafe extern "C" fn $name ( + expected: i128, desired: i128, ptr: *mut i128 + ) -> i128 { + core::arch::naked_asm! { + "mov x16, x0", + "mov x17, x1", + "0:", + // LDXP x0, x1, [x4] + concat!(ldxp!($ordering), " x0, x1, [x4]"), + "cmp x0, x16", + "ccmp x1, x17, #0, eq", + "bne 1f", + // STXP w(tmp2), x2, x3, [x4] + concat!(stxp!($ordering), " w15, x2, x3, [x4]"), + "cbnz w15, 0b", + "1:", + "ret", + } + } + } + }; +} + +/// See . +macro_rules! swap { + ($ordering:ident, $bytes:tt, $name:ident) => { + intrinsics! { + #[maybe_use_optimized_c_shim] + #[unsafe(naked)] + pub unsafe extern "C" fn $name ( + left: int_ty!($bytes), right_ptr: *mut int_ty!($bytes) + ) -> int_ty!($bytes) { + core::arch::naked_asm! { + // mov s(tmp0), s(0) + concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)), + "0:", + // LDXR s(0), [x1] + concat!(ldxr!($ordering, $bytes), " ", reg!($bytes, 0), ", [x1]"), + // STXR w(tmp1), s(tmp0), [x1] + concat!(stxr!($ordering, $bytes), " w17, ", reg!($bytes, 16), ", [x1]"), + "cbnz w17, 0b", + "ret", + } + } + } + }; +} + +/// See (e.g.) . +macro_rules! fetch_op { + ($ordering:ident, $bytes:tt, $name:ident, $op:literal) => { + intrinsics! { + #[maybe_use_optimized_c_shim] + #[unsafe(naked)] + pub unsafe extern "C" fn $name ( + val: int_ty!($bytes), ptr: *mut int_ty!($bytes) + ) -> int_ty!($bytes) { + core::arch::naked_asm! { + // mov s(tmp0), s(0) + concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)), + "0:", + // LDXR s(0), [x1] + concat!(ldxr!($ordering, $bytes), " ", reg!($bytes, 0), ", [x1]"), + // OP s(tmp1), s(0), s(tmp0) + concat!($op, " ", reg!($bytes, 17), ", ", reg!($bytes, 0), ", ", reg!($bytes, 16)), + // STXR w(tmp2), s(tmp1), [x1] + concat!(stxr!($ordering, $bytes), " w15, ", reg!($bytes, 17), ", [x1]"), + "cbnz w15, 0b", + "ret", + } + } + } + } +} + +// We need a single macro to pass to `foreach_ldadd`. +macro_rules! add { + ($ordering:ident, $bytes:tt, $name:ident) => { + fetch_op! { $ordering, $bytes, $name, "add" } + }; +} + +macro_rules! and { + ($ordering:ident, $bytes:tt, $name:ident) => { + fetch_op! { $ordering, $bytes, $name, "bic" } + }; +} + +macro_rules! xor { + ($ordering:ident, $bytes:tt, $name:ident) => { + fetch_op! { $ordering, $bytes, $name, "eor" } + }; +} + +macro_rules! or { + ($ordering:ident, $bytes:tt, $name:ident) => { + fetch_op! { $ordering, $bytes, $name, "orr" } + }; +} + +// See `generate_aarch64_outlined_atomics` in build.rs. 
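The `foreach_*` macros below come from the generated `outlined_atomics.rs`. As an assumption about its shape (the exact contents are produced by build.rs and may differ), each macro simply instantiates its argument once per size/ordering combination, roughly like this:

```rust
// Assumed shape of the generated `foreach_cas!` (illustrative only): invoke
// the given macro for every (ordering, byte size, symbol name) combination.
macro_rules! foreach_cas {
    ($mac:ident) => {
        $mac!(Relaxed, 1, __aarch64_cas1_relax);
        $mac!(Acquire, 1, __aarch64_cas1_acq);
        $mac!(Release, 1, __aarch64_cas1_rel);
        $mac!(AcqRel, 1, __aarch64_cas1_acq_rel);
        // ...and likewise for sizes 2, 4 and 8
    };
}
```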
+include!(concat!(env!("OUT_DIR"), "/outlined_atomics.rs")); +foreach_cas!(compare_and_swap); +foreach_cas16!(compare_and_swap_i128); +foreach_swp!(swap); +foreach_ldadd!(add); +foreach_ldclr!(and); +foreach_ldeor!(xor); +foreach_ldset!(or); diff --git a/library/compiler-builtins/compiler-builtins/src/arm.rs b/library/compiler-builtins/compiler-builtins/src/arm.rs new file mode 100644 index 0000000000000..a9107e3cdfda4 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/arm.rs @@ -0,0 +1,280 @@ +#![cfg(not(feature = "no-asm"))] + +// Interfaces used by naked trampolines. +extern "C" { + fn __udivmodsi4(a: u32, b: u32, rem: *mut u32) -> u32; + fn __udivmoddi4(a: u64, b: u64, rem: *mut u64) -> u64; + fn __divmoddi4(a: i64, b: i64, rem: *mut i64) -> i64; +} + +extern "aapcs" { + // AAPCS is not always the correct ABI for these intrinsics, but we only use this to + // forward another `__aeabi_` call so it doesn't matter. + fn __aeabi_idiv(a: i32, b: i32) -> i32; +} + +intrinsics! { + // NOTE This function and the ones below are implemented using assembly because they are using a + // custom calling convention which can't be implemented using a normal Rust function. + #[unsafe(naked)] + #[cfg(not(target_env = "msvc"))] + pub unsafe extern "C" fn __aeabi_uidivmod() { + core::arch::naked_asm!( + "push {{lr}}", + "sub sp, sp, #4", + "mov r2, sp", + "bl {trampoline}", + "ldr r1, [sp]", + "add sp, sp, #4", + "pop {{pc}}", + trampoline = sym crate::arm::__udivmodsi4 + ); + } + + #[unsafe(naked)] + pub unsafe extern "C" fn __aeabi_uldivmod() { + core::arch::naked_asm!( + "push {{r4, lr}}", + "sub sp, sp, #16", + "add r4, sp, #8", + "str r4, [sp]", + "bl {trampoline}", + "ldr r2, [sp, #8]", + "ldr r3, [sp, #12]", + "add sp, sp, #16", + "pop {{r4, pc}}", + trampoline = sym crate::arm::__udivmoddi4 + ); + } + + #[unsafe(naked)] + pub unsafe extern "C" fn __aeabi_idivmod() { + core::arch::naked_asm!( + "push {{r0, r1, r4, lr}}", + "bl {trampoline}", + "pop {{r1, r2}}", + "muls r2, r2, r0", + "subs r1, r1, r2", + "pop {{r4, pc}}", + trampoline = sym crate::arm::__aeabi_idiv, + ); + } + + #[unsafe(naked)] + pub unsafe extern "C" fn __aeabi_ldivmod() { + core::arch::naked_asm!( + "push {{r4, lr}}", + "sub sp, sp, #16", + "add r4, sp, #8", + "str r4, [sp]", + "bl {trampoline}", + "ldr r2, [sp, #8]", + "ldr r3, [sp, #12]", + "add sp, sp, #16", + "pop {{r4, pc}}", + trampoline = sym crate::arm::__divmoddi4, + ); + } + + // FIXME(arm): The `*4` and `*8` variants should be defined as aliases. + + /// `memcpy` provided with the `aapcs` ABI. + /// + /// # Safety + /// + /// Usual `memcpy` requirements apply. + #[cfg(not(target_vendor = "apple"))] + pub unsafe extern "aapcs" fn __aeabi_memcpy(dst: *mut u8, src: *const u8, n: usize) { + // SAFETY: memcpy preconditions apply. + unsafe { crate::mem::memcpy(dst, src, n) }; + } + + /// `memcpy` for 4-byte alignment. + /// + /// # Safety + /// + /// Usual `memcpy` requirements apply. Additionally, `dest` and `src` must be aligned to + /// four bytes. + #[cfg(not(target_vendor = "apple"))] + pub unsafe extern "aapcs" fn __aeabi_memcpy4(dst: *mut u8, src: *const u8, n: usize) { + // We are guaranteed 4-alignment, so accessing at u32 is okay. + let mut dst = dst.cast::(); + let mut src = src.cast::(); + debug_assert!(dst.is_aligned()); + debug_assert!(src.is_aligned()); + let mut n = n; + + while n >= 4 { + // SAFETY: `dst` and `src` are both valid for at least 4 bytes, from + // `memcpy` preconditions and the loop guard. 
+ unsafe { *dst = *src }; + + // FIXME(addr): if we can make this end-of-address-space safe without losing + // performance, we may want to consider that. + // SAFETY: memcpy is not expected to work at the end of the address space + unsafe { + dst = dst.offset(1); + src = src.offset(1); + } + + n -= 4; + } + + // SAFETY: `dst` and `src` will still be valid for `n` bytes + unsafe { __aeabi_memcpy(dst.cast::(), src.cast::(), n) }; + } + + /// `memcpy` for 8-byte alignment. + /// + /// # Safety + /// + /// Usual `memcpy` requirements apply. Additionally, `dest` and `src` must be aligned to + /// eight bytes. + #[cfg(not(target_vendor = "apple"))] + pub unsafe extern "aapcs" fn __aeabi_memcpy8(dst: *mut u8, src: *const u8, n: usize) { + debug_assert!(dst.addr() & 7 == 0); + debug_assert!(src.addr() & 7 == 0); + + // SAFETY: memcpy preconditions apply, less strict alignment. + unsafe { __aeabi_memcpy4(dst, src, n) }; + } + + /// `memmove` provided with the `aapcs` ABI. + /// + /// # Safety + /// + /// Usual `memmove` requirements apply. + #[cfg(not(target_vendor = "apple"))] + pub unsafe extern "aapcs" fn __aeabi_memmove(dst: *mut u8, src: *const u8, n: usize) { + // SAFETY: memmove preconditions apply. + unsafe { crate::mem::memmove(dst, src, n) }; + } + + /// `memmove` for 4-byte alignment. + /// + /// # Safety + /// + /// Usual `memmove` requirements apply. Additionally, `dest` and `src` must be aligned to + /// four bytes. + #[cfg(not(any(target_vendor = "apple", target_env = "msvc")))] + pub unsafe extern "aapcs" fn __aeabi_memmove4(dst: *mut u8, src: *const u8, n: usize) { + debug_assert!(dst.addr() & 3 == 0); + debug_assert!(src.addr() & 3 == 0); + + // SAFETY: same preconditions, less strict aligment. + unsafe { __aeabi_memmove(dst, src, n) }; + } + + /// `memmove` for 8-byte alignment. + /// + /// # Safety + /// + /// Usual `memmove` requirements apply. Additionally, `dst` and `src` must be aligned to + /// eight bytes. + #[cfg(not(any(target_vendor = "apple", target_env = "msvc")))] + pub unsafe extern "aapcs" fn __aeabi_memmove8(dst: *mut u8, src: *const u8, n: usize) { + debug_assert!(dst.addr() & 7 == 0); + debug_assert!(src.addr() & 7 == 0); + + // SAFETY: memmove preconditions apply, less strict alignment. + unsafe { __aeabi_memmove(dst, src, n) }; + } + + /// `memset` provided with the `aapcs` ABI. + /// + /// # Safety + /// + /// Usual `memset` requirements apply. + #[cfg(not(target_vendor = "apple"))] + pub unsafe extern "aapcs" fn __aeabi_memset(dst: *mut u8, n: usize, c: i32) { + // Note the different argument order + // SAFETY: memset preconditions apply. + unsafe { crate::mem::memset(dst, c, n) }; + } + + /// `memset` for 4-byte alignment. + /// + /// # Safety + /// + /// Usual `memset` requirements apply. Additionally, `dest` and `src` must be aligned to + /// four bytes. + #[cfg(not(target_vendor = "apple"))] + pub unsafe extern "aapcs" fn __aeabi_memset4(dst: *mut u8, n: usize, c: i32) { + let mut dst = dst.cast::(); + debug_assert!(dst.is_aligned()); + let mut n = n; + + let byte = (c as u32) & 0xff; + let c = (byte << 24) | (byte << 16) | (byte << 8) | byte; + + while n >= 4 { + // SAFETY: `dst` is valid for at least 4 bytes, from `memset` preconditions and + // the loop guard. + unsafe { *dst = c }; + + // FIXME(addr): if we can make this end-of-address-space safe without losing + // performance, we may want to consider that. 
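The fill-byte splat a few lines above can be checked on its own; this standalone snippet mirrors the same computation and is not part of the patch.

```rust
// Splat the low byte of `c` across a 32-bit word, as `__aeabi_memset4` does
// before its word-at-a-time store loop.
fn splat(c: i32) -> u32 {
    let byte = (c as u32) & 0xff;
    (byte << 24) | (byte << 16) | (byte << 8) | byte
}

fn main() {
    assert_eq!(splat(0xAB), 0xABAB_ABAB);
    assert_eq!(splat(-1), 0xFFFF_FFFF); // only the low byte of `c` matters
}
```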
+ // SAFETY: memcpy is not expected to work at the end of the address space + unsafe { + dst = dst.offset(1); + } + n -= 4; + } + + // SAFETY: `dst` will still be valid for `n` bytes + unsafe { __aeabi_memset(dst.cast::(), n, byte as i32) }; + } + + /// `memset` for 8-byte alignment. + /// + /// # Safety + /// + /// Usual `memset` requirements apply. Additionally, `dst` and `src` must be aligned to + /// eight bytes. + #[cfg(not(target_vendor = "apple"))] + pub unsafe extern "aapcs" fn __aeabi_memset8(dst: *mut u8, n: usize, c: i32) { + debug_assert!(dst.addr() & 7 == 0); + + // SAFETY: memset preconditions apply, less strict alignment. + unsafe { __aeabi_memset4(dst, n, c) }; + } + + /// `memclr` provided with the `aapcs` ABI. + /// + /// # Safety + /// + /// Usual `memclr` requirements apply. + #[cfg(not(target_vendor = "apple"))] + pub unsafe extern "aapcs" fn __aeabi_memclr(dst: *mut u8, n: usize) { + // SAFETY: memclr preconditions apply, less strict alignment. + unsafe { __aeabi_memset(dst, n, 0) }; + } + + /// `memclr` for 4-byte alignment. + /// + /// # Safety + /// + /// Usual `memclr` requirements apply. Additionally, `dest` and `src` must be aligned to + /// four bytes. + #[cfg(not(any(target_vendor = "apple", target_env = "msvc")))] + pub unsafe extern "aapcs" fn __aeabi_memclr4(dst: *mut u8, n: usize) { + debug_assert!(dst.addr() & 3 == 0); + + // SAFETY: memclr preconditions apply, less strict alignment. + unsafe { __aeabi_memset4(dst, n, 0) }; + } + + /// `memclr` for 8-byte alignment. + /// + /// # Safety + /// + /// Usual `memclr` requirements apply. Additionally, `dst` and `src` must be aligned to + /// eight bytes. + #[cfg(not(any(target_vendor = "apple", target_env = "msvc")))] + pub unsafe extern "aapcs" fn __aeabi_memclr8(dst: *mut u8, n: usize) { + debug_assert!(dst.addr() & 7 == 0); + + // SAFETY: memclr preconditions apply, less strict alignment. + unsafe { __aeabi_memset4(dst, n, 0) }; + } +} diff --git a/library/compiler-builtins/compiler-builtins/src/arm_linux.rs b/library/compiler-builtins/compiler-builtins/src/arm_linux.rs new file mode 100644 index 0000000000000..6ce67ba719c96 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/arm_linux.rs @@ -0,0 +1,290 @@ +use core::sync::atomic::{AtomicU32, Ordering}; +use core::{arch, mem}; + +// Kernel-provided user-mode helper functions: +// https://www.kernel.org/doc/Documentation/arm/kernel_user_helpers.txt +unsafe fn __kuser_cmpxchg(oldval: u32, newval: u32, ptr: *mut u32) -> bool { + let f: extern "C" fn(u32, u32, *mut u32) -> u32 = mem::transmute(0xffff0fc0usize as *const ()); + f(oldval, newval, ptr) == 0 +} + +unsafe fn __kuser_memory_barrier() { + let f: extern "C" fn() = mem::transmute(0xffff0fa0usize as *const ()); + f(); +} + +// Word-align a pointer +fn align_ptr(ptr: *mut T) -> *mut u32 { + // This gives us a mask of 0 when T == u32 since the pointer is already + // supposed to be aligned, which avoids any masking in that case. 
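The mask expression on the next line is compact; as a standalone check (not part of the patch), here are the values it produces for each supported element size:

```rust
// Values produced by `3 & (4 - size_of::<T>())` for the supported sizes.
use core::mem;

fn mask_for<T>() -> usize {
    3 & (4 - mem::size_of::<T>())
}

fn main() {
    assert_eq!(mask_for::<u8>(), 3);  // clear the low two address bits
    assert_eq!(mask_for::<u16>(), 2); // bit 0 is already zero for an aligned u16
    assert_eq!(mask_for::<u32>(), 0); // the pointer is already word-aligned
}
```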
+ let ptr_mask = 3 & (4 - mem::size_of::()); + (ptr as usize & !ptr_mask) as *mut u32 +} + +// Calculate the shift and mask of a value inside an aligned word +fn get_shift_mask(ptr: *mut T) -> (u32, u32) { + // Mask to get the low byte/halfword/word + let mask = match mem::size_of::() { + 1 => 0xff, + 2 => 0xffff, + 4 => 0xffffffff, + _ => unreachable!(), + }; + + // If we are on big-endian then we need to adjust the shift accordingly + let endian_adjust = if cfg!(target_endian = "little") { + 0 + } else { + 4 - mem::size_of::() as u32 + }; + + // Shift to get the desired element in the word + let ptr_mask = 3 & (4 - mem::size_of::()); + let shift = ((ptr as usize & ptr_mask) as u32 ^ endian_adjust) * 8; + + (shift, mask) +} + +// Extract a value from an aligned word +fn extract_aligned(aligned: u32, shift: u32, mask: u32) -> u32 { + (aligned >> shift) & mask +} + +// Insert a value into an aligned word +fn insert_aligned(aligned: u32, val: u32, shift: u32, mask: u32) -> u32 { + (aligned & !(mask << shift)) | ((val & mask) << shift) +} + +/// Performs a relaxed atomic load of 4 bytes at `ptr`. Some of the bytes are allowed to be out of +/// bounds as long as `size_of::()` bytes are in bounds. +/// +/// # Safety +/// +/// - `ptr` must be 4-aligned. +/// - `size_of::()` must be at most 4. +/// - if `size_of::() == 1`, `ptr` or `ptr` offset by 1, 2 or 3 bytes must be valid for a relaxed +/// atomic read of 1 byte. +/// - if `size_of::() == 2`, `ptr` or `ptr` offset by 2 bytes must be valid for a relaxed atomic +/// read of 2 bytes. +/// - if `size_of::() == 4`, `ptr` must be valid for a relaxed atomic read of 4 bytes. +unsafe fn atomic_load_aligned(ptr: *mut u32) -> u32 { + if mem::size_of::() == 4 { + // SAFETY: As `T` has a size of 4, the caller garantees this is sound. + unsafe { AtomicU32::from_ptr(ptr).load(Ordering::Relaxed) } + } else { + // SAFETY: + // As all 4 bytes pointed to by `ptr` might not be dereferenceable due to being out of + // bounds when doing atomic operations on a `u8`/`i8`/`u16`/`i16`, inline ASM is used to + // avoid causing undefined behaviour. However, as `ptr` is 4-aligned and at least 1 byte of + // `ptr` is dereferencable, the load won't cause a segfault as the page size is always + // larger than 4 bytes. + // The `ldr` instruction does not touch the stack or flags, or write to memory, so + // `nostack`, `preserves_flags` and `readonly` are sound. The caller garantees that `ptr` is + // 4-aligned, as required by `ldr`. 
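A quick standalone round-trip through the `extract_aligned`/`insert_aligned` pair defined above, using a little-endian layout where the shift is just the byte offset times eight:

```rust
// Read the byte at offset 1 out of an aligned word, then splice a new value
// back in (shift = 8, mask = 0xff on little-endian).
fn extract_aligned(aligned: u32, shift: u32, mask: u32) -> u32 {
    (aligned >> shift) & mask
}

fn insert_aligned(aligned: u32, val: u32, shift: u32, mask: u32) -> u32 {
    (aligned & !(mask << shift)) | ((val & mask) << shift)
}

fn main() {
    let word = 0xAABB_CCDD_u32;
    assert_eq!(extract_aligned(word, 8, 0xff), 0xCC);
    assert_eq!(insert_aligned(word, 0x11, 8, 0xff), 0xAABB_11DD);
}
```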
+ unsafe { + let res: u32; + arch::asm!( + "ldr {res}, [{ptr}]", + ptr = in(reg) ptr, + res = lateout(reg) res, + options(nostack, preserves_flags, readonly) + ); + res + } + } +} + +// Generic atomic read-modify-write operation +unsafe fn atomic_rmw u32, G: Fn(u32, u32) -> u32>(ptr: *mut T, f: F, g: G) -> u32 { + let aligned_ptr = align_ptr(ptr); + let (shift, mask) = get_shift_mask(ptr); + + loop { + let curval_aligned = atomic_load_aligned::(aligned_ptr); + let curval = extract_aligned(curval_aligned, shift, mask); + let newval = f(curval); + let newval_aligned = insert_aligned(curval_aligned, newval, shift, mask); + if __kuser_cmpxchg(curval_aligned, newval_aligned, aligned_ptr) { + return g(curval, newval); + } + } +} + +// Generic atomic compare-exchange operation +unsafe fn atomic_cmpxchg(ptr: *mut T, oldval: u32, newval: u32) -> u32 { + let aligned_ptr = align_ptr(ptr); + let (shift, mask) = get_shift_mask(ptr); + + loop { + let curval_aligned = atomic_load_aligned::(aligned_ptr); + let curval = extract_aligned(curval_aligned, shift, mask); + if curval != oldval { + return curval; + } + let newval_aligned = insert_aligned(curval_aligned, newval, shift, mask); + if __kuser_cmpxchg(curval_aligned, newval_aligned, aligned_ptr) { + return oldval; + } + } +} + +macro_rules! atomic_rmw { + ($name:ident, $ty:ty, $op:expr, $fetch:expr) => { + intrinsics! { + pub unsafe extern "C" fn $name(ptr: *mut $ty, val: $ty) -> $ty { + atomic_rmw(ptr, |x| $op(x as $ty, val) as u32, |old, new| $fetch(old, new)) as $ty + } + } + }; + + (@old $name:ident, $ty:ty, $op:expr) => { + atomic_rmw!($name, $ty, $op, |old, _| old); + }; + + (@new $name:ident, $ty:ty, $op:expr) => { + atomic_rmw!($name, $ty, $op, |_, new| new); + }; +} +macro_rules! atomic_cmpxchg { + ($name:ident, $ty:ty) => { + intrinsics! 
{ + pub unsafe extern "C" fn $name(ptr: *mut $ty, oldval: $ty, newval: $ty) -> $ty { + atomic_cmpxchg(ptr, oldval as u32, newval as u32) as $ty + } + } + }; +} + +atomic_rmw!(@old __sync_fetch_and_add_1, u8, |a: u8, b: u8| a.wrapping_add(b)); +atomic_rmw!(@old __sync_fetch_and_add_2, u16, |a: u16, b: u16| a + .wrapping_add(b)); +atomic_rmw!(@old __sync_fetch_and_add_4, u32, |a: u32, b: u32| a + .wrapping_add(b)); + +atomic_rmw!(@new __sync_add_and_fetch_1, u8, |a: u8, b: u8| a.wrapping_add(b)); +atomic_rmw!(@new __sync_add_and_fetch_2, u16, |a: u16, b: u16| a + .wrapping_add(b)); +atomic_rmw!(@new __sync_add_and_fetch_4, u32, |a: u32, b: u32| a + .wrapping_add(b)); + +atomic_rmw!(@old __sync_fetch_and_sub_1, u8, |a: u8, b: u8| a.wrapping_sub(b)); +atomic_rmw!(@old __sync_fetch_and_sub_2, u16, |a: u16, b: u16| a + .wrapping_sub(b)); +atomic_rmw!(@old __sync_fetch_and_sub_4, u32, |a: u32, b: u32| a + .wrapping_sub(b)); + +atomic_rmw!(@new __sync_sub_and_fetch_1, u8, |a: u8, b: u8| a.wrapping_sub(b)); +atomic_rmw!(@new __sync_sub_and_fetch_2, u16, |a: u16, b: u16| a + .wrapping_sub(b)); +atomic_rmw!(@new __sync_sub_and_fetch_4, u32, |a: u32, b: u32| a + .wrapping_sub(b)); + +atomic_rmw!(@old __sync_fetch_and_and_1, u8, |a: u8, b: u8| a & b); +atomic_rmw!(@old __sync_fetch_and_and_2, u16, |a: u16, b: u16| a & b); +atomic_rmw!(@old __sync_fetch_and_and_4, u32, |a: u32, b: u32| a & b); + +atomic_rmw!(@new __sync_and_and_fetch_1, u8, |a: u8, b: u8| a & b); +atomic_rmw!(@new __sync_and_and_fetch_2, u16, |a: u16, b: u16| a & b); +atomic_rmw!(@new __sync_and_and_fetch_4, u32, |a: u32, b: u32| a & b); + +atomic_rmw!(@old __sync_fetch_and_or_1, u8, |a: u8, b: u8| a | b); +atomic_rmw!(@old __sync_fetch_and_or_2, u16, |a: u16, b: u16| a | b); +atomic_rmw!(@old __sync_fetch_and_or_4, u32, |a: u32, b: u32| a | b); + +atomic_rmw!(@new __sync_or_and_fetch_1, u8, |a: u8, b: u8| a | b); +atomic_rmw!(@new __sync_or_and_fetch_2, u16, |a: u16, b: u16| a | b); +atomic_rmw!(@new __sync_or_and_fetch_4, u32, |a: u32, b: u32| a | b); + +atomic_rmw!(@old __sync_fetch_and_xor_1, u8, |a: u8, b: u8| a ^ b); +atomic_rmw!(@old __sync_fetch_and_xor_2, u16, |a: u16, b: u16| a ^ b); +atomic_rmw!(@old __sync_fetch_and_xor_4, u32, |a: u32, b: u32| a ^ b); + +atomic_rmw!(@new __sync_xor_and_fetch_1, u8, |a: u8, b: u8| a ^ b); +atomic_rmw!(@new __sync_xor_and_fetch_2, u16, |a: u16, b: u16| a ^ b); +atomic_rmw!(@new __sync_xor_and_fetch_4, u32, |a: u32, b: u32| a ^ b); + +atomic_rmw!(@old __sync_fetch_and_nand_1, u8, |a: u8, b: u8| !(a & b)); +atomic_rmw!(@old __sync_fetch_and_nand_2, u16, |a: u16, b: u16| !(a & b)); +atomic_rmw!(@old __sync_fetch_and_nand_4, u32, |a: u32, b: u32| !(a & b)); + +atomic_rmw!(@new __sync_nand_and_fetch_1, u8, |a: u8, b: u8| !(a & b)); +atomic_rmw!(@new __sync_nand_and_fetch_2, u16, |a: u16, b: u16| !(a & b)); +atomic_rmw!(@new __sync_nand_and_fetch_4, u32, |a: u32, b: u32| !(a & b)); + +atomic_rmw!(@old __sync_fetch_and_max_1, i8, |a: i8, b: i8| if a > b { + a +} else { + b +}); +atomic_rmw!(@old __sync_fetch_and_max_2, i16, |a: i16, b: i16| if a > b { + a +} else { + b +}); +atomic_rmw!(@old __sync_fetch_and_max_4, i32, |a: i32, b: i32| if a > b { + a +} else { + b +}); + +atomic_rmw!(@old __sync_fetch_and_umax_1, u8, |a: u8, b: u8| if a > b { + a +} else { + b +}); +atomic_rmw!(@old __sync_fetch_and_umax_2, u16, |a: u16, b: u16| if a > b { + a +} else { + b +}); +atomic_rmw!(@old __sync_fetch_and_umax_4, u32, |a: u32, b: u32| if a > b { + a +} else { + b +}); + +atomic_rmw!(@old 
__sync_fetch_and_min_1, i8, |a: i8, b: i8| if a < b { + a +} else { + b +}); +atomic_rmw!(@old __sync_fetch_and_min_2, i16, |a: i16, b: i16| if a < b { + a +} else { + b +}); +atomic_rmw!(@old __sync_fetch_and_min_4, i32, |a: i32, b: i32| if a < b { + a +} else { + b +}); + +atomic_rmw!(@old __sync_fetch_and_umin_1, u8, |a: u8, b: u8| if a < b { + a +} else { + b +}); +atomic_rmw!(@old __sync_fetch_and_umin_2, u16, |a: u16, b: u16| if a < b { + a +} else { + b +}); +atomic_rmw!(@old __sync_fetch_and_umin_4, u32, |a: u32, b: u32| if a < b { + a +} else { + b +}); + +atomic_rmw!(@old __sync_lock_test_and_set_1, u8, |_: u8, b: u8| b); +atomic_rmw!(@old __sync_lock_test_and_set_2, u16, |_: u16, b: u16| b); +atomic_rmw!(@old __sync_lock_test_and_set_4, u32, |_: u32, b: u32| b); + +atomic_cmpxchg!(__sync_val_compare_and_swap_1, u8); +atomic_cmpxchg!(__sync_val_compare_and_swap_2, u16); +atomic_cmpxchg!(__sync_val_compare_and_swap_4, u32); + +intrinsics! { + pub unsafe extern "C" fn __sync_synchronize() { + __kuser_memory_barrier(); + } +} diff --git a/library/compiler-builtins/compiler-builtins/src/avr.rs b/library/compiler-builtins/compiler-builtins/src/avr.rs new file mode 100644 index 0000000000000..359a1d1acc1a7 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/avr.rs @@ -0,0 +1,23 @@ +intrinsics! { + pub unsafe extern "C" fn abort() -> ! { + // On AVRs, an architecture that doesn't support traps, unreachable code + // paths get lowered into calls to `abort`: + // + // https://github.com/llvm/llvm-project/blob/cbe8f3ad7621e402b050e768f400ff0d19c3aedd/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp#L4462 + // + // When control gets here, it means that either core::intrinsics::abort() + // was called or an undefined bebavior has occurred, so there's not that + // much we can do to recover - we can't `panic!()`, because for all we + // know the environment is gone now, so panicking might end up with us + // getting back to this very function. + // + // So let's do the next best thing, loop. + // + // Alternatively we could (try to) restart the program, but since + // undefined behavior is undefined, there's really no obligation for us + // to do anything here - for all we care, we could just set the chip on + // fire; but that'd be bad for the environment. + + loop {} + } +} diff --git a/library/compiler-builtins/compiler-builtins/src/float/add.rs b/library/compiler-builtins/compiler-builtins/src/float/add.rs new file mode 100644 index 0000000000000..0426c9cc44fbb --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/float/add.rs @@ -0,0 +1,209 @@ +use crate::float::Float; +use crate::int::{CastInto, Int, MinInt}; + +/// Returns `a + b` +fn add(a: F, b: F) -> F +where + u32: CastInto, + F::Int: CastInto, + i32: CastInto, + F::Int: CastInto, +{ + let one = F::Int::ONE; + let zero = F::Int::ZERO; + + let bits = F::BITS.cast(); + let significand_bits = F::SIG_BITS; + let max_exponent = F::EXP_SAT; + + let implicit_bit = F::IMPLICIT_BIT; + let significand_mask = F::SIG_MASK; + let sign_bit = F::SIGN_MASK as F::Int; + let abs_mask = sign_bit - one; + let exponent_mask = F::EXP_MASK; + let inf_rep = exponent_mask; + let quiet_bit = implicit_bit >> 1; + let qnan_rep = exponent_mask | quiet_bit; + + let mut a_rep = a.to_bits(); + let mut b_rep = b.to_bits(); + let a_abs = a_rep & abs_mask; + let b_abs = b_rep & abs_mask; + + // Detect if a or b is zero, infinity, or NaN. 
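For `f32`, the generic constants set up above take familiar concrete values; this standalone check (not part of the patch) spells them out:

```rust
// Concrete f32 values of the masks used by the generic `add` routine.
fn main() {
    let significand_bits = 23u32;
    let sign_bit: u32 = 1 << 31;
    let exponent_mask: u32 = 0xff << significand_bits; // 0x7f80_0000
    let implicit_bit: u32 = 1 << significand_bits;     // 0x0080_0000
    let quiet_bit = implicit_bit >> 1;                  // 0x0040_0000
    let inf_rep = exponent_mask;

    assert_eq!((-0.0f32).to_bits(), sign_bit);
    assert_eq!(f32::INFINITY.to_bits(), inf_rep);
    assert_eq!(f32::NAN.to_bits() & quiet_bit, quiet_bit); // quiet NaN sets this bit
}
```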
+ if a_abs.wrapping_sub(one) >= inf_rep - one || b_abs.wrapping_sub(one) >= inf_rep - one { + // NaN + anything = qNaN + if a_abs > inf_rep { + return F::from_bits(a_abs | quiet_bit); + } + // anything + NaN = qNaN + if b_abs > inf_rep { + return F::from_bits(b_abs | quiet_bit); + } + + if a_abs == inf_rep { + // +/-infinity + -/+infinity = qNaN + if (a.to_bits() ^ b.to_bits()) == sign_bit { + return F::from_bits(qnan_rep); + } else { + // +/-infinity + anything remaining = +/- infinity + return a; + } + } + + // anything remaining + +/-infinity = +/-infinity + if b_abs == inf_rep { + return b; + } + + // zero + anything = anything + if a_abs == MinInt::ZERO { + // but we need to get the sign right for zero + zero + if b_abs == MinInt::ZERO { + return F::from_bits(a.to_bits() & b.to_bits()); + } else { + return b; + } + } + + // anything + zero = anything + if b_abs == MinInt::ZERO { + return a; + } + } + + // Swap a and b if necessary so that a has the larger absolute value. + if b_abs > a_abs { + // Don't use mem::swap because it may generate references to memcpy in unoptimized code. + let tmp = a_rep; + a_rep = b_rep; + b_rep = tmp; + } + + // Extract the exponent and significand from the (possibly swapped) a and b. + let mut a_exponent: i32 = ((a_rep & exponent_mask) >> significand_bits).cast(); + let mut b_exponent: i32 = ((b_rep & exponent_mask) >> significand_bits).cast(); + let mut a_significand = a_rep & significand_mask; + let mut b_significand = b_rep & significand_mask; + + // normalize any denormals, and adjust the exponent accordingly. + if a_exponent == 0 { + let (exponent, significand) = F::normalize(a_significand); + a_exponent = exponent; + a_significand = significand; + } + if b_exponent == 0 { + let (exponent, significand) = F::normalize(b_significand); + b_exponent = exponent; + b_significand = significand; + } + + // The sign of the result is the sign of the larger operand, a. If they + // have opposite signs, we are performing a subtraction; otherwise addition. + let result_sign = a_rep & sign_bit; + let subtraction = ((a_rep ^ b_rep) & sign_bit) != zero; + + // Shift the significands to give us round, guard and sticky, and or in the + // implicit significand bit. (If we fell through from the denormal path it + // was already set by normalize(), but setting it twice won't hurt + // anything.) + a_significand = (a_significand | implicit_bit) << 3; + b_significand = (b_significand | implicit_bit) << 3; + + // Shift the significand of b by the difference in exponents, with a sticky + // bottom bit to get rounding correct. + let align = a_exponent.wrapping_sub(b_exponent).cast(); + if align != MinInt::ZERO { + if align < bits { + let sticky = + F::Int::from_bool(b_significand << bits.wrapping_sub(align).cast() != MinInt::ZERO); + b_significand = (b_significand >> align.cast()) | sticky; + } else { + b_significand = one; // sticky; b is known to be non-zero. + } + } + if subtraction { + a_significand = a_significand.wrapping_sub(b_significand); + // If a == -b, return +zero. 
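The three extra bits carried through the arithmetic here (round, guard, sticky) feed the final rounding step at the end of this function. As a standalone sketch of that rule, round-to-nearest with ties to even on a value carrying three trailing GRS bits looks like this:

```rust
// Round-to-nearest-even for a value with 3 extra round/guard/sticky bits,
// mirroring the final rounding step of `add`.
fn round_rne(x_with_grs: u32) -> u32 {
    let grs = x_with_grs & 0x7;
    let mut result = x_with_grs >> 3;
    if grs > 0x4 {
        result += 1; // more than halfway: round up
    } else if grs == 0x4 {
        result += result & 1; // exactly halfway: round to even
    }
    result
}

fn main() {
    assert_eq!(round_rne(0b1010_011), 0b1010); // below halfway: truncate
    assert_eq!(round_rne(0b1010_100), 0b1010); // tie, result already even: keep
    assert_eq!(round_rne(0b1011_100), 0b1100); // tie, result odd: round up
    assert_eq!(round_rne(0b1010_101), 0b1011); // above halfway: round up
}
```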
+ if a_significand == MinInt::ZERO { + return F::from_bits(MinInt::ZERO); + } + + // If partial cancellation occured, we need to left-shift the result + // and adjust the exponent: + if a_significand < implicit_bit << 3 { + let shift = + a_significand.leading_zeros() as i32 - (implicit_bit << 3).leading_zeros() as i32; + a_significand <<= shift; + a_exponent -= shift; + } + } else { + // addition + a_significand += b_significand; + + // If the addition carried up, we need to right-shift the result and + // adjust the exponent: + if a_significand & (implicit_bit << 4) != MinInt::ZERO { + let sticky = F::Int::from_bool(a_significand & one != MinInt::ZERO); + a_significand = (a_significand >> 1) | sticky; + a_exponent += 1; + } + } + + // If we have overflowed the type, return +/- infinity: + if a_exponent >= max_exponent as i32 { + return F::from_bits(inf_rep | result_sign); + } + + if a_exponent <= 0 { + // Result is denormal before rounding; the exponent is zero and we + // need to shift the significand. + let shift = (1 - a_exponent).cast(); + let sticky = + F::Int::from_bool((a_significand << bits.wrapping_sub(shift).cast()) != MinInt::ZERO); + a_significand = (a_significand >> shift.cast()) | sticky; + a_exponent = 0; + } + + // Low three bits are round, guard, and sticky. + let a_significand_i32: i32 = a_significand.cast(); + let round_guard_sticky: i32 = a_significand_i32 & 0x7; + + // Shift the significand into place, and mask off the implicit bit. + let mut result = (a_significand >> 3) & significand_mask; + + // Insert the exponent and sign. + result |= a_exponent.cast() << significand_bits; + result |= result_sign; + + // Final rounding. The result may overflow to infinity, but that is the + // correct result in that case. + if round_guard_sticky > 0x4 { + result += one; + } + if round_guard_sticky == 0x4 { + result += result & one; + } + + F::from_bits(result) +} + +intrinsics! 
{ + #[aapcs_on_arm] + #[arm_aeabi_alias = __aeabi_fadd] + pub extern "C" fn __addsf3(a: f32, b: f32) -> f32 { + add(a, b) + } + + #[aapcs_on_arm] + #[arm_aeabi_alias = __aeabi_dadd] + pub extern "C" fn __adddf3(a: f64, b: f64) -> f64 { + add(a, b) + } + + #[ppc_alias = __addkf3] + #[cfg(f128_enabled)] + pub extern "C" fn __addtf3(a: f128, b: f128) -> f128 { + add(a, b) + } +} diff --git a/library/compiler-builtins/compiler-builtins/src/float/cmp.rs b/library/compiler-builtins/compiler-builtins/src/float/cmp.rs new file mode 100644 index 0000000000000..296952821cb46 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/float/cmp.rs @@ -0,0 +1,248 @@ +#![allow(unreachable_code)] + +use crate::float::Float; +use crate::int::MinInt; + +// https://github.com/llvm/llvm-project/blob/1e6ba3cd2fe96be00b6ed6ba28b3d9f9271d784d/compiler-rt/lib/builtins/fp_compare_impl.inc#L22 +#[cfg(target_arch = "avr")] +pub type CmpResult = i8; + +// https://github.com/llvm/llvm-project/blob/1e6ba3cd2fe96be00b6ed6ba28b3d9f9271d784d/compiler-rt/lib/builtins/fp_compare_impl.inc#L25 +#[cfg(not(target_arch = "avr"))] +pub type CmpResult = i32; + +#[derive(Clone, Copy)] +enum Result { + Less, + Equal, + Greater, + Unordered, +} + +impl Result { + fn to_le_abi(self) -> CmpResult { + match self { + Result::Less => -1, + Result::Equal => 0, + Result::Greater => 1, + Result::Unordered => 1, + } + } + + fn to_ge_abi(self) -> CmpResult { + match self { + Result::Less => -1, + Result::Equal => 0, + Result::Greater => 1, + Result::Unordered => -1, + } + } +} + +fn cmp(a: F, b: F) -> Result { + let one = F::Int::ONE; + let zero = F::Int::ZERO; + let szero = F::SignedInt::ZERO; + + let sign_bit = F::SIGN_MASK as F::Int; + let abs_mask = sign_bit - one; + let exponent_mask = F::EXP_MASK; + let inf_rep = exponent_mask; + + let a_rep = a.to_bits(); + let b_rep = b.to_bits(); + let a_abs = a_rep & abs_mask; + let b_abs = b_rep & abs_mask; + + // If either a or b is NaN, they are unordered. + if a_abs > inf_rep || b_abs > inf_rep { + return Result::Unordered; + } + + // If a and b are both zeros, they are equal. + if a_abs | b_abs == zero { + return Result::Equal; + } + + let a_srep = a.to_bits_signed(); + let b_srep = b.to_bits_signed(); + + // If at least one of a and b is positive, we get the same result comparing + // a and b as signed integers as we would with a fp_ting-point compare. + if a_srep & b_srep >= szero { + if a_srep < b_srep { + Result::Less + } else if a_srep == b_srep { + Result::Equal + } else { + Result::Greater + } + // Otherwise, both are negative, so we need to flip the sense of the + // comparison to get the correct result. (This assumes a twos- or ones- + // complement integer representation; if integers are represented in a + // sign-magnitude representation, then this flip is incorrect). + } else if a_srep > b_srep { + Result::Less + } else if a_srep == b_srep { + Result::Equal + } else { + Result::Greater + } +} + +fn unord(a: F, b: F) -> bool { + let one = F::Int::ONE; + + let sign_bit = F::SIGN_MASK as F::Int; + let abs_mask = sign_bit - one; + let exponent_mask = F::EXP_MASK; + let inf_rep = exponent_mask; + + let a_rep = a.to_bits(); + let b_rep = b.to_bits(); + let a_abs = a_rep & abs_mask; + let b_abs = b_rep & abs_mask; + + a_abs > inf_rep || b_abs > inf_rep +} + +intrinsics! 
{ + pub extern "C" fn __lesf2(a: f32, b: f32) -> crate::float::cmp::CmpResult { + cmp(a, b).to_le_abi() + } + + pub extern "C" fn __gesf2(a: f32, b: f32) -> crate::float::cmp::CmpResult { + cmp(a, b).to_ge_abi() + } + + #[arm_aeabi_alias = __aeabi_fcmpun] + pub extern "C" fn __unordsf2(a: f32, b: f32) -> crate::float::cmp::CmpResult { + unord(a, b) as crate::float::cmp::CmpResult + } + + pub extern "C" fn __eqsf2(a: f32, b: f32) -> crate::float::cmp::CmpResult { + cmp(a, b).to_le_abi() + } + + pub extern "C" fn __ltsf2(a: f32, b: f32) -> crate::float::cmp::CmpResult { + cmp(a, b).to_le_abi() + } + + pub extern "C" fn __nesf2(a: f32, b: f32) -> crate::float::cmp::CmpResult { + cmp(a, b).to_le_abi() + } + + pub extern "C" fn __gtsf2(a: f32, b: f32) -> crate::float::cmp::CmpResult { + cmp(a, b).to_ge_abi() + } + + pub extern "C" fn __ledf2(a: f64, b: f64) -> crate::float::cmp::CmpResult { + cmp(a, b).to_le_abi() + } + + pub extern "C" fn __gedf2(a: f64, b: f64) -> crate::float::cmp::CmpResult { + cmp(a, b).to_ge_abi() + } + + #[arm_aeabi_alias = __aeabi_dcmpun] + pub extern "C" fn __unorddf2(a: f64, b: f64) -> crate::float::cmp::CmpResult { + unord(a, b) as crate::float::cmp::CmpResult + } + + pub extern "C" fn __eqdf2(a: f64, b: f64) -> crate::float::cmp::CmpResult { + cmp(a, b).to_le_abi() + } + + pub extern "C" fn __ltdf2(a: f64, b: f64) -> crate::float::cmp::CmpResult { + cmp(a, b).to_le_abi() + } + + pub extern "C" fn __nedf2(a: f64, b: f64) -> crate::float::cmp::CmpResult { + cmp(a, b).to_le_abi() + } + + pub extern "C" fn __gtdf2(a: f64, b: f64) -> crate::float::cmp::CmpResult { + cmp(a, b).to_ge_abi() + } +} + +#[cfg(f128_enabled)] +intrinsics! { + #[ppc_alias = __lekf2] + pub extern "C" fn __letf2(a: f128, b: f128) -> crate::float::cmp::CmpResult { + cmp(a, b).to_le_abi() + } + + #[ppc_alias = __gekf2] + pub extern "C" fn __getf2(a: f128, b: f128) -> crate::float::cmp::CmpResult { + cmp(a, b).to_ge_abi() + } + + #[ppc_alias = __unordkf2] + pub extern "C" fn __unordtf2(a: f128, b: f128) -> crate::float::cmp::CmpResult { + unord(a, b) as crate::float::cmp::CmpResult + } + + #[ppc_alias = __eqkf2] + pub extern "C" fn __eqtf2(a: f128, b: f128) -> crate::float::cmp::CmpResult { + cmp(a, b).to_le_abi() + } + + #[ppc_alias = __ltkf2] + pub extern "C" fn __lttf2(a: f128, b: f128) -> crate::float::cmp::CmpResult { + cmp(a, b).to_le_abi() + } + + #[ppc_alias = __nekf2] + pub extern "C" fn __netf2(a: f128, b: f128) -> crate::float::cmp::CmpResult { + cmp(a, b).to_le_abi() + } + + #[ppc_alias = __gtkf2] + pub extern "C" fn __gttf2(a: f128, b: f128) -> crate::float::cmp::CmpResult { + cmp(a, b).to_ge_abi() + } +} + +#[cfg(target_arch = "arm")] +intrinsics! 
{ + pub extern "aapcs" fn __aeabi_fcmple(a: f32, b: f32) -> i32 { + (__lesf2(a, b) <= 0) as i32 + } + + pub extern "aapcs" fn __aeabi_fcmpge(a: f32, b: f32) -> i32 { + (__gesf2(a, b) >= 0) as i32 + } + + pub extern "aapcs" fn __aeabi_fcmpeq(a: f32, b: f32) -> i32 { + (__eqsf2(a, b) == 0) as i32 + } + + pub extern "aapcs" fn __aeabi_fcmplt(a: f32, b: f32) -> i32 { + (__ltsf2(a, b) < 0) as i32 + } + + pub extern "aapcs" fn __aeabi_fcmpgt(a: f32, b: f32) -> i32 { + (__gtsf2(a, b) > 0) as i32 + } + + pub extern "aapcs" fn __aeabi_dcmple(a: f64, b: f64) -> i32 { + (__ledf2(a, b) <= 0) as i32 + } + + pub extern "aapcs" fn __aeabi_dcmpge(a: f64, b: f64) -> i32 { + (__gedf2(a, b) >= 0) as i32 + } + + pub extern "aapcs" fn __aeabi_dcmpeq(a: f64, b: f64) -> i32 { + (__eqdf2(a, b) == 0) as i32 + } + + pub extern "aapcs" fn __aeabi_dcmplt(a: f64, b: f64) -> i32 { + (__ltdf2(a, b) < 0) as i32 + } + + pub extern "aapcs" fn __aeabi_dcmpgt(a: f64, b: f64) -> i32 { + (__gtdf2(a, b) > 0) as i32 + } +} diff --git a/library/compiler-builtins/compiler-builtins/src/float/conv.rs b/library/compiler-builtins/compiler-builtins/src/float/conv.rs new file mode 100644 index 0000000000000..f5427a11390fc --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/float/conv.rs @@ -0,0 +1,489 @@ +use core::ops::Neg; + +use super::Float; +use crate::int::{CastFrom, CastInto, Int, MinInt}; + +/// Conversions from integers to floats. +/// +/// The algorithm is explained here: . It roughly does the following: +/// - Calculate a base mantissa by shifting the integer into mantissa position. This gives us a +/// mantissa _with the implicit bit set_! +/// - Figure out if rounding needs to occur by classifying the bits that are to be truncated. Some +/// patterns are used to simplify this. Adjust the mantissa with the result if needed. +/// - Calculate the exponent based on the base-2 logarithm of `i` (leading zeros). Subtract one. +/// - Shift the exponent and add the mantissa to create the final representation. Subtracting one +/// from the exponent (above) accounts for the explicit bit being set in the mantissa. +/// +/// # Terminology +/// +/// - `i`: the original integer +/// - `i_m`: the integer, shifted fully left (no leading zeros) +/// - `n`: number of leading zeroes +/// - `e`: the resulting exponent. Usually 1 is subtracted to offset the mantissa implicit bit. +/// - `m_base`: the mantissa before adjusting for truncated bits. Implicit bit is usually set. +/// - `adj`: the bits that will be truncated, possibly compressed in some way. +/// - `m`: the resulting mantissa. Implicit bit is usually set. +mod int_to_float { + use super::*; + + /// Calculate the exponent from the number of leading zeros. + /// + /// Usually 1 is subtracted from this function's result, so that a mantissa with the implicit + /// bit set can be added back later. + fn exp>>(n: u32) -> F::Int { + F::Int::cast_from(F::EXP_BIAS - 1 + I::BITS - n) + } + + /// Adjust a mantissa with dropped bits to perform correct rounding. + /// + /// The dropped bits should be exactly the bits that get truncated (left-aligned), but they + /// can be combined or compressed in some way that simplifies operations. + fn m_adj(m_base: F::Int, dropped_bits: F::Int) -> F::Int { + // Branchlessly extract a `1` if rounding up should happen, 0 otherwise + // This accounts for rounding to even. + let adj = (dropped_bits - ((dropped_bits >> (F::BITS - 1)) & !m_base)) >> (F::BITS - 1); + + // Add one when we need to round up. Break ties to even. 
+ m_base + adj + } + + /// Shift the exponent to its position and add the mantissa. + /// + /// If the mantissa has the implicit bit set, the exponent should be one less than its actual + /// value to cancel it out. + fn repr(e: F::Int, m: F::Int) -> F::Int { + // + rather than | so the mantissa can overflow into the exponent + (e << F::SIG_BITS) + m + } + + /// Shift distance from a left-aligned integer to a smaller float. + fn shift_f_lt_i() -> u32 { + (I::BITS - F::BITS) + F::EXP_BITS + } + + /// Shift distance from an integer with `n` leading zeros to a smaller float. + fn shift_f_gt_i(n: u32) -> u32 { + F::SIG_BITS - I::BITS + 1 + n + } + + /// Perform a signed operation as unsigned, then add the sign back. + pub fn signed(i: I, conv: Conv) -> F + where + F: Float, + I: Int, + F::Int: CastFrom, + Conv: Fn(I::UnsignedInt) -> F::Int, + { + let sign_bit = F::Int::cast_from(i >> (I::BITS - 1)) << (F::BITS - 1); + F::from_bits(conv(i.unsigned_abs()) | sign_bit) + } + + pub fn u32_to_f32_bits(i: u32) -> u32 { + if i == 0 { + return 0; + } + let n = i.leading_zeros(); + // Mantissa with implicit bit set (significant bits) + let m_base = (i << n) >> f32::EXP_BITS; + // Bits that will be dropped (insignificant bits) + let adj = (i << n) << (f32::SIG_BITS + 1); + let m = m_adj::(m_base, adj); + let e = exp::(n) - 1; + repr::(e, m) + } + + pub fn u32_to_f64_bits(i: u32) -> u64 { + if i == 0 { + return 0; + } + let n = i.leading_zeros(); + // Mantissa with implicit bit set + let m = (i as u64) << shift_f_gt_i::(n); + let e = exp::(n) - 1; + repr::(e, m) + } + + #[cfg(f128_enabled)] + pub fn u32_to_f128_bits(i: u32) -> u128 { + if i == 0 { + return 0; + } + let n = i.leading_zeros(); + + // Shift into mantissa position that is correct for the type, but shifted into the lower + // 64 bits over so can can avoid 128-bit math. + let m = (i as u64) << (shift_f_gt_i::(n) - 64); + let e = exp::(n) as u64 - 1; + // High 64 bits of f128 representation. + let h = (e << (f128::SIG_BITS - 64)) + m; + + // Shift back to the high bits, the rest of the mantissa will always be 0. + (h as u128) << 64 + } + + pub fn u64_to_f32_bits(i: u64) -> u32 { + let n = i.leading_zeros(); + let i_m = i.wrapping_shl(n); + // Mantissa with implicit bit set + let m_base: u32 = (i_m >> shift_f_lt_i::()) as u32; + // The entire lower half of `i` will be truncated (masked portion), plus the + // next `EXP_BITS` bits. 
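The dropped-bit adjustment computed here exists only to make the final rounding correct. The observable effect is the usual round-to-nearest, ties-to-even behaviour of integer-to-float conversion, which can be seen with plain `as` casts:

```rust
// Why the rounding adjustment matters: u32 values above 2^24 are not all
// representable in f32, so conversion rounds to nearest, ties to even.
fn main() {
    assert_eq!(16_777_217u32 as f32, 16_777_216.0); // 2^24 + 1: tie, rounds to even
    assert_eq!(16_777_219u32 as f32, 16_777_220.0); // 2^24 + 3: tie, rounds to even
    assert_eq!(u32::MAX as f32, 4_294_967_296.0);   // rounds up to 2^32
}
```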
+ let adj = ((i_m >> f32::EXP_BITS) | i_m & 0xFFFF) as u32; + let m = m_adj::(m_base, adj); + let e = if i == 0 { 0 } else { exp::(n) - 1 }; + repr::(e, m) + } + + pub fn u64_to_f64_bits(i: u64) -> u64 { + if i == 0 { + return 0; + } + let n = i.leading_zeros(); + // Mantissa with implicit bit set + let m_base = (i << n) >> f64::EXP_BITS; + let adj = (i << n) << (f64::SIG_BITS + 1); + let m = m_adj::(m_base, adj); + let e = exp::(n) - 1; + repr::(e, m) + } + + #[cfg(f128_enabled)] + pub fn u64_to_f128_bits(i: u64) -> u128 { + if i == 0 { + return 0; + } + let n = i.leading_zeros(); + // Mantissa with implicit bit set + let m = (i as u128) << shift_f_gt_i::(n); + let e = exp::(n) - 1; + repr::(e, m) + } + + pub fn u128_to_f32_bits(i: u128) -> u32 { + let n = i.leading_zeros(); + let i_m = i.wrapping_shl(n); // Mantissa, shifted so the first bit is nonzero + let m_base: u32 = (i_m >> shift_f_lt_i::()) as u32; + + // Within the upper `F::BITS`, everything except for the signifcand + // gets truncated + let d1: u32 = (i_m >> (u128::BITS - f32::BITS - f32::SIG_BITS - 1)).cast(); + + // The entire rest of `i_m` gets truncated. Zero the upper `F::BITS` then just + // check if it is nonzero. + let d2: u32 = (i_m << f32::BITS >> f32::BITS != 0).into(); + let adj = d1 | d2; + + // Mantissa with implicit bit set + let m = m_adj::(m_base, adj); + let e = if i == 0 { 0 } else { exp::(n) - 1 }; + repr::(e, m) + } + + pub fn u128_to_f64_bits(i: u128) -> u64 { + let n = i.leading_zeros(); + let i_m = i.wrapping_shl(n); + // Mantissa with implicit bit set + let m_base: u64 = (i_m >> shift_f_lt_i::()) as u64; + // The entire lower half of `i` will be truncated (masked portion), plus the + // next `EXP_BITS` bits. + let adj = ((i_m >> f64::EXP_BITS) | i_m & 0xFFFF_FFFF) as u64; + let m = m_adj::(m_base, adj); + let e = if i == 0 { 0 } else { exp::(n) - 1 }; + repr::(e, m) + } + + #[cfg(f128_enabled)] + pub fn u128_to_f128_bits(i: u128) -> u128 { + if i == 0 { + return 0; + } + let n = i.leading_zeros(); + // Mantissa with implicit bit set + let m_base = (i << n) >> f128::EXP_BITS; + let adj = (i << n) << (f128::SIG_BITS + 1); + let m = m_adj::(m_base, adj); + let e = exp::(n) - 1; + repr::(e, m) + } +} + +// Conversions from unsigned integers to floats. +intrinsics! 
{ + #[arm_aeabi_alias = __aeabi_ui2f] + pub extern "C" fn __floatunsisf(i: u32) -> f32 { + f32::from_bits(int_to_float::u32_to_f32_bits(i)) + } + + #[arm_aeabi_alias = __aeabi_ui2d] + pub extern "C" fn __floatunsidf(i: u32) -> f64 { + f64::from_bits(int_to_float::u32_to_f64_bits(i)) + } + + #[arm_aeabi_alias = __aeabi_ul2f] + pub extern "C" fn __floatundisf(i: u64) -> f32 { + f32::from_bits(int_to_float::u64_to_f32_bits(i)) + } + + #[arm_aeabi_alias = __aeabi_ul2d] + pub extern "C" fn __floatundidf(i: u64) -> f64 { + f64::from_bits(int_to_float::u64_to_f64_bits(i)) + } + + #[cfg_attr(target_os = "uefi", unadjusted_on_win64)] + pub extern "C" fn __floatuntisf(i: u128) -> f32 { + f32::from_bits(int_to_float::u128_to_f32_bits(i)) + } + + #[cfg_attr(target_os = "uefi", unadjusted_on_win64)] + pub extern "C" fn __floatuntidf(i: u128) -> f64 { + f64::from_bits(int_to_float::u128_to_f64_bits(i)) + } + + #[ppc_alias = __floatunsikf] + #[cfg(f128_enabled)] + pub extern "C" fn __floatunsitf(i: u32) -> f128 { + f128::from_bits(int_to_float::u32_to_f128_bits(i)) + } + + #[ppc_alias = __floatundikf] + #[cfg(f128_enabled)] + pub extern "C" fn __floatunditf(i: u64) -> f128 { + f128::from_bits(int_to_float::u64_to_f128_bits(i)) + } + + #[ppc_alias = __floatuntikf] + #[cfg(f128_enabled)] + pub extern "C" fn __floatuntitf(i: u128) -> f128 { + f128::from_bits(int_to_float::u128_to_f128_bits(i)) + } +} + +// Conversions from signed integers to floats. +intrinsics! { + #[arm_aeabi_alias = __aeabi_i2f] + pub extern "C" fn __floatsisf(i: i32) -> f32 { + int_to_float::signed(i, int_to_float::u32_to_f32_bits) + } + + #[arm_aeabi_alias = __aeabi_i2d] + pub extern "C" fn __floatsidf(i: i32) -> f64 { + int_to_float::signed(i, int_to_float::u32_to_f64_bits) + } + + #[arm_aeabi_alias = __aeabi_l2f] + pub extern "C" fn __floatdisf(i: i64) -> f32 { + int_to_float::signed(i, int_to_float::u64_to_f32_bits) + } + + #[arm_aeabi_alias = __aeabi_l2d] + pub extern "C" fn __floatdidf(i: i64) -> f64 { + int_to_float::signed(i, int_to_float::u64_to_f64_bits) + } + + #[cfg_attr(target_os = "uefi", unadjusted_on_win64)] + pub extern "C" fn __floattisf(i: i128) -> f32 { + int_to_float::signed(i, int_to_float::u128_to_f32_bits) + } + + #[cfg_attr(target_os = "uefi", unadjusted_on_win64)] + pub extern "C" fn __floattidf(i: i128) -> f64 { + int_to_float::signed(i, int_to_float::u128_to_f64_bits) + } + + #[ppc_alias = __floatsikf] + #[cfg(f128_enabled)] + pub extern "C" fn __floatsitf(i: i32) -> f128 { + int_to_float::signed(i, int_to_float::u32_to_f128_bits) + } + + #[ppc_alias = __floatdikf] + #[cfg(f128_enabled)] + pub extern "C" fn __floatditf(i: i64) -> f128 { + int_to_float::signed(i, int_to_float::u64_to_f128_bits) + } + + #[ppc_alias = __floattikf] + #[cfg(f128_enabled)] + pub extern "C" fn __floattitf(i: i128) -> f128 { + int_to_float::signed(i, int_to_float::u128_to_f128_bits) + } +} + +/// Generic float to unsigned int conversions. +fn float_to_unsigned_int(f: F) -> U +where + F: Float, + U: Int, + F::Int: CastInto, + F::Int: CastFrom, + F::Int: CastInto, + u32: CastFrom, +{ + float_to_int_inner::(f.to_bits(), |i: U| i, || U::MAX) +} + +/// Generic float to signed int conversions. 
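Both the unsigned path above and the signed path below implement saturating conversion. In observable terms this matches Rust's own float-to-integer `as` casts:

```rust
// Saturating float-to-int behaviour: out-of-range values clamp to the integer
// type's range and NaN becomes zero.
fn main() {
    assert_eq!(f32::MAX as u32, u32::MAX); // too large: clamps to the maximum
    assert_eq!(-1.0f32 as u32, 0);         // negative: clamps to zero
    assert_eq!(f32::NAN as i32, 0);        // NaN: defined to be zero
    assert_eq!(3.9f32 as i32, 3);          // in range: truncates toward zero
}
```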
+fn float_to_signed_int(f: F) -> I +where + F: Float, + I: Int + Neg, + I::UnsignedInt: Int, + F::Int: CastInto, + F::Int: CastFrom, + u32: CastFrom, +{ + float_to_int_inner::( + f.to_bits() & !F::SIGN_MASK, + |i: I| if f.is_sign_negative() { -i } else { i }, + || if f.is_sign_negative() { I::MIN } else { I::MAX }, + ) +} + +/// Float to int conversions, generic for both signed and unsigned. +/// +/// Parameters: +/// - `fbits`: `abg(f)` bitcasted to an integer. +/// - `map_inbounds`: apply this transformation to integers that are within range (add the sign back). +/// - `out_of_bounds`: return value when out of range for `I`. +fn float_to_int_inner( + fbits: F::Int, + map_inbounds: FnFoo, + out_of_bounds: FnOob, +) -> I +where + F: Float, + I: Int, + FnFoo: FnOnce(I) -> I, + FnOob: FnOnce() -> I, + I::UnsignedInt: Int, + F::Int: CastInto, + F::Int: CastFrom, + u32: CastFrom, +{ + let int_max_exp = F::EXP_BIAS + I::MAX.ilog2() + 1; + let foobar = F::EXP_BIAS + I::UnsignedInt::BITS - 1; + + if fbits < F::ONE.to_bits() { + // < 0 gets rounded to 0 + I::ZERO + } else if fbits < F::Int::cast_from(int_max_exp) << F::SIG_BITS { + // >= 1, < integer max + let m_base = if I::UnsignedInt::BITS >= F::Int::BITS { + I::UnsignedInt::cast_from(fbits) << (I::BITS - F::SIG_BITS - 1) + } else { + I::UnsignedInt::cast_from(fbits >> (F::SIG_BITS - I::BITS + 1)) + }; + + // Set the implicit 1-bit. + let m: I::UnsignedInt = (I::UnsignedInt::ONE << (I::BITS - 1)) | m_base; + + // Shift based on the exponent and bias. + let s: u32 = (foobar) - u32::cast_from(fbits >> F::SIG_BITS); + + let unsigned = m >> s; + map_inbounds(I::from_unsigned(unsigned)) + } else if fbits <= F::EXP_MASK { + // >= max (incl. inf) + out_of_bounds() + } else { + I::ZERO + } +} + +// Conversions from floats to unsigned integers. +intrinsics! { + #[arm_aeabi_alias = __aeabi_f2uiz] + pub extern "C" fn __fixunssfsi(f: f32) -> u32 { + float_to_unsigned_int(f) + } + + #[arm_aeabi_alias = __aeabi_f2ulz] + pub extern "C" fn __fixunssfdi(f: f32) -> u64 { + float_to_unsigned_int(f) + } + + pub extern "C" fn __fixunssfti(f: f32) -> u128 { + float_to_unsigned_int(f) + } + + #[arm_aeabi_alias = __aeabi_d2uiz] + pub extern "C" fn __fixunsdfsi(f: f64) -> u32 { + float_to_unsigned_int(f) + } + + #[arm_aeabi_alias = __aeabi_d2ulz] + pub extern "C" fn __fixunsdfdi(f: f64) -> u64 { + float_to_unsigned_int(f) + } + + pub extern "C" fn __fixunsdfti(f: f64) -> u128 { + float_to_unsigned_int(f) + } + + #[ppc_alias = __fixunskfsi] + #[cfg(f128_enabled)] + pub extern "C" fn __fixunstfsi(f: f128) -> u32 { + float_to_unsigned_int(f) + } + + #[ppc_alias = __fixunskfdi] + #[cfg(f128_enabled)] + pub extern "C" fn __fixunstfdi(f: f128) -> u64 { + float_to_unsigned_int(f) + } + + #[ppc_alias = __fixunskfti] + #[cfg(f128_enabled)] + pub extern "C" fn __fixunstfti(f: f128) -> u128 { + float_to_unsigned_int(f) + } +} + +// Conversions from floats to signed integers. +intrinsics! 
{ + #[arm_aeabi_alias = __aeabi_f2iz] + pub extern "C" fn __fixsfsi(f: f32) -> i32 { + float_to_signed_int(f) + } + + #[arm_aeabi_alias = __aeabi_f2lz] + pub extern "C" fn __fixsfdi(f: f32) -> i64 { + float_to_signed_int(f) + } + + pub extern "C" fn __fixsfti(f: f32) -> i128 { + float_to_signed_int(f) + } + + #[arm_aeabi_alias = __aeabi_d2iz] + pub extern "C" fn __fixdfsi(f: f64) -> i32 { + float_to_signed_int(f) + } + + #[arm_aeabi_alias = __aeabi_d2lz] + pub extern "C" fn __fixdfdi(f: f64) -> i64 { + float_to_signed_int(f) + } + + pub extern "C" fn __fixdfti(f: f64) -> i128 { + float_to_signed_int(f) + } + + #[ppc_alias = __fixkfsi] + #[cfg(f128_enabled)] + pub extern "C" fn __fixtfsi(f: f128) -> i32 { + float_to_signed_int(f) + } + + #[ppc_alias = __fixkfdi] + #[cfg(f128_enabled)] + pub extern "C" fn __fixtfdi(f: f128) -> i64 { + float_to_signed_int(f) + } + + #[ppc_alias = __fixkfti] + #[cfg(f128_enabled)] + pub extern "C" fn __fixtfti(f: f128) -> i128 { + float_to_signed_int(f) + } +} diff --git a/library/compiler-builtins/compiler-builtins/src/float/div.rs b/library/compiler-builtins/compiler-builtins/src/float/div.rs new file mode 100644 index 0000000000000..5df637c7e0f9b --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/float/div.rs @@ -0,0 +1,635 @@ +//! Floating point division routines. +//! +//! This module documentation gives an overview of the method used. More documentation is inline. +//! +//! # Relevant notation +//! +//! - `m_a`: the mantissa of `a`, in base 2 +//! - `p_a`: the exponent of `a`, in base 2. I.e. `a = m_a * 2^p_a` +//! - `uqN` (e.g. `uq1`): this refers to Q notation for fixed-point numbers. UQ1.31 is an unsigned +//! fixed-point number with 1 integral bit, and 31 decimal bits. A `uqN` variable of type `uM` +//! will have N bits of integer and M-N bits of fraction. +//! - `hw`: half width, i.e. for `f64` this will be a `u32`. +//! - `x` is the best estimate of `1/m_b` +//! +//! # Method Overview +//! +//! Division routines must solve for `a / b`, which is `res = m_a*2^p_a / m_b*2^p_b`. The basic +//! process is as follows: +//! +//! - Rearange the exponent and significand to simplify the operations: +//! `res = (m_a / m_b) * 2^{p_a - p_b}`. +//! - Check for early exits (infinity, zero, etc). +//! - If `a` or `b` are subnormal, normalize by shifting the mantissa and adjusting the exponent. +//! - Set the implicit bit so math is correct. +//! - Shift mantissa significant digits (with implicit bit) fully left such that fixed-point UQ1 +//! or UQ0 numbers can be used for mantissa math. These will have greater precision than the +//! actual mantissa, which is important for correct rounding. +//! - Calculate the reciprocal of `m_b`, `x`. +//! - Use the reciprocal to multiply rather than divide: `res = m_a * x_b * 2^{p_a - p_b}`. +//! - Reapply rounding. +//! +//! # Reciprocal calculation +//! +//! Calculating the reciprocal is the most complicated part of this process. It uses the +//! [Newton-Raphson method], which picks an initial estimation (of the reciprocal) and performs +//! a number of iterations to increase its precision. +//! +//! In general, Newton's method takes the following form: +//! +//! ```text +//! `x_n` is a guess or the result of a previous iteration. Increasing `n` converges to the +//! desired result. +//! +//! The result approaches a zero of `f(x)` by applying a correction to the previous gues. +//! +//! x_{n+1} = x_n - f(x_n) / f'(x_n) +//! ``` +//! +//! Applying this to find the reciprocal: +//! +//! ```text +//! 
1 / x = b +//! +//! Rearrange so we can solve by finding a zero +//! 0 = (1 / x) - b = f(x) +//! +//! f'(x) = -x^{-2} +//! +//! x_{n+1} = 2*x_n - b*x_n^2 +//! ``` +//! +//! This is a process that can be repeated to calculate the reciprocal with enough precision to +//! achieve a correctly rounded result for the overall division operation. The maximum required +//! number of iterations is known since precision doubles with each iteration. +//! +//! # Half-width operations +//! +//! Calculating the reciprocal requires widening multiplication and performing arithmetic on the +//! results, meaning that emulated integer arithmetic on `u128` (for `f64`) and `u256` (for `f128`) +//! gets used instead of native math. +//! +//! To make this more efficient, all but the final operation can be computed using half-width +//! integers. For example, rather than computing four iterations using 128-bit integers for `f64`, +//! we can instead perform three iterations using native 64-bit integers and only one final +//! iteration using the full 128 bits. +//! +//! This works because of precision doubling. Some leeway is allowed here because the fixed-point +//! number has more bits than the final mantissa will. +//! +//! [Newton-Raphson method]: https://en.wikipedia.org/wiki/Newton%27s_method + +use core::mem::size_of; +use core::ops; + +use super::HalfRep; +use crate::float::Float; +use crate::int::{CastFrom, CastInto, DInt, HInt, Int, MinInt}; + +fn div(a: F, b: F) -> F +where + F::Int: CastInto, + F::Int: From>, + F::Int: From, + F::Int: HInt + DInt, + ::D: ops::Shr::D>, + F::Int: From, + u16: CastInto, + i32: CastInto, + u32: CastInto, + u128: CastInto>, +{ + let one = F::Int::ONE; + let zero = F::Int::ZERO; + let one_hw = HalfRep::::ONE; + let zero_hw = HalfRep::::ZERO; + let hw = F::BITS / 2; + let lo_mask = F::Int::MAX >> hw; + + let significand_bits = F::SIG_BITS; + // Saturated exponent, representing infinity + let exponent_sat: F::Int = F::EXP_SAT.cast(); + + let exponent_bias = F::EXP_BIAS; + let implicit_bit = F::IMPLICIT_BIT; + let significand_mask = F::SIG_MASK; + let sign_bit = F::SIGN_MASK; + let abs_mask = sign_bit - one; + let exponent_mask = F::EXP_MASK; + let inf_rep = exponent_mask; + let quiet_bit = implicit_bit >> 1; + let qnan_rep = exponent_mask | quiet_bit; + let (mut half_iterations, full_iterations) = get_iterations::(); + let recip_precision = reciprocal_precision::(); + + if F::BITS == 128 { + // FIXME(tgross35): f128 seems to require one more half iteration than expected + half_iterations += 1; + } + + let a_rep = a.to_bits(); + let b_rep = b.to_bits(); + + // Exponent numeric representationm not accounting for bias + let a_exponent = (a_rep >> significand_bits) & exponent_sat; + let b_exponent = (b_rep >> significand_bits) & exponent_sat; + let quotient_sign = (a_rep ^ b_rep) & sign_bit; + + let mut a_significand = a_rep & significand_mask; + let mut b_significand = b_rep & significand_mask; + + // The exponent of our final result in its encoded form + let mut res_exponent: i32 = + i32::cast_from(a_exponent) - i32::cast_from(b_exponent) + (exponent_bias as i32); + + // Detect if a or b is zero, denormal, infinity, or NaN. 
+ if a_exponent.wrapping_sub(one) >= (exponent_sat - one) + || b_exponent.wrapping_sub(one) >= (exponent_sat - one) + { + let a_abs = a_rep & abs_mask; + let b_abs = b_rep & abs_mask; + + // NaN / anything = qNaN + if a_abs > inf_rep { + return F::from_bits(a_rep | quiet_bit); + } + + // anything / NaN = qNaN + if b_abs > inf_rep { + return F::from_bits(b_rep | quiet_bit); + } + + if a_abs == inf_rep { + if b_abs == inf_rep { + // infinity / infinity = NaN + return F::from_bits(qnan_rep); + } else { + // infinity / anything else = +/- infinity + return F::from_bits(a_abs | quotient_sign); + } + } + + // anything else / infinity = +/- 0 + if b_abs == inf_rep { + return F::from_bits(quotient_sign); + } + + if a_abs == zero { + if b_abs == zero { + // zero / zero = NaN + return F::from_bits(qnan_rep); + } else { + // zero / anything else = +/- zero + return F::from_bits(quotient_sign); + } + } + + // anything else / zero = +/- infinity + if b_abs == zero { + return F::from_bits(inf_rep | quotient_sign); + } + + // a is denormal. Renormalize it and set the scale to include the necessary exponent + // adjustment. + if a_abs < implicit_bit { + let (exponent, significand) = F::normalize(a_significand); + res_exponent += exponent; + a_significand = significand; + } + + // b is denormal. Renormalize it and set the scale to include the necessary exponent + // adjustment. + if b_abs < implicit_bit { + let (exponent, significand) = F::normalize(b_significand); + res_exponent -= exponent; + b_significand = significand; + } + } + + // Set the implicit significand bit. If we fell through from the + // denormal path it was already set by normalize( ), but setting it twice + // won't hurt anything. + a_significand |= implicit_bit; + b_significand |= implicit_bit; + + // Transform to a fixed-point representation by shifting the significand to the high bits. We + // know this is in the range [1.0, 2.0] since the implicit bit is set to 1 above. + let b_uq1 = b_significand << (F::BITS - significand_bits - 1); + + // Align the significand of b as a UQ1.(n-1) fixed-point number in the range + // [1.0, 2.0) and get a UQ0.n approximate reciprocal using a small minimax + // polynomial approximation: x0 = 3/4 + 1/sqrt(2) - b/2. + // The max error for this approximation is achieved at endpoints, so + // abs(x0(b) - 1/b) <= abs(x0(1) - 1/1) = 3/4 - 1/sqrt(2) = 0.04289..., + // which is about 4.5 bits. + // The initial approximation is between x0(1.0) = 0.9571... and x0(2.0) = 0.4571... + // + // Then, refine the reciprocal estimate using a quadratically converging + // Newton-Raphson iteration: + // x_{n+1} = x_n * (2 - x_n * b) + // + // Let b be the original divisor considered "in infinite precision" and + // obtained from IEEE754 representation of function argument (with the + // implicit bit set). Corresponds to rep_t-sized b_UQ1 represented in + // UQ1.(W-1). + // + // Let b_hw be an infinitely precise number obtained from the highest (HW-1) + // bits of divisor significand (with the implicit bit set). Corresponds to + // half_rep_t-sized b_UQ1_hw represented in UQ1.(HW-1) that is a **truncated** + // version of b_UQ1. + // + // Let e_n := x_n - 1/b_hw + // E_n := x_n - 1/b + // abs(E_n) <= abs(e_n) + (1/b_hw - 1/b) + // = abs(e_n) + (b - b_hw) / (b*b_hw) + // <= abs(e_n) + 2 * 2^-HW + // + // rep_t-sized iterations may be slower than the corresponding half-width + // variant depending on the handware and whether single/double/quad precision + // is selected. 
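+    //
+    // As a rough numeric illustration of the convergence, take b = 1.5
+    // (1/b = 0.6666...):
+    //   x_0 = 3/4 + 1/sqrt(2) - b/2  ~= 0.7071    (error ~ 4e-2)
+    //   x_1 = x_0 * (2 - b*x_0)      ~= 0.6642    (error ~ 2.5e-3)
+    //   x_2 = x_1 * (2 - b*x_1)      ~= 0.66666   (error ~ 9e-6)
+    // i.e. the error roughly squares with each step, as expected for
+    // Newton-Raphson.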
+ // + // NB: Using half-width iterations increases computation errors due to + // rounding, so error estimations have to be computed taking the selected + // mode into account! + let mut x_uq0 = if half_iterations > 0 { + // Starting with (n-1) half-width iterations + let b_uq1_hw: HalfRep = b_uq1.hi(); + + // C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW + // with W0 being either 16 or 32 and W0 <= HW. + // That is, C is the aforementioned 3/4 + 1/sqrt(2) constant (from which + // b/2 is subtracted to obtain x0) wrapped to [0, 1) range. + let c_hw = c_hw::(); + + // Check that the top bit is set, i.e. value is within `[1, 2)`. + debug_assert!(b_uq1_hw & (one_hw << (HalfRep::::BITS - 1)) > zero_hw); + + // b >= 1, thus an upper bound for 3/4 + 1/sqrt(2) - b/2 is about 0.9572, + // so x0 fits to UQ0.HW without wrapping. + let mut x_uq0_hw: HalfRep = + c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */); + + // An e_0 error is comprised of errors due to + // * x0 being an inherently imprecise first approximation of 1/b_hw + // * C_hw being some (irrational) number **truncated** to W0 bits + // Please note that e_0 is calculated against the infinitely precise + // reciprocal of b_hw (that is, **truncated** version of b). + // + // e_0 <= 3/4 - 1/sqrt(2) + 2^-W0 + // + // By construction, 1 <= b < 2 + // f(x) = x * (2 - b*x) = 2*x - b*x^2 + // f'(x) = 2 * (1 - b*x) + // + // On the [0, 1] interval, f(0) = 0, + // then it increses until f(1/b) = 1 / b, maximum on (0, 1), + // then it decreses to f(1) = 2 - b + // + // Let g(x) = x - f(x) = b*x^2 - x. + // On (0, 1/b), g(x) < 0 <=> f(x) > x + // On (1/b, 1], g(x) > 0 <=> f(x) < x + // + // For half-width iterations, b_hw is used instead of b. + for _ in 0..half_iterations { + // corr_UQ1_hw can be **larger** than 2 - b_hw*x by at most 1*Ulp + // of corr_UQ1_hw. + // "0.0 - (...)" is equivalent to "2.0 - (...)" in UQ1.(HW-1). + // On the other hand, corr_UQ1_hw should not overflow from 2.0 to 0.0 provided + // no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is + // expected to be strictly positive because b_UQ1_hw has its highest bit set + // and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1). + // + // Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally + // obtaining an UQ1.(HW-1) number and proving its highest bit could be + // considered to be 0 to be able to represent it in UQ0.HW. + // From the above analysis of f(x), if corr_UQ1_hw would be represented + // without any intermediate loss of precision (that is, in twice_rep_t) + // x_UQ0_hw could be at most [1.]000... if b_hw is exactly 1.0 and strictly + // less otherwise. On the other hand, to obtain [1.]000..., one have to pass + // 1/b_hw == 1.0 to f(x), so this cannot occur at all without overflow (due + // to 1.0 being not representable as UQ0.HW). + // The fact corr_UQ1_hw was virtually round up (due to result of + // multiplication being **first** truncated, then negated - to improve + // error estimations) can increase x_UQ0_hw by up to 2*Ulp of x_UQ0_hw. + // + // Now, either no overflow occurred or x_UQ0_hw is 0 or 1 in its half_rep_t + // representation. In the latter case, x_UQ0_hw will be either 0 or 1 after + // any number of iterations, so just subtract 2 from the reciprocal + // approximation after last iteration. 
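+    // (In the code this shows up as the `x_uq0.wrapping_sub(2)` overflow
+    // fixup applied after the final full-width step below.)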
+ // + // In infinite precision, with 0 <= eps1, eps2 <= U = 2^-HW: + // corr_UQ1_hw = 2 - (1/b_hw + e_n) * b_hw + 2*eps1 + // = 1 - e_n * b_hw + 2*eps1 + // x_UQ0_hw = (1/b_hw + e_n) * (1 - e_n*b_hw + 2*eps1) - eps2 + // = 1/b_hw - e_n + 2*eps1/b_hw + e_n - e_n^2*b_hw + 2*e_n*eps1 - eps2 + // = 1/b_hw + 2*eps1/b_hw - e_n^2*b_hw + 2*e_n*eps1 - eps2 + // e_{n+1} = -e_n^2*b_hw + 2*eps1/b_hw + 2*e_n*eps1 - eps2 + // = 2*e_n*eps1 - (e_n^2*b_hw + eps2) + 2*eps1/b_hw + // \------ >0 -------/ \-- >0 ---/ + // abs(e_{n+1}) <= 2*abs(e_n)*U + max(2*e_n^2 + U, 2 * U) + x_uq0_hw = next_guess(x_uq0_hw, b_uq1_hw); + } + + // For initial half-width iterations, U = 2^-HW + // Let abs(e_n) <= u_n * U, + // then abs(e_{n+1}) <= 2 * u_n * U^2 + max(2 * u_n^2 * U^2 + U, 2 * U) + // u_{n+1} <= 2 * u_n * U + max(2 * u_n^2 * U + 1, 2) + // + // Account for possible overflow (see above). For an overflow to occur for the + // first time, for "ideal" corr_UQ1_hw (that is, without intermediate + // truncation), the result of x_UQ0_hw * corr_UQ1_hw should be either maximum + // value representable in UQ0.HW or less by 1. This means that 1/b_hw have to + // be not below that value (see g(x) above), so it is safe to decrement just + // once after the final iteration. On the other hand, an effective value of + // divisor changes after this point (from b_hw to b), so adjust here. + x_uq0_hw = x_uq0_hw.wrapping_sub(one_hw); + + // Error estimations for full-precision iterations are calculated just + // as above, but with U := 2^-W and taking extra decrementing into account. + // We need at least one such iteration. + // + // Simulating operations on a twice_rep_t to perform a single final full-width + // iteration. Using ad-hoc multiplication implementations to take advantage + // of particular structure of operands. + let blo: F::Int = b_uq1 & lo_mask; + + // x_UQ0 = x_UQ0_hw * 2^HW - 1 + // x_UQ0 * b_UQ1 = (x_UQ0_hw * 2^HW) * (b_UQ1_hw * 2^HW + blo) - b_UQ1 + // + // <--- higher half ---><--- lower half ---> + // [x_UQ0_hw * b_UQ1_hw] + // + [ x_UQ0_hw * blo ] + // - [ b_UQ1 ] + // = [ result ][.... discarded ...] + let corr_uq1: F::Int = (F::Int::from(x_uq0_hw) * F::Int::from(b_uq1_hw) + + ((F::Int::from(x_uq0_hw) * blo) >> hw)) + .wrapping_sub(one) + .wrapping_neg(); // account for *possible* carry + + let lo_corr: F::Int = corr_uq1 & lo_mask; + let hi_corr: F::Int = corr_uq1 >> hw; + + // x_UQ0 * corr_UQ1 = (x_UQ0_hw * 2^HW) * (hi_corr * 2^HW + lo_corr) - corr_UQ1 + let mut x_uq0: F::Int = ((F::Int::from(x_uq0_hw) * hi_corr) << 1) + .wrapping_add((F::Int::from(x_uq0_hw) * lo_corr) >> (hw - 1)) + // 1 to account for the highest bit of corr_UQ1 can be 1 + // 1 to account for possible carry + // Just like the case of half-width iterations but with possibility + // of overflowing by one extra Ulp of x_UQ0. + .wrapping_sub(F::Int::from(2u8)); + + x_uq0 -= one; + // ... 
and then traditional fixup by 2 should work + + // On error estimation: + // abs(E_{N-1}) <= (u_{N-1} + 2 /* due to conversion e_n -> E_n */) * 2^-HW + // + (2^-HW + 2^-W)) + // abs(E_{N-1}) <= (u_{N-1} + 3.01) * 2^-HW + // + // Then like for the half-width iterations: + // With 0 <= eps1, eps2 < 2^-W + // E_N = 4 * E_{N-1} * eps1 - (E_{N-1}^2 * b + 4 * eps2) + 4 * eps1 / b + // abs(E_N) <= 2^-W * [ 4 * abs(E_{N-1}) + max(2 * abs(E_{N-1})^2 * 2^W + 4, 8)) ] + // abs(E_N) <= 2^-W * [ 4 * (u_{N-1} + 3.01) * 2^-HW + max(4 + 2 * (u_{N-1} + 3.01)^2, 8) ] + x_uq0 + } else { + // C is (3/4 + 1/sqrt(2)) - 1 truncated to 64 fractional bits as UQ0.n + let c: F::Int = F::Int::from(0x7504F333u32) << (F::BITS - 32); + let mut x_uq0: F::Int = c.wrapping_sub(b_uq1); + + // E_0 <= 3/4 - 1/sqrt(2) + 2 * 2^-64 + // x_uq0 + for _ in 0..full_iterations { + x_uq0 = next_guess(x_uq0, b_uq1); + } + + x_uq0 + }; + + // Finally, account for possible overflow, as explained above. + x_uq0 = x_uq0.wrapping_sub(2.cast()); + + // Suppose 1/b - P * 2^-W < x < 1/b + P * 2^-W + x_uq0 -= recip_precision.cast(); + + // Now 1/b - (2*P) * 2^-W < x < 1/b + // FIXME Is x_UQ0 still >= 0.5? + + let mut quotient_uq1: F::Int = x_uq0.widen_mul(a_significand << 1).hi(); + // Now, a/b - 4*P * 2^-W < q < a/b for q= in UQ1.(SB+1+W). + + // quotient_UQ1 is in [0.5, 2.0) as UQ1.(SB+1), + // adjust it to be in [1.0, 2.0) as UQ1.SB. + let mut residual_lo = if quotient_uq1 < (implicit_bit << 1) { + // Highest bit is 0, so just reinterpret quotient_UQ1 as UQ1.SB, + // effectively doubling its value as well as its error estimation. + let residual_lo = (a_significand << (significand_bits + 1)) + .wrapping_sub(quotient_uq1.wrapping_mul(b_significand)); + res_exponent -= 1; + a_significand <<= 1; + residual_lo + } else { + // Highest bit is 1 (the UQ1.(SB+1) value is in [1, 2)), convert it + // to UQ1.SB by right shifting by 1. Least significant bit is omitted. + quotient_uq1 >>= 1; + (a_significand << significand_bits).wrapping_sub(quotient_uq1.wrapping_mul(b_significand)) + }; + + // drop mutability + let quotient = quotient_uq1; + + // NB: residualLo is calculated above for the normal result case. + // It is re-computed on denormal path that is expected to be not so + // performance-sensitive. + // + // Now, q cannot be greater than a/b and can differ by at most 8*P * 2^-W + 2^-SB + // Each NextAfter() increments the floating point value by at least 2^-SB + // (more, if exponent was incremented). + // Different cases (<---> is of 2^-SB length, * = a/b that is shown as a midpoint): + // q + // | | * | | | | | + // <---> 2^t + // | | | | | * | | + // q + // To require at most one NextAfter(), an error should be less than 1.5 * 2^-SB. 
+ // (8*P) * 2^-W + 2^-SB < 1.5 * 2^-SB + // (8*P) * 2^-W < 0.5 * 2^-SB + // P < 2^(W-4-SB) + // Generally, for at most R NextAfter() to be enough, + // P < (2*R - 1) * 2^(W-4-SB) + // For f32 (0+3): 10 < 32 (OK) + // For f32 (2+1): 32 < 74 < 32 * 3, so two NextAfter() are required + // For f64: 220 < 256 (OK) + // For f128: 4096 * 3 < 13922 < 4096 * 5 (three NextAfter() are required) + // + // If we have overflowed the exponent, return infinity + if res_exponent >= i32::cast_from(exponent_sat) { + return F::from_bits(inf_rep | quotient_sign); + } + + // Now, quotient <= the correctly-rounded result + // and may need taking NextAfter() up to 3 times (see error estimates above) + // r = a - b * q + let mut abs_result = if res_exponent > 0 { + let mut ret = quotient & significand_mask; + ret |= F::Int::from(res_exponent as u32) << significand_bits; + residual_lo <<= 1; + ret + } else { + if ((significand_bits as i32) + res_exponent) < 0 { + return F::from_bits(quotient_sign); + } + + let ret = quotient.wrapping_shr(u32::cast_from(res_exponent.wrapping_neg()) + 1); + residual_lo = a_significand + .wrapping_shl(significand_bits.wrapping_add(CastInto::::cast(res_exponent))) + .wrapping_sub(ret.wrapping_mul(b_significand) << 1); + ret + }; + + residual_lo += abs_result & one; // tie to even + // conditionally turns the below LT comparison into LTE + abs_result += u8::from(residual_lo > b_significand).into(); + + if F::BITS == 128 || (F::BITS == 32 && half_iterations > 0) { + // Do not round Infinity to NaN + abs_result += + u8::from(abs_result < inf_rep && residual_lo > (2 + 1).cast() * b_significand).into(); + } + + if F::BITS == 128 { + abs_result += + u8::from(abs_result < inf_rep && residual_lo > (4 + 1).cast() * b_significand).into(); + } + + F::from_bits(abs_result | quotient_sign) +} + +/// Calculate the number of iterations required for a float type's precision. +/// +/// This returns `(h, f)` where `h` is the number of iterations to be done using integers at half +/// the float's bit width, and `f` is the number of iterations done using integers of the float's +/// full width. This is further explained in the module documentation. +/// +/// # Requirements +/// +/// The initial estimate should have at least 8 bits of precision. If this is not true, results +/// will be inaccurate. +const fn get_iterations() -> (usize, usize) { + // Precision doubles with each iteration. Assume we start with 8 bits of precision. + let total_iterations = F::BITS.ilog2() as usize - 2; + + if 2 * size_of::() <= size_of::<*const ()>() { + // If widening multiplication will be efficient (uses word-sized integers), there is no + // reason to use half-sized iterations. + (0, total_iterations) + } else { + // Otherwise, do as many iterations as possible at half width. + (total_iterations - 1, 1) + } +} + +/// `u_n` for different precisions (with N-1 half-width iterations). 
+/// +/// W0 is the precision of C +/// u_0 = (3/4 - 1/sqrt(2) + 2^-W0) * 2^HW +/// +/// Estimated with bc: +/// +/// ```text +/// define half1(un) { return 2.0 * (un + un^2) / 2.0^hw + 1.0; } +/// define half2(un) { return 2.0 * un / 2.0^hw + 2.0; } +/// define full1(un) { return 4.0 * (un + 3.01) / 2.0^hw + 2.0 * (un + 3.01)^2 + 4.0; } +/// define full2(un) { return 4.0 * (un + 3.01) / 2.0^hw + 8.0; } +/// +/// | f32 (0 + 3) | f32 (2 + 1) | f64 (3 + 1) | f128 (4 + 1) +/// u_0 | < 184224974 | < 2812.1 | < 184224974 | < 791240234244348797 +/// u_1 | < 15804007 | < 242.7 | < 15804007 | < 67877681371350440 +/// u_2 | < 116308 | < 2.81 | < 116308 | < 499533100252317 +/// u_3 | < 7.31 | | < 7.31 | < 27054456580 +/// u_4 | | | | < 80.4 +/// Final (U_N) | same as u_3 | < 72 | < 218 | < 13920 +/// ```` +/// +/// Add 2 to `U_N` due to final decrement. +const fn reciprocal_precision() -> u16 { + let (half_iterations, full_iterations) = get_iterations::(); + + if full_iterations < 1 { + panic!("Must have at least one full iteration"); + } + + // FIXME(tgross35): calculate this programmatically + if F::BITS == 32 && half_iterations == 2 && full_iterations == 1 { + 74u16 + } else if F::BITS == 32 && half_iterations == 0 && full_iterations == 3 { + 10 + } else if F::BITS == 64 && half_iterations == 3 && full_iterations == 1 { + 220 + } else if F::BITS == 128 && half_iterations == 4 && full_iterations == 1 { + 13922 + } else { + panic!("Invalid number of iterations") + } +} + +/// The value of `C` adjusted to half width. +/// +/// C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW with W0 being either +/// 16 or 32 and W0 <= HW. That is, C is the aforementioned 3/4 + 1/sqrt(2) constant (from +/// which b/2 is subtracted to obtain x0) wrapped to [0, 1) range. +fn c_hw() -> HalfRep +where + F::Int: DInt, + u128: CastInto>, +{ + const C_U128: u128 = 0x7504f333f9de6108b2fb1366eaa6a542; + const { C_U128 >> (u128::BITS - >::BITS) }.cast() +} + +/// Perform one iteration at any width to approach `1/b`, given previous guess `x`. Returns +/// the next `x` as a UQ0 number. +/// +/// This is the `x_{n+1} = 2*x_n - b*x_n^2` algorithm, implemented as `x_n * (2 - b*x_n)`. It +/// uses widening multiplication to calculate the result with necessary precision. +fn next_guess(x_uq0: I, b_uq1: I) -> I +where + I: Int + HInt, + ::D: ops::Shr::D>, +{ + // `corr = 2 - b*x_n` + // + // This looks like `0 - b*x_n`. However, this works - in `UQ1`, `0.0 - x = 2.0 - x`. + let corr_uq1: I = I::ZERO.wrapping_sub(x_uq0.widen_mul(b_uq1).hi()); + + // `x_n * corr = x_n * (2 - b*x_n)` + (x_uq0.widen_mul(corr_uq1) >> (I::BITS - 1)).lo() +} + +intrinsics! 
{ + #[arm_aeabi_alias = __aeabi_fdiv] + pub extern "C" fn __divsf3(a: f32, b: f32) -> f32 { + div(a, b) + } + + #[arm_aeabi_alias = __aeabi_ddiv] + pub extern "C" fn __divdf3(a: f64, b: f64) -> f64 { + div(a, b) + } + + #[ppc_alias = __divkf3] + #[cfg(f128_enabled)] + pub extern "C" fn __divtf3(a: f128, b: f128) -> f128 { + div(a, b) + } + + #[cfg(target_arch = "arm")] + pub extern "C" fn __divsf3vfp(a: f32, b: f32) -> f32 { + a / b + } + + #[cfg(target_arch = "arm")] + pub extern "C" fn __divdf3vfp(a: f64, b: f64) -> f64 { + a / b + } +} diff --git a/library/compiler-builtins/compiler-builtins/src/float/extend.rs b/library/compiler-builtins/compiler-builtins/src/float/extend.rs new file mode 100644 index 0000000000000..c4f1fe30e0ea8 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/float/extend.rs @@ -0,0 +1,123 @@ +use crate::float::Float; +use crate::int::{CastInto, Int, MinInt}; + +/// Generic conversion from a narrower to a wider IEEE-754 floating-point type +fn extend(a: F) -> R +where + F::Int: CastInto, + u64: CastInto, + u32: CastInto, + R::Int: CastInto, + R::Int: CastInto, + u64: CastInto, + F::Int: CastInto, +{ + let src_zero = F::Int::ZERO; + let src_one = F::Int::ONE; + let src_bits = F::BITS; + let src_sig_bits = F::SIG_BITS; + let src_exp_bias = F::EXP_BIAS; + let src_min_normal = F::IMPLICIT_BIT; + let src_infinity = F::EXP_MASK; + let src_sign_mask = F::SIGN_MASK; + let src_abs_mask = src_sign_mask - src_one; + let src_qnan = F::SIG_MASK; + let src_nan_code = src_qnan - src_one; + + let dst_bits = R::BITS; + let dst_sig_bits = R::SIG_BITS; + let dst_inf_exp = R::EXP_SAT; + let dst_exp_bias = R::EXP_BIAS; + let dst_min_normal = R::IMPLICIT_BIT; + + let sig_bits_delta = dst_sig_bits - src_sig_bits; + let exp_bias_delta = dst_exp_bias - src_exp_bias; + let a_abs = a.to_bits() & src_abs_mask; + let mut abs_result = R::Int::ZERO; + + if a_abs.wrapping_sub(src_min_normal) < src_infinity.wrapping_sub(src_min_normal) { + // a is a normal number. + // Extend to the destination type by shifting the significand and + // exponent into the proper position and rebiasing the exponent. + let abs_dst: R::Int = a_abs.cast(); + let bias_dst: R::Int = exp_bias_delta.cast(); + abs_result = abs_dst.wrapping_shl(sig_bits_delta); + abs_result += bias_dst.wrapping_shl(dst_sig_bits); + } else if a_abs >= src_infinity { + // a is NaN or infinity. + // Conjure the result by beginning with infinity, then setting the qNaN + // bit (if needed) and right-aligning the rest of the trailing NaN + // payload field. + let qnan_dst: R::Int = (a_abs & src_qnan).cast(); + let nan_code_dst: R::Int = (a_abs & src_nan_code).cast(); + let inf_exp_dst: R::Int = dst_inf_exp.cast(); + abs_result = inf_exp_dst.wrapping_shl(dst_sig_bits); + abs_result |= qnan_dst.wrapping_shl(sig_bits_delta); + abs_result |= nan_code_dst.wrapping_shl(sig_bits_delta); + } else if a_abs != src_zero { + // a is denormal. + // Renormalize the significand and clear the leading bit, then insert + // the correct adjusted exponent in the destination type. 
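+        // `scale` is how far the subnormal's leading 1 sits below the
+        // implicit-bit position, so the source value is
+        // 1.frac * 2^(1 - src_exp_bias - scale). Shifting left by
+        // `sig_bits_delta + scale` moves that leading 1 to the destination's
+        // implicit-bit position (the XOR with `dst_min_normal` below clears
+        // it again), and the rebased exponent becomes
+        // `exp_bias_delta + 1 - scale`.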
+ let scale = a_abs.leading_zeros() - src_min_normal.leading_zeros(); + let abs_dst: R::Int = a_abs.cast(); + let bias_dst: R::Int = (exp_bias_delta - scale + 1).cast(); + abs_result = abs_dst.wrapping_shl(sig_bits_delta + scale); + abs_result = (abs_result ^ dst_min_normal) | (bias_dst.wrapping_shl(dst_sig_bits)); + } + + let sign_result: R::Int = (a.to_bits() & src_sign_mask).cast(); + R::from_bits(abs_result | (sign_result.wrapping_shl(dst_bits - src_bits))) +} + +intrinsics! { + #[aapcs_on_arm] + #[arm_aeabi_alias = __aeabi_f2d] + pub extern "C" fn __extendsfdf2(a: f32) -> f64 { + extend(a) + } +} + +intrinsics! { + #[aapcs_on_arm] + #[apple_f16_arg_abi] + #[arm_aeabi_alias = __aeabi_h2f] + #[cfg(f16_enabled)] + pub extern "C" fn __extendhfsf2(a: f16) -> f32 { + extend(a) + } + + #[aapcs_on_arm] + #[apple_f16_arg_abi] + #[cfg(f16_enabled)] + pub extern "C" fn __gnu_h2f_ieee(a: f16) -> f32 { + extend(a) + } + + #[aapcs_on_arm] + #[apple_f16_arg_abi] + #[cfg(f16_enabled)] + pub extern "C" fn __extendhfdf2(a: f16) -> f64 { + extend(a) + } + + #[aapcs_on_arm] + #[ppc_alias = __extendhfkf2] + #[cfg(all(f16_enabled, f128_enabled))] + pub extern "C" fn __extendhftf2(a: f16) -> f128 { + extend(a) + } + + #[aapcs_on_arm] + #[ppc_alias = __extendsfkf2] + #[cfg(f128_enabled)] + pub extern "C" fn __extendsftf2(a: f32) -> f128 { + extend(a) + } + + #[aapcs_on_arm] + #[ppc_alias = __extenddfkf2] + #[cfg(f128_enabled)] + pub extern "C" fn __extenddftf2(a: f64) -> f128 { + extend(a) + } +} diff --git a/library/compiler-builtins/compiler-builtins/src/float/mod.rs b/library/compiler-builtins/compiler-builtins/src/float/mod.rs new file mode 100644 index 0000000000000..4a379d0d3575b --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/float/mod.rs @@ -0,0 +1,15 @@ +pub mod add; +pub mod cmp; +pub mod conv; +pub mod div; +pub mod extend; +pub mod mul; +pub mod pow; +pub mod sub; +pub(crate) mod traits; +pub mod trunc; + +#[cfg(not(feature = "unstable-public-internals"))] +pub(crate) use traits::{Float, HalfRep}; +#[cfg(feature = "unstable-public-internals")] +pub use traits::{Float, HalfRep}; diff --git a/library/compiler-builtins/compiler-builtins/src/float/mul.rs b/library/compiler-builtins/compiler-builtins/src/float/mul.rs new file mode 100644 index 0000000000000..7f1f19d9bd7fb --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/float/mul.rs @@ -0,0 +1,200 @@ +use crate::float::Float; +use crate::int::{CastInto, DInt, HInt, Int, MinInt}; + +fn mul(a: F, b: F) -> F +where + u32: CastInto, + F::Int: CastInto, + i32: CastInto, + F::Int: CastInto, + F::Int: HInt, +{ + let one = F::Int::ONE; + let zero = F::Int::ZERO; + + let bits = F::BITS; + let significand_bits = F::SIG_BITS; + let max_exponent = F::EXP_SAT; + + let exponent_bias = F::EXP_BIAS; + + let implicit_bit = F::IMPLICIT_BIT; + let significand_mask = F::SIG_MASK; + let sign_bit = F::SIGN_MASK; + let abs_mask = sign_bit - one; + let exponent_mask = F::EXP_MASK; + let inf_rep = exponent_mask; + let quiet_bit = implicit_bit >> 1; + let qnan_rep = exponent_mask | quiet_bit; + let exponent_bits = F::EXP_BITS; + + let a_rep = a.to_bits(); + let b_rep = b.to_bits(); + + let a_exponent = (a_rep >> significand_bits) & max_exponent.cast(); + let b_exponent = (b_rep >> significand_bits) & max_exponent.cast(); + let product_sign = (a_rep ^ b_rep) & sign_bit; + + let mut a_significand = a_rep & significand_mask; + let mut b_significand = b_rep & significand_mask; + let mut scale = 0; + + // Detect if a or b is zero, denormal, 
infinity, or NaN. + if a_exponent.wrapping_sub(one) >= (max_exponent - 1).cast() + || b_exponent.wrapping_sub(one) >= (max_exponent - 1).cast() + { + let a_abs = a_rep & abs_mask; + let b_abs = b_rep & abs_mask; + + // NaN + anything = qNaN + if a_abs > inf_rep { + return F::from_bits(a_rep | quiet_bit); + } + // anything + NaN = qNaN + if b_abs > inf_rep { + return F::from_bits(b_rep | quiet_bit); + } + + if a_abs == inf_rep { + if b_abs != zero { + // infinity * non-zero = +/- infinity + return F::from_bits(a_abs | product_sign); + } else { + // infinity * zero = NaN + return F::from_bits(qnan_rep); + } + } + + if b_abs == inf_rep { + if a_abs != zero { + // infinity * non-zero = +/- infinity + return F::from_bits(b_abs | product_sign); + } else { + // infinity * zero = NaN + return F::from_bits(qnan_rep); + } + } + + // zero * anything = +/- zero + if a_abs == zero { + return F::from_bits(product_sign); + } + + // anything * zero = +/- zero + if b_abs == zero { + return F::from_bits(product_sign); + } + + // one or both of a or b is denormal, the other (if applicable) is a + // normal number. Renormalize one or both of a and b, and set scale to + // include the necessary exponent adjustment. + if a_abs < implicit_bit { + let (exponent, significand) = F::normalize(a_significand); + scale += exponent; + a_significand = significand; + } + + if b_abs < implicit_bit { + let (exponent, significand) = F::normalize(b_significand); + scale += exponent; + b_significand = significand; + } + } + + // Or in the implicit significand bit. (If we fell through from the + // denormal path it was already set by normalize( ), but setting it twice + // won't hurt anything.) + a_significand |= implicit_bit; + b_significand |= implicit_bit; + + // Get the significand of a*b. Before multiplying the significands, shift + // one of them left to left-align it in the field. Thus, the product will + // have (exponentBits + 2) integral digits, all but two of which must be + // zero. Normalizing this result is just a conditional left-shift by one + // and bumping the exponent accordingly. + let (mut product_low, mut product_high) = a_significand + .widen_mul(b_significand << exponent_bits) + .lo_hi(); + + let a_exponent_i32: i32 = a_exponent.cast(); + let b_exponent_i32: i32 = b_exponent.cast(); + let mut product_exponent: i32 = a_exponent_i32 + .wrapping_add(b_exponent_i32) + .wrapping_add(scale) + .wrapping_sub(exponent_bias as i32); + + // Normalize the significand, adjust exponent if needed. + if (product_high & implicit_bit) != zero { + product_exponent = product_exponent.wrapping_add(1); + } else { + product_high = (product_high << 1) | (product_low >> (bits - 1)); + product_low <<= 1; + } + + // If we have overflowed the type, return +/- infinity. + if product_exponent >= max_exponent as i32 { + return F::from_bits(inf_rep | product_sign); + } + + if product_exponent <= 0 { + // Result is denormal before rounding + // + // If the result is so small that it just underflows to zero, return + // a zero of the appropriate sign. Mathematically there is no need to + // handle this case separately, but we make it a special case to + // simplify the shift logic. + let shift = one.wrapping_sub(product_exponent.cast()).cast(); + if shift >= bits { + return F::from_bits(product_sign); + } + + // Otherwise, shift the significand of the result so that the round + // bit is the high bit of `product_low`. 
+ // Ensure one of the non-highest bits in `product_low` is set if the shifted out bit are + // not all zero so that the result is correctly rounded below. + let sticky = product_low << (bits - shift) != zero; + product_low = + (product_high << (bits - shift)) | (product_low >> shift) | (sticky as u32).cast(); + product_high >>= shift; + } else { + // Result is normal before rounding; insert the exponent. + product_high &= significand_mask; + product_high |= product_exponent.cast() << significand_bits; + } + + // Insert the sign of the result: + product_high |= product_sign; + + // Final rounding. The final result may overflow to infinity, or underflow + // to zero, but those are the correct results in those cases. We use the + // default IEEE-754 round-to-nearest, ties-to-even rounding mode. + if product_low > sign_bit { + product_high += one; + } + + if product_low == sign_bit { + product_high += product_high & one; + } + + F::from_bits(product_high) +} + +intrinsics! { + #[aapcs_on_arm] + #[arm_aeabi_alias = __aeabi_fmul] + pub extern "C" fn __mulsf3(a: f32, b: f32) -> f32 { + mul(a, b) + } + + #[aapcs_on_arm] + #[arm_aeabi_alias = __aeabi_dmul] + pub extern "C" fn __muldf3(a: f64, b: f64) -> f64 { + mul(a, b) + } + + #[ppc_alias = __mulkf3] + #[cfg(f128_enabled)] + pub extern "C" fn __multf3(a: f128, b: f128) -> f128 { + mul(a, b) + } +} diff --git a/library/compiler-builtins/compiler-builtins/src/float/pow.rs b/library/compiler-builtins/compiler-builtins/src/float/pow.rs new file mode 100644 index 0000000000000..45a4ad9049de4 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/float/pow.rs @@ -0,0 +1,40 @@ +use crate::float::Float; +use crate::int::Int; + +/// Returns `a` raised to the power `b` +fn pow(a: F, b: i32) -> F { + let mut a = a; + let recip = b < 0; + let mut pow = Int::abs_diff(b, 0); + let mut mul = F::ONE; + loop { + if (pow & 1) != 0 { + mul *= a; + } + pow >>= 1; + if pow == 0 { + break; + } + a *= a; + } + + if recip { F::ONE / mul } else { mul } +} + +intrinsics! { + pub extern "C" fn __powisf2(a: f32, b: i32) -> f32 { + pow(a, b) + } + + pub extern "C" fn __powidf2(a: f64, b: i32) -> f64 { + pow(a, b) + } + + #[ppc_alias = __powikf2] + #[cfg(f128_enabled)] + // FIXME(f16_f128): MSVC cannot build these until `__divtf3` is available in nightly. + #[cfg(not(target_env = "msvc"))] + pub extern "C" fn __powitf2(a: f128, b: i32) -> f128 { + pow(a, b) + } +} diff --git a/library/compiler-builtins/compiler-builtins/src/float/sub.rs b/library/compiler-builtins/compiler-builtins/src/float/sub.rs new file mode 100644 index 0000000000000..a0fd9dff97fcf --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/float/sub.rs @@ -0,0 +1,24 @@ +use crate::float::Float; + +intrinsics! 
{ + #[arm_aeabi_alias = __aeabi_fsub] + pub extern "C" fn __subsf3(a: f32, b: f32) -> f32 { + crate::float::add::__addsf3(a, f32::from_bits(b.to_bits() ^ f32::SIGN_MASK)) + } + + #[arm_aeabi_alias = __aeabi_dsub] + pub extern "C" fn __subdf3(a: f64, b: f64) -> f64 { + crate::float::add::__adddf3(a, f64::from_bits(b.to_bits() ^ f64::SIGN_MASK)) + } + + #[ppc_alias = __subkf3] + #[cfg(f128_enabled)] + pub extern "C" fn __subtf3(a: f128, b: f128) -> f128 { + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + use crate::float::add::__addkf3 as __addtf3; + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + use crate::float::add::__addtf3; + + __addtf3(a, f128::from_bits(b.to_bits() ^ f128::SIGN_MASK)) + } +} diff --git a/library/compiler-builtins/compiler-builtins/src/float/traits.rs b/library/compiler-builtins/compiler-builtins/src/float/traits.rs new file mode 100644 index 0000000000000..8ccaa7bcbd78a --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/float/traits.rs @@ -0,0 +1,189 @@ +use core::ops; + +use crate::int::{DInt, Int, MinInt}; + +/// Wrapper to extract the integer type half of the float's size +pub type HalfRep = <::Int as DInt>::H; + +/// Trait for some basic operations on floats +#[allow(dead_code)] +pub trait Float: + Copy + + core::fmt::Debug + + PartialEq + + PartialOrd + + ops::AddAssign + + ops::MulAssign + + ops::Add + + ops::Sub + + ops::Div + + ops::Rem +{ + /// A uint of the same width as the float + type Int: Int; + + /// A int of the same width as the float + type SignedInt: Int + MinInt; + + /// An int capable of containing the exponent bits plus a sign bit. This is signed. + type ExpInt: Int; + + const ZERO: Self; + const ONE: Self; + + /// The bitwidth of the float type. + const BITS: u32; + + /// The bitwidth of the significand. + const SIG_BITS: u32; + + /// The bitwidth of the exponent. + const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - 1; + + /// The saturated (maximum bitpattern) value of the exponent, i.e. the infinite + /// representation. + /// + /// This is in the rightmost position, use `EXP_MASK` for the shifted value. + const EXP_SAT: u32 = (1 << Self::EXP_BITS) - 1; + + /// The exponent bias value. + const EXP_BIAS: u32 = Self::EXP_SAT >> 1; + + /// A mask for the sign bit. + const SIGN_MASK: Self::Int; + + /// A mask for the significand. + const SIG_MASK: Self::Int; + + /// The implicit bit of the float format. + const IMPLICIT_BIT: Self::Int; + + /// A mask for the exponent. + const EXP_MASK: Self::Int; + + /// Returns `self` transmuted to `Self::Int` + fn to_bits(self) -> Self::Int; + + /// Returns `self` transmuted to `Self::SignedInt` + fn to_bits_signed(self) -> Self::SignedInt; + + /// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be + /// represented in multiple different ways. This method returns `true` if two NaNs are + /// compared. + fn eq_repr(self, rhs: Self) -> bool; + + /// Returns true if the sign is negative + fn is_sign_negative(self) -> bool; + + /// Returns the exponent, not adjusting for bias. + fn exp(self) -> Self::ExpInt; + + /// Returns the significand with no implicit bit (or the "fractional" part) + fn frac(self) -> Self::Int; + + /// Returns the significand with implicit bit + fn imp_frac(self) -> Self::Int; + + /// Returns a `Self::Int` transmuted back to `Self` + fn from_bits(a: Self::Int) -> Self; + + /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position. 
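+    ///
+    /// For example, for `f32`, `from_parts(false, 127, 0)` assembles sign 0,
+    /// biased exponent 127 and an all-zero significand into the bit pattern
+    /// `0x3f80_0000`, i.e. `1.0`.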
+ fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self; + + fn abs(self) -> Self { + let abs_mask = !Self::SIGN_MASK; + Self::from_bits(self.to_bits() & abs_mask) + } + + /// Returns (normalized exponent, normalized significand) + fn normalize(significand: Self::Int) -> (i32, Self::Int); + + /// Returns if `self` is subnormal + fn is_subnormal(self) -> bool; +} + +macro_rules! float_impl { + ($ty:ident, $ity:ident, $sity:ident, $expty:ident, $bits:expr, $significand_bits:expr) => { + impl Float for $ty { + type Int = $ity; + type SignedInt = $sity; + type ExpInt = $expty; + + const ZERO: Self = 0.0; + const ONE: Self = 1.0; + + const BITS: u32 = $bits; + const SIG_BITS: u32 = $significand_bits; + + const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1); + const SIG_MASK: Self::Int = (1 << Self::SIG_BITS) - 1; + const IMPLICIT_BIT: Self::Int = 1 << Self::SIG_BITS; + const EXP_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIG_MASK); + + fn to_bits(self) -> Self::Int { + self.to_bits() + } + fn to_bits_signed(self) -> Self::SignedInt { + self.to_bits() as Self::SignedInt + } + fn eq_repr(self, rhs: Self) -> bool { + #[cfg(feature = "mangled-names")] + fn is_nan(x: $ty) -> bool { + // When using mangled-names, the "real" compiler-builtins might not have the + // necessary builtin (__unordtf2) to test whether `f128` is NaN. + // FIXME(f16_f128): Remove once the nightly toolchain has the __unordtf2 builtin + // x is NaN if all the bits of the exponent are set and the significand is non-0 + x.to_bits() & $ty::EXP_MASK == $ty::EXP_MASK && x.to_bits() & $ty::SIG_MASK != 0 + } + #[cfg(not(feature = "mangled-names"))] + fn is_nan(x: $ty) -> bool { + x.is_nan() + } + if is_nan(self) && is_nan(rhs) { + true + } else { + self.to_bits() == rhs.to_bits() + } + } + fn is_sign_negative(self) -> bool { + self.is_sign_negative() + } + fn exp(self) -> Self::ExpInt { + ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS) as Self::ExpInt + } + fn frac(self) -> Self::Int { + self.to_bits() & Self::SIG_MASK + } + fn imp_frac(self) -> Self::Int { + self.frac() | Self::IMPLICIT_BIT + } + fn from_bits(a: Self::Int) -> Self { + Self::from_bits(a) + } + fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self { + Self::from_bits( + ((negative as Self::Int) << (Self::BITS - 1)) + | ((exponent << Self::SIG_BITS) & Self::EXP_MASK) + | (significand & Self::SIG_MASK), + ) + } + fn normalize(significand: Self::Int) -> (i32, Self::Int) { + let shift = significand.leading_zeros().wrapping_sub(Self::EXP_BITS); + ( + 1i32.wrapping_sub(shift as i32), + significand << shift as Self::Int, + ) + } + fn is_subnormal(self) -> bool { + (self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO + } + } + }; +} + +#[cfg(f16_enabled)] +float_impl!(f16, u16, i16, i8, 16, 10); +float_impl!(f32, u32, i32, i16, 32, 23); +float_impl!(f64, u64, i64, i16, 64, 52); +#[cfg(f128_enabled)] +float_impl!(f128, u128, i128, i16, 128, 112); diff --git a/library/compiler-builtins/compiler-builtins/src/float/trunc.rs b/library/compiler-builtins/compiler-builtins/src/float/trunc.rs new file mode 100644 index 0000000000000..ca8a0f368b563 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/float/trunc.rs @@ -0,0 +1,169 @@ +use crate::float::Float; +use crate::int::{CastInto, Int, MinInt}; + +fn trunc(a: F) -> R +where + F::Int: CastInto, + F::Int: CastInto, + u64: CastInto, + u32: CastInto, + R::Int: CastInto, + u32: CastInto, + F::Int: CastInto, +{ + let src_zero = F::Int::ZERO; + let src_one = 
F::Int::ONE; + let src_bits = F::BITS; + let src_exp_bias = F::EXP_BIAS; + + let src_min_normal = F::IMPLICIT_BIT; + let src_sig_mask = F::SIG_MASK; + let src_infinity = F::EXP_MASK; + let src_sign_mask = F::SIGN_MASK; + let src_abs_mask = src_sign_mask - src_one; + let round_mask = (src_one << (F::SIG_BITS - R::SIG_BITS)) - src_one; + let halfway = src_one << (F::SIG_BITS - R::SIG_BITS - 1); + let src_qnan = src_one << (F::SIG_BITS - 1); + let src_nan_code = src_qnan - src_one; + + let dst_zero = R::Int::ZERO; + let dst_one = R::Int::ONE; + let dst_bits = R::BITS; + let dst_inf_exp = R::EXP_SAT; + let dst_exp_bias = R::EXP_BIAS; + + let underflow_exponent: F::Int = (src_exp_bias + 1 - dst_exp_bias).cast(); + let overflow_exponent: F::Int = (src_exp_bias + dst_inf_exp - dst_exp_bias).cast(); + let underflow: F::Int = underflow_exponent << F::SIG_BITS; + let overflow: F::Int = overflow_exponent << F::SIG_BITS; + + let dst_qnan = R::Int::ONE << (R::SIG_BITS - 1); + let dst_nan_code = dst_qnan - dst_one; + + let sig_bits_delta = F::SIG_BITS - R::SIG_BITS; + // Break a into a sign and representation of the absolute value. + let a_abs = a.to_bits() & src_abs_mask; + let sign = a.to_bits() & src_sign_mask; + let mut abs_result: R::Int; + + if a_abs.wrapping_sub(underflow) < a_abs.wrapping_sub(overflow) { + // The exponent of a is within the range of normal numbers in the + // destination format. We can convert by simply right-shifting with + // rounding and adjusting the exponent. + abs_result = (a_abs >> sig_bits_delta).cast(); + // Cast before shifting to prevent overflow. + let bias_diff: R::Int = src_exp_bias.wrapping_sub(dst_exp_bias).cast(); + let tmp = bias_diff << R::SIG_BITS; + abs_result = abs_result.wrapping_sub(tmp); + + let round_bits = a_abs & round_mask; + if round_bits > halfway { + // Round to nearest. + abs_result += dst_one; + } else if round_bits == halfway { + // Tie to even. + abs_result += abs_result & dst_one; + }; + } else if a_abs > src_infinity { + // a is NaN. + // Conjure the result by beginning with infinity, setting the qNaN + // bit and inserting the (truncated) trailing NaN field. + // Cast before shifting to prevent overflow. + let dst_inf_exp: R::Int = dst_inf_exp.cast(); + abs_result = dst_inf_exp << R::SIG_BITS; + abs_result |= dst_qnan; + abs_result |= dst_nan_code & ((a_abs & src_nan_code) >> (F::SIG_BITS - R::SIG_BITS)).cast(); + } else if a_abs >= overflow { + // a overflows to infinity. + // Cast before shifting to prevent overflow. + let dst_inf_exp: R::Int = dst_inf_exp.cast(); + abs_result = dst_inf_exp << R::SIG_BITS; + } else { + // a underflows on conversion to the destination type or is an exact + // zero. The result may be a denormal or zero. Extract the exponent + // to get the shift amount for the denormalization. + let a_exp: u32 = (a_abs >> F::SIG_BITS).cast(); + let shift = src_exp_bias - dst_exp_bias - a_exp + 1; + + let significand = (a.to_bits() & src_sig_mask) | src_min_normal; + + // Right shift by the denormalization amount with sticky. 
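+        // Any bits that are shifted out entirely are folded into the least
+        // significant surviving bit ("sticky"), so the round-to-nearest /
+        // ties-to-even check against `halfway` below cannot mistake a
+        // remainder just above one half for an exact tie.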
+ if shift > F::SIG_BITS { + abs_result = dst_zero; + } else { + let sticky = if (significand << (src_bits - shift)) != src_zero { + src_one + } else { + src_zero + }; + let denormalized_significand: F::Int = (significand >> shift) | sticky; + abs_result = (denormalized_significand >> (F::SIG_BITS - R::SIG_BITS)).cast(); + let round_bits = denormalized_significand & round_mask; + // Round to nearest + if round_bits > halfway { + abs_result += dst_one; + } + // Ties to even + else if round_bits == halfway { + abs_result += abs_result & dst_one; + }; + } + } + + // Apply the signbit to the absolute value. + R::from_bits(abs_result | sign.wrapping_shr(src_bits - dst_bits).cast()) +} + +intrinsics! { + #[aapcs_on_arm] + #[arm_aeabi_alias = __aeabi_d2f] + pub extern "C" fn __truncdfsf2(a: f64) -> f32 { + trunc(a) + } +} + +intrinsics! { + #[aapcs_on_arm] + #[apple_f16_ret_abi] + #[arm_aeabi_alias = __aeabi_f2h] + #[cfg(f16_enabled)] + pub extern "C" fn __truncsfhf2(a: f32) -> f16 { + trunc(a) + } + + #[aapcs_on_arm] + #[apple_f16_ret_abi] + #[cfg(f16_enabled)] + pub extern "C" fn __gnu_f2h_ieee(a: f32) -> f16 { + trunc(a) + } + + #[aapcs_on_arm] + #[apple_f16_ret_abi] + #[arm_aeabi_alias = __aeabi_d2h] + #[cfg(f16_enabled)] + pub extern "C" fn __truncdfhf2(a: f64) -> f16 { + trunc(a) + } + + #[aapcs_on_arm] + #[ppc_alias = __trunckfhf2] + #[cfg(all(f16_enabled, f128_enabled))] + pub extern "C" fn __trunctfhf2(a: f128) -> f16 { + trunc(a) + } + + #[aapcs_on_arm] + #[ppc_alias = __trunckfsf2] + #[cfg(f128_enabled)] + pub extern "C" fn __trunctfsf2(a: f128) -> f32 { + trunc(a) + } + + #[aapcs_on_arm] + #[ppc_alias = __trunckfdf2] + #[cfg(f128_enabled)] + pub extern "C" fn __trunctfdf2(a: f128) -> f64 { + trunc(a) + } +} diff --git a/library/compiler-builtins/compiler-builtins/src/hexagon.rs b/library/compiler-builtins/compiler-builtins/src/hexagon.rs new file mode 100644 index 0000000000000..91cf91c314219 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/hexagon.rs @@ -0,0 +1,55 @@ +#![cfg(not(feature = "no-asm"))] + +use core::arch::global_asm; + +global_asm!(include_str!("hexagon/func_macro.s"), options(raw)); + +global_asm!(include_str!("hexagon/dfaddsub.s"), options(raw)); + +global_asm!(include_str!("hexagon/dfdiv.s"), options(raw)); + +global_asm!(include_str!("hexagon/dffma.s"), options(raw)); + +global_asm!(include_str!("hexagon/dfminmax.s"), options(raw)); + +global_asm!(include_str!("hexagon/dfmul.s"), options(raw)); + +global_asm!(include_str!("hexagon/dfsqrt.s"), options(raw)); + +global_asm!(include_str!("hexagon/divdi3.s"), options(raw)); + +global_asm!(include_str!("hexagon/divsi3.s"), options(raw)); + +global_asm!(include_str!("hexagon/fastmath2_dlib_asm.s"), options(raw)); + +global_asm!(include_str!("hexagon/fastmath2_ldlib_asm.s"), options(raw)); + +global_asm!( + include_str!("hexagon/memcpy_forward_vp4cp4n2.s"), + options(raw) +); + +global_asm!( + include_str!("hexagon/memcpy_likely_aligned.s"), + options(raw) +); + +global_asm!(include_str!("hexagon/moddi3.s"), options(raw)); + +global_asm!(include_str!("hexagon/modsi3.s"), options(raw)); + +global_asm!(include_str!("hexagon/sfdiv_opt.s"), options(raw)); + +global_asm!(include_str!("hexagon/sfsqrt_opt.s"), options(raw)); + +global_asm!(include_str!("hexagon/udivdi3.s"), options(raw)); + +global_asm!(include_str!("hexagon/udivmoddi4.s"), options(raw)); + +global_asm!(include_str!("hexagon/udivmodsi4.s"), options(raw)); + +global_asm!(include_str!("hexagon/udivsi3.s"), options(raw)); + 
+global_asm!(include_str!("hexagon/umoddi3.s"), options(raw)); + +global_asm!(include_str!("hexagon/umodsi3.s"), options(raw)); diff --git a/library/compiler-builtins/compiler-builtins/src/hexagon/dfaddsub.s b/library/compiler-builtins/compiler-builtins/src/hexagon/dfaddsub.s new file mode 100644 index 0000000000000..1f59e460be61c --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/hexagon/dfaddsub.s @@ -0,0 +1,321 @@ + .text + .global __hexagon_adddf3 + .global __hexagon_subdf3 + .type __hexagon_adddf3, @function + .type __hexagon_subdf3, @function + +.global __qdsp_adddf3 ; .set __qdsp_adddf3, __hexagon_adddf3 +.global __hexagon_fast_adddf3 ; .set __hexagon_fast_adddf3, __hexagon_adddf3 +.global __hexagon_fast2_adddf3 ; .set __hexagon_fast2_adddf3, __hexagon_adddf3 +.global __qdsp_subdf3 ; .set __qdsp_subdf3, __hexagon_subdf3 +.global __hexagon_fast_subdf3 ; .set __hexagon_fast_subdf3, __hexagon_subdf3 +.global __hexagon_fast2_subdf3 ; .set __hexagon_fast2_subdf3, __hexagon_subdf3 + + .p2align 5 +__hexagon_adddf3: + { + r4 = extractu(r1,#11,#20) + r5 = extractu(r3,#11,#20) + r13:12 = combine(##0x20000000,#0) + } + { + p3 = dfclass(r1:0,#2) + p3 = dfclass(r3:2,#2) + r9:8 = r13:12 + p2 = cmp.gtu(r5,r4) + } + { + if (!p3) jump .Ladd_abnormal + if (p2) r1:0 = r3:2 + if (p2) r3:2 = r1:0 + if (p2) r5:4 = combine(r4,r5) + } + { + r13:12 = insert(r1:0,#52,#11 -2) + r9:8 = insert(r3:2,#52,#11 -2) + r15 = sub(r4,r5) + r7:6 = combine(#62,#1) + } + + + + + +.Ladd_continue: + { + r15 = min(r15,r7) + + r11:10 = neg(r13:12) + p2 = cmp.gt(r1,#-1) + r14 = #0 + } + { + if (!p2) r13:12 = r11:10 + r11:10 = extractu(r9:8,r15:14) + r9:8 = ASR(r9:8,r15) + + + + + r15:14 = #0 + } + { + p1 = cmp.eq(r11:10,r15:14) + if (!p1.new) r8 = or(r8,r6) + r5 = add(r4,#-1024 -60) + p3 = cmp.gt(r3,#-1) + } + { + r13:12 = add(r13:12,r9:8) + r11:10 = sub(r13:12,r9:8) + r7:6 = combine(#54,##2045) + } + { + p0 = cmp.gtu(r4,r7) + p0 = !cmp.gtu(r4,r6) + if (!p0.new) jump:nt .Ladd_ovf_unf + if (!p3) r13:12 = r11:10 + } + { + r1:0 = convert_d2df(r13:12) + p0 = cmp.eq(r13,#0) + p0 = cmp.eq(r12,#0) + if (p0.new) jump:nt .Ladd_zero + } + { + r1 += asl(r5,#20) + jumpr r31 + } + .falign +__hexagon_subdf3: + { + r3 = togglebit(r3,#31) + jump __qdsp_adddf3 + } + + + .falign +.Ladd_zero: + + + { + r28 = USR + r1:0 = #0 + r3 = #1 + } + { + r28 = extractu(r28,#2,#22) + r3 = asl(r3,#31) + } + { + p0 = cmp.eq(r28,#2) + if (p0.new) r1 = xor(r1,r3) + jumpr r31 + } + .falign +.Ladd_ovf_unf: + { + r1:0 = convert_d2df(r13:12) + p0 = cmp.eq(r13,#0) + p0 = cmp.eq(r12,#0) + if (p0.new) jump:nt .Ladd_zero + } + { + r28 = extractu(r1,#11,#20) + r1 += asl(r5,#20) + } + { + r5 = add(r5,r28) + r3:2 = combine(##0x00100000,#0) + } + { + p0 = cmp.gt(r5,##1024 +1024 -2) + if (p0.new) jump:nt .Ladd_ovf + } + { + p0 = cmp.gt(r5,#0) + if (p0.new) jumpr:t r31 + r28 = sub(#1,r5) + } + { + r3:2 = insert(r1:0,#52,#0) + r1:0 = r13:12 + } + { + r3:2 = lsr(r3:2,r28) + } + { + r1:0 = insert(r3:2,#63,#0) + jumpr r31 + } + .falign +.Ladd_ovf: + + { + r1:0 = r13:12 + r28 = USR + r13:12 = combine(##0x7fefffff,#-1) + } + { + r5 = extractu(r28,#2,#22) + r28 = or(r28,#0x28) + r9:8 = combine(##0x7ff00000,#0) + } + { + USR = r28 + r5 ^= lsr(r1,#31) + r28 = r5 + } + { + p0 = !cmp.eq(r28,#1) + p0 = !cmp.eq(r5,#2) + if (p0.new) r13:12 = r9:8 + } + { + r1:0 = insert(r13:12,#63,#0) + } + { + p0 = dfcmp.eq(r1:0,r1:0) + jumpr r31 + } + +.Ladd_abnormal: + { + r13:12 = extractu(r1:0,#63,#0) + r9:8 = extractu(r3:2,#63,#0) + } + { + p3 = cmp.gtu(r13:12,r9:8) + if (!p3.new) 
r1:0 = r3:2 + if (!p3.new) r3:2 = r1:0 + } + { + + p0 = dfclass(r1:0,#0x0f) + if (!p0.new) jump:nt .Linvalid_nan_add + if (!p3) r13:12 = r9:8 + if (!p3) r9:8 = r13:12 + } + { + + + p1 = dfclass(r1:0,#0x08) + if (p1.new) jump:nt .Linf_add + } + { + p2 = dfclass(r3:2,#0x01) + if (p2.new) jump:nt .LB_zero + r13:12 = #0 + } + + { + p0 = dfclass(r1:0,#4) + if (p0.new) jump:nt .Ladd_two_subnormal + r13:12 = combine(##0x20000000,#0) + } + { + r4 = extractu(r1,#11,#20) + r5 = #1 + + r9:8 = asl(r9:8,#11 -2) + } + + + + { + r13:12 = insert(r1:0,#52,#11 -2) + r15 = sub(r4,r5) + r7:6 = combine(#62,#1) + jump .Ladd_continue + } + +.Ladd_two_subnormal: + { + r13:12 = extractu(r1:0,#63,#0) + r9:8 = extractu(r3:2,#63,#0) + } + { + r13:12 = neg(r13:12) + r9:8 = neg(r9:8) + p0 = cmp.gt(r1,#-1) + p1 = cmp.gt(r3,#-1) + } + { + if (p0) r13:12 = r1:0 + if (p1) r9:8 = r3:2 + } + { + r13:12 = add(r13:12,r9:8) + } + { + r9:8 = neg(r13:12) + p0 = cmp.gt(r13,#-1) + r3:2 = #0 + } + { + if (!p0) r1:0 = r9:8 + if (p0) r1:0 = r13:12 + r3 = ##0x80000000 + } + { + if (!p0) r1 = or(r1,r3) + p0 = dfcmp.eq(r1:0,r3:2) + if (p0.new) jump:nt .Lzero_plus_zero + } + { + jumpr r31 + } + +.Linvalid_nan_add: + { + r28 = convert_df2sf(r1:0) + p0 = dfclass(r3:2,#0x0f) + if (p0.new) r3:2 = r1:0 + } + { + r2 = convert_df2sf(r3:2) + r1:0 = #-1 + jumpr r31 + } + .falign +.LB_zero: + { + p0 = dfcmp.eq(r13:12,r1:0) + if (!p0.new) jumpr:t r31 + } + + + + +.Lzero_plus_zero: + { + p0 = cmp.eq(r1:0,r3:2) + if (p0.new) jumpr:t r31 + } + { + r28 = USR + } + { + r28 = extractu(r28,#2,#22) + r1:0 = #0 + } + { + p0 = cmp.eq(r28,#2) + if (p0.new) r1 = ##0x80000000 + jumpr r31 + } +.Linf_add: + + { + p0 = !cmp.eq(r1,r3) + p0 = dfclass(r3:2,#8) + if (!p0.new) jumpr:t r31 + } + { + r2 = ##0x7f800001 + } + { + r1:0 = convert_sf2df(r2) + jumpr r31 + } +.size __hexagon_adddf3,.-__hexagon_adddf3 diff --git a/library/compiler-builtins/compiler-builtins/src/hexagon/dfdiv.s b/library/compiler-builtins/compiler-builtins/src/hexagon/dfdiv.s new file mode 100644 index 0000000000000..6d65dbfc4df1d --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/hexagon/dfdiv.s @@ -0,0 +1,372 @@ + .text + .global __hexagon_divdf3 + .type __hexagon_divdf3,@function + .global __qdsp_divdf3 ; .set __qdsp_divdf3, __hexagon_divdf3 + .global __hexagon_fast_divdf3 ; .set __hexagon_fast_divdf3, __hexagon_divdf3 + .global __hexagon_fast2_divdf3 ; .set __hexagon_fast2_divdf3, __hexagon_divdf3 + .p2align 5 +__hexagon_divdf3: + { + p2 = dfclass(r1:0,#0x02) + p2 = dfclass(r3:2,#0x02) + r13:12 = combine(r3,r1) + r28 = xor(r1,r3) + } + { + if (!p2) jump .Ldiv_abnormal + r7:6 = extractu(r3:2,#23,#52 -23) + r8 = ##0x3f800001 + } + { + r9 = or(r8,r6) + r13 = extractu(r13,#11,#52 -32) + r12 = extractu(r12,#11,#52 -32) + p3 = cmp.gt(r28,#-1) + } + + +.Ldenorm_continue: + { + r11,p0 = sfrecipa(r8,r9) + r10 = and(r8,#-2) + r28 = #1 + r12 = sub(r12,r13) + } + + + { + r10 -= sfmpy(r11,r9):lib + r1 = insert(r28,#11 +1,#52 -32) + r13 = ##0x00800000 << 3 + } + { + r11 += sfmpy(r11,r10):lib + r3 = insert(r28,#11 +1,#52 -32) + r10 = and(r8,#-2) + } + { + r10 -= sfmpy(r11,r9):lib + r5 = #-0x3ff +1 + r4 = #0x3ff -1 + } + { + r11 += sfmpy(r11,r10):lib + p1 = cmp.gt(r12,r5) + p1 = !cmp.gt(r12,r4) + } + { + r13 = insert(r11,#23,#3) + r5:4 = #0 + r12 = add(r12,#-61) + } + + + + + { + r13 = add(r13,#((-3) << 3)) + } + { r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASL(r7:6, # ( 14 )); r1:0 -= asl(r15:14, # 32); } + { r7:6 = 
mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASR(r7:6, # ( 1 )); r1:0 -= asl(r15:14, # 32); } + { r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASR(r7:6, # ( 16 )); r1:0 -= asl(r15:14, # 32); } + { r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASR(r7:6, # ( 31 )); r1:0 -= asl(r15:14, # 32); r7:6=# ( 0 ); } + + + + + + + + { + + r15:14 = sub(r1:0,r3:2) + p0 = cmp.gtu(r3:2,r1:0) + + if (!p0.new) r6 = #2 + } + { + r5:4 = add(r5:4,r7:6) + if (!p0) r1:0 = r15:14 + r15:14 = #0 + } + { + p0 = cmp.eq(r1:0,r15:14) + if (!p0.new) r4 = or(r4,r28) + } + { + r7:6 = neg(r5:4) + } + { + if (!p3) r5:4 = r7:6 + } + { + r1:0 = convert_d2df(r5:4) + if (!p1) jump .Ldiv_ovf_unf + } + { + r1 += asl(r12,#52 -32) + jumpr r31 + } + +.Ldiv_ovf_unf: + { + r1 += asl(r12,#52 -32) + r13 = extractu(r1,#11,#52 -32) + } + { + r7:6 = abs(r5:4) + r12 = add(r12,r13) + } + { + p0 = cmp.gt(r12,##0x3ff +0x3ff) + if (p0.new) jump:nt .Ldiv_ovf + } + { + p0 = cmp.gt(r12,#0) + if (p0.new) jump:nt .Lpossible_unf2 + } + { + r13 = add(clb(r7:6),#-1) + r12 = sub(#7,r12) + r10 = USR + r11 = #63 + } + { + r13 = min(r12,r11) + r11 = or(r10,#0x030) + r7:6 = asl(r7:6,r13) + r12 = #0 + } + { + r15:14 = extractu(r7:6,r13:12) + r7:6 = lsr(r7:6,r13) + r3:2 = #1 + } + { + p0 = cmp.gtu(r3:2,r15:14) + if (!p0.new) r6 = or(r2,r6) + r7 = setbit(r7,#52 -32+4) + } + { + r5:4 = neg(r7:6) + p0 = bitsclr(r6,#(1<<4)-1) + if (!p0.new) r10 = r11 + } + { + USR = r10 + if (p3) r5:4 = r7:6 + r10 = #-0x3ff -(52 +4) + } + { + r1:0 = convert_d2df(r5:4) + } + { + r1 += asl(r10,#52 -32) + jumpr r31 + } + + +.Lpossible_unf2: + + + { + r3:2 = extractu(r1:0,#63,#0) + r15:14 = combine(##0x00100000,#0) + r10 = #0x7FFF + } + { + p0 = dfcmp.eq(r15:14,r3:2) + p0 = bitsset(r7,r10) + } + + + + + + + { + if (!p0) jumpr r31 + r10 = USR + } + + { + r10 = or(r10,#0x30) + } + { + USR = r10 + } + { + p0 = dfcmp.eq(r1:0,r1:0) + jumpr r31 + } + +.Ldiv_ovf: + + + + { + r10 = USR + r3:2 = combine(##0x7fefffff,#-1) + r1 = mux(p3,#0,#-1) + } + { + r7:6 = combine(##0x7ff00000,#0) + r5 = extractu(r10,#2,#22) + r10 = or(r10,#0x28) + } + { + USR = r10 + r5 ^= lsr(r1,#31) + r4 = r5 + } + { + p0 = !cmp.eq(r4,#1) + p0 = !cmp.eq(r5,#2) + if (p0.new) r3:2 = r7:6 + p0 = dfcmp.eq(r3:2,r3:2) + } + { + r1:0 = insert(r3:2,#63,#0) + jumpr r31 + } + + + + + + + +.Ldiv_abnormal: + { + p0 = dfclass(r1:0,#0x0F) + p0 = dfclass(r3:2,#0x0F) + p3 = cmp.gt(r28,#-1) + } + { + p1 = dfclass(r1:0,#0x08) + p1 = dfclass(r3:2,#0x08) + } + { + p2 = dfclass(r1:0,#0x01) + p2 = dfclass(r3:2,#0x01) + } + { + if (!p0) jump .Ldiv_nan + if (p1) jump .Ldiv_invalid + } + { + if (p2) jump .Ldiv_invalid + } + { + p2 = dfclass(r1:0,#(0x0F ^ 0x01)) + p2 = dfclass(r3:2,#(0x0F ^ 0x08)) + } + { + p1 = dfclass(r1:0,#(0x0F ^ 0x08)) + p1 = dfclass(r3:2,#(0x0F ^ 0x01)) + } + { + if (!p2) jump .Ldiv_zero_result + if (!p1) jump .Ldiv_inf_result + } + + + + + + { + p0 = dfclass(r1:0,#0x02) + p1 = dfclass(r3:2,#0x02) + r10 = ##0x00100000 + } + { + r13:12 = combine(r3,r1) + r1 = insert(r10,#11 +1,#52 -32) + r3 = insert(r10,#11 +1,#52 -32) + } + { + if (p0) r1 = or(r1,r10) + if (p1) r3 = or(r3,r10) + } + { + r5 = add(clb(r1:0),#-11) + r4 = add(clb(r3:2),#-11) + r10 = #1 + } + { + r12 = extractu(r12,#11,#52 -32) + r13 = extractu(r13,#11,#52 -32) + } + { + r1:0 = asl(r1:0,r5) + r3:2 = asl(r3:2,r4) + if (!p0) r12 = sub(r10,r5) + if (!p1) 
r13 = sub(r10,r4) + } + { + r7:6 = extractu(r3:2,#23,#52 -23) + } + { + r9 = or(r8,r6) + jump .Ldenorm_continue + } + +.Ldiv_zero_result: + { + r1 = xor(r1,r3) + r3:2 = #0 + } + { + r1:0 = insert(r3:2,#63,#0) + jumpr r31 + } +.Ldiv_inf_result: + { + p2 = dfclass(r3:2,#0x01) + p2 = dfclass(r1:0,#(0x0F ^ 0x08)) + } + { + r10 = USR + if (!p2) jump 1f + r1 = xor(r1,r3) + } + { + r10 = or(r10,#0x04) + } + { + USR = r10 + } +1: + { + r3:2 = combine(##0x7ff00000,#0) + p0 = dfcmp.uo(r3:2,r3:2) + } + { + r1:0 = insert(r3:2,#63,#0) + jumpr r31 + } +.Ldiv_nan: + { + p0 = dfclass(r1:0,#0x10) + p1 = dfclass(r3:2,#0x10) + if (!p0.new) r1:0 = r3:2 + if (!p1.new) r3:2 = r1:0 + } + { + r5 = convert_df2sf(r1:0) + r4 = convert_df2sf(r3:2) + } + { + r1:0 = #-1 + jumpr r31 + } + +.Ldiv_invalid: + { + r10 = ##0x7f800001 + } + { + r1:0 = convert_sf2df(r10) + jumpr r31 + } +.size __hexagon_divdf3,.-__hexagon_divdf3 diff --git a/library/compiler-builtins/compiler-builtins/src/hexagon/dffma.s b/library/compiler-builtins/compiler-builtins/src/hexagon/dffma.s new file mode 100644 index 0000000000000..97d05eb1839ee --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/hexagon/dffma.s @@ -0,0 +1,534 @@ + .text + .global __hexagon_fmadf4 + .type __hexagon_fmadf4,@function + .global __hexagon_fmadf5 + .type __hexagon_fmadf5,@function + .global __qdsp_fmadf5 ; .set __qdsp_fmadf5, __hexagon_fmadf5 + .p2align 5 +__hexagon_fmadf4: +__hexagon_fmadf5: +fma: + { + p0 = dfclass(r1:0,#2) + p0 = dfclass(r3:2,#2) + r13:12 = #0 + r15:14 = #0 + } + { + r13:12 = insert(r1:0,#52,#11 -3) + r15:14 = insert(r3:2,#52,#11 -3) + r7 = ##0x10000000 + allocframe(#32) + } + { + r9:8 = mpyu(r12,r14) + if (!p0) jump .Lfma_abnormal_ab + r13 = or(r13,r7) + r15 = or(r15,r7) + } + { + p0 = dfclass(r5:4,#2) + if (!p0.new) jump:nt .Lfma_abnormal_c + r11:10 = combine(r7,#0) + r7:6 = combine(#0,r9) + } +.Lfma_abnormal_c_restart: + { + r7:6 += mpyu(r14,r13) + r11:10 = insert(r5:4,#52,#11 -3) + memd(r29+#0) = r17:16 + memd(r29+#8) = r19:18 + } + { + r7:6 += mpyu(r12,r15) + r19:18 = neg(r11:10) + p0 = cmp.gt(r5,#-1) + r28 = xor(r1,r3) + } + { + r18 = extractu(r1,#11,#20) + r19 = extractu(r3,#11,#20) + r17:16 = combine(#0,r7) + if (!p0) r11:10 = r19:18 + } + { + r17:16 += mpyu(r13,r15) + r9:8 = combine(r6,r8) + r18 = add(r18,r19) + + + + + r19 = extractu(r5,#11,#20) + } + { + r18 = add(r18,#-1023 +(4)) + p3 = !cmp.gt(r28,#-1) + r7:6 = #0 + r15:14 = #0 + } + { + r7:6 = sub(r7:6,r9:8,p3):carry + p0 = !cmp.gt(r28,#-1) + p1 = cmp.gt(r19,r18) + if (p1.new) r19:18 = combine(r18,r19) + } + { + r15:14 = sub(r15:14,r17:16,p3):carry + if (p0) r9:8 = r7:6 + + + + + r7:6 = #0 + r19 = sub(r18,r19) + } + { + if (p0) r17:16 = r15:14 + p0 = cmp.gt(r19,#63) + if (p1) r9:8 = r7:6 + if (p1) r7:6 = r9:8 + } + + + + + + + + { + if (p1) r17:16 = r11:10 + if (p1) r11:10 = r17:16 + if (p0) r19 = add(r19,#-64) + r28 = #63 + } + { + + if (p0) r7:6 = r11:10 + r28 = asr(r11,#31) + r13 = min(r19,r28) + r12 = #0 + } + + + + + + + { + if (p0) r11:10 = combine(r28,r28) + r5:4 = extract(r7:6,r13:12) + r7:6 = lsr(r7:6,r13) + r12 = sub(#64,r13) + } + { + r15:14 = #0 + r28 = #-2 + r7:6 |= lsl(r11:10,r12) + r11:10 = asr(r11:10,r13) + } + { + p3 = cmp.gtu(r5:4,r15:14) + if (p3.new) r6 = and(r6,r28) + + + + r15:14 = #1 + r5:4 = #0 + } + { + r9:8 = add(r7:6,r9:8,p3):carry + } + { + r17:16 = add(r11:10,r17:16,p3):carry + r28 = #62 + } + + + + + + + + { + r12 = add(clb(r17:16),#-2) + if (!cmp.eq(r12.new,r28)) jump:t 1f + } + + { + r11:10 = extractu(r9:8,#62,#2) + r9:8 = asl(r9:8,#62) + 
r18 = add(r18,#-62) + } + { + r17:16 = insert(r11:10,#62,#0) + } + { + r12 = add(clb(r17:16),#-2) + } + .falign +1: + { + r11:10 = asl(r17:16,r12) + r5:4 |= asl(r9:8,r12) + r13 = sub(#64,r12) + r18 = sub(r18,r12) + } + { + r11:10 |= lsr(r9:8,r13) + p2 = cmp.gtu(r15:14,r5:4) + r28 = #1023 +1023 -2 + } + { + if (!p2) r10 = or(r10,r14) + + p0 = !cmp.gt(r18,r28) + p0 = cmp.gt(r18,#1) + if (!p0.new) jump:nt .Lfma_ovf_unf + } + { + + p0 = cmp.gtu(r15:14,r11:10) + r1:0 = convert_d2df(r11:10) + r18 = add(r18,#-1023 -60) + r17:16 = memd(r29+#0) + } + { + r1 += asl(r18,#20) + r19:18 = memd(r29+#8) + if (!p0) dealloc_return + } +.Ladd_yields_zero: + + { + r28 = USR + r1:0 = #0 + } + { + r28 = extractu(r28,#2,#22) + r17:16 = memd(r29+#0) + r19:18 = memd(r29+#8) + } + { + p0 = cmp.eq(r28,#2) + if (p0.new) r1 = ##0x80000000 + dealloc_return + } +.Lfma_ovf_unf: + { + p0 = cmp.gtu(r15:14,r11:10) + if (p0.new) jump:nt .Ladd_yields_zero + } + { + r1:0 = convert_d2df(r11:10) + r18 = add(r18,#-1023 -60) + r28 = r18 + } + + + { + r1 += asl(r18,#20) + r7 = extractu(r1,#11,#20) + } + { + r6 = add(r18,r7) + r17:16 = memd(r29+#0) + r19:18 = memd(r29+#8) + r9:8 = abs(r11:10) + } + { + p0 = cmp.gt(r6,##1023 +1023) + if (p0.new) jump:nt .Lfma_ovf + } + { + p0 = cmp.gt(r6,#0) + if (p0.new) jump:nt .Lpossible_unf0 + } + { + + + + r7 = add(clb(r9:8),#-2) + r6 = sub(#1+5,r28) + p3 = cmp.gt(r11,#-1) + } + + + + { + r6 = add(r6,r7) + r9:8 = asl(r9:8,r7) + r1 = USR + r28 = #63 + } + { + r7 = min(r6,r28) + r6 = #0 + r0 = #0x0030 + } + { + r3:2 = extractu(r9:8,r7:6) + r9:8 = asr(r9:8,r7) + } + { + p0 = cmp.gtu(r15:14,r3:2) + if (!p0.new) r8 = or(r8,r14) + r9 = setbit(r9,#20 +3) + } + { + r11:10 = neg(r9:8) + p1 = bitsclr(r8,#(1<<3)-1) + if (!p1.new) r1 = or(r1,r0) + r3:2 = #0 + } + { + if (p3) r11:10 = r9:8 + USR = r1 + r28 = #-1023 -(52 +3) + } + { + r1:0 = convert_d2df(r11:10) + } + { + r1 += asl(r28,#20) + dealloc_return + } +.Lpossible_unf0: + { + r28 = ##0x7fefffff + r9:8 = abs(r11:10) + } + { + p0 = cmp.eq(r0,#0) + p0 = bitsclr(r1,r28) + if (!p0.new) dealloc_return:t + r28 = #0x7fff + } + { + p0 = bitsset(r9,r28) + r3 = USR + r2 = #0x0030 + } + { + if (p0) r3 = or(r3,r2) + } + { + USR = r3 + } + { + p0 = dfcmp.eq(r1:0,r1:0) + dealloc_return + } +.Lfma_ovf: + { + r28 = USR + r11:10 = combine(##0x7fefffff,#-1) + r1:0 = r11:10 + } + { + r9:8 = combine(##0x7ff00000,#0) + r3 = extractu(r28,#2,#22) + r28 = or(r28,#0x28) + } + { + USR = r28 + r3 ^= lsr(r1,#31) + r2 = r3 + } + { + p0 = !cmp.eq(r2,#1) + p0 = !cmp.eq(r3,#2) + } + { + p0 = dfcmp.eq(r9:8,r9:8) + if (p0.new) r11:10 = r9:8 + } + { + r1:0 = insert(r11:10,#63,#0) + dealloc_return + } +.Lfma_abnormal_ab: + { + r9:8 = extractu(r1:0,#63,#0) + r11:10 = extractu(r3:2,#63,#0) + deallocframe + } + { + p3 = cmp.gtu(r9:8,r11:10) + if (!p3.new) r1:0 = r3:2 + if (!p3.new) r3:2 = r1:0 + } + { + p0 = dfclass(r1:0,#0x0f) + if (!p0.new) jump:nt .Lnan + if (!p3) r9:8 = r11:10 + if (!p3) r11:10 = r9:8 + } + { + p1 = dfclass(r1:0,#0x08) + p1 = dfclass(r3:2,#0x0e) + } + { + p0 = dfclass(r1:0,#0x08) + p0 = dfclass(r3:2,#0x01) + } + { + if (p1) jump .Lab_inf + p2 = dfclass(r3:2,#0x01) + } + { + if (p0) jump .Linvalid + if (p2) jump .Lab_true_zero + r28 = ##0x7c000000 + } + + + + + + { + p0 = bitsclr(r1,r28) + if (p0.new) jump:nt .Lfma_ab_tiny + } + { + r28 = add(clb(r11:10),#-11) + } + { + r11:10 = asl(r11:10,r28) + } + { + r3:2 = insert(r11:10,#63,#0) + r1 -= asl(r28,#20) + } + jump fma + +.Lfma_ab_tiny: + r9:8 = combine(##0x00100000,#0) + { + r1:0 = insert(r9:8,#63,#0) + r3:2 = 
insert(r9:8,#63,#0) + } + jump fma + +.Lab_inf: + { + r3:2 = lsr(r3:2,#63) + p0 = dfclass(r5:4,#0x10) + } + { + r1:0 ^= asl(r3:2,#63) + if (p0) jump .Lnan + } + { + p1 = dfclass(r5:4,#0x08) + if (p1.new) jump:nt .Lfma_inf_plus_inf + } + + { + jumpr r31 + } + .falign +.Lfma_inf_plus_inf: + { + p0 = dfcmp.eq(r1:0,r5:4) + if (!p0.new) jump:nt .Linvalid + } + { + jumpr r31 + } + +.Lnan: + { + p0 = dfclass(r3:2,#0x10) + p1 = dfclass(r5:4,#0x10) + if (!p0.new) r3:2 = r1:0 + if (!p1.new) r5:4 = r1:0 + } + { + r3 = convert_df2sf(r3:2) + r2 = convert_df2sf(r5:4) + } + { + r3 = convert_df2sf(r1:0) + r1:0 = #-1 + jumpr r31 + } + +.Linvalid: + { + r28 = ##0x7f800001 + } + { + r1:0 = convert_sf2df(r28) + jumpr r31 + } + +.Lab_true_zero: + + { + p0 = dfclass(r5:4,#0x10) + if (p0.new) jump:nt .Lnan + if (p0.new) r1:0 = r5:4 + } + { + p0 = dfcmp.eq(r3:2,r5:4) + r1 = lsr(r1,#31) + } + { + r3 ^= asl(r1,#31) + if (!p0) r1:0 = r5:4 + if (!p0) jumpr r31 + } + + { + p0 = cmp.eq(r3:2,r5:4) + if (p0.new) jumpr:t r31 + r1:0 = r3:2 + } + { + r28 = USR + } + { + r28 = extractu(r28,#2,#22) + r1:0 = #0 + } + { + p0 = cmp.eq(r28,#2) + if (p0.new) r1 = ##0x80000000 + jumpr r31 + } + + + + + .falign +.Lfma_abnormal_c: + + + { + p0 = dfclass(r5:4,#0x10) + if (p0.new) jump:nt .Lnan + if (p0.new) r1:0 = r5:4 + deallocframe + } + { + p0 = dfclass(r5:4,#0x08) + if (p0.new) r1:0 = r5:4 + if (p0.new) jumpr:nt r31 + } + + + { + p0 = dfclass(r5:4,#0x01) + if (p0.new) jump:nt __hexagon_muldf3 + r28 = #1 + } + + + { + allocframe(#32) + r11:10 = #0 + r5 = insert(r28,#11,#20) + jump .Lfma_abnormal_c_restart + } +.size fma,.-fma diff --git a/library/compiler-builtins/compiler-builtins/src/hexagon/dfminmax.s b/library/compiler-builtins/compiler-builtins/src/hexagon/dfminmax.s new file mode 100644 index 0000000000000..953e773bf1946 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/hexagon/dfminmax.s @@ -0,0 +1,45 @@ + .text + .global __hexagon_mindf3 + .global __hexagon_maxdf3 + .type __hexagon_mindf3,@function + .type __hexagon_maxdf3,@function + .global __qdsp_mindf3 ; .set __qdsp_mindf3, __hexagon_mindf3 + .global __qdsp_maxdf3 ; .set __qdsp_maxdf3, __hexagon_maxdf3 + .p2align 5 +__hexagon_mindf3: + { + p0 = dfclass(r1:0,#0x10) + p1 = dfcmp.gt(r1:0,r3:2) + r5:4 = r1:0 + } + { + if (p0) r1:0 = r3:2 + if (p1) r1:0 = r3:2 + p2 = dfcmp.eq(r1:0,r3:2) + if (!p2.new) jumpr:t r31 + } + + { + r1:0 = or(r5:4,r3:2) + jumpr r31 + } +.size __hexagon_mindf3,.-__hexagon_mindf3 + .falign +__hexagon_maxdf3: + { + p0 = dfclass(r1:0,#0x10) + p1 = dfcmp.gt(r3:2,r1:0) + r5:4 = r1:0 + } + { + if (p0) r1:0 = r3:2 + if (p1) r1:0 = r3:2 + p2 = dfcmp.eq(r1:0,r3:2) + if (!p2.new) jumpr:t r31 + } + + { + r1:0 = and(r5:4,r3:2) + jumpr r31 + } +.size __hexagon_maxdf3,.-__hexagon_maxdf3 diff --git a/library/compiler-builtins/compiler-builtins/src/hexagon/dfmul.s b/library/compiler-builtins/compiler-builtins/src/hexagon/dfmul.s new file mode 100644 index 0000000000000..32fc674f975d1 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/hexagon/dfmul.s @@ -0,0 +1,309 @@ + .text + .global __hexagon_muldf3 + .type __hexagon_muldf3,@function + .global __qdsp_muldf3 ; .set __qdsp_muldf3, __hexagon_muldf3 + .global __hexagon_fast_muldf3 ; .set __hexagon_fast_muldf3, __hexagon_muldf3 + .global __hexagon_fast2_muldf3 ; .set __hexagon_fast2_muldf3, __hexagon_muldf3 + .p2align 5 +__hexagon_muldf3: + { + p0 = dfclass(r1:0,#2) + p0 = dfclass(r3:2,#2) + r13:12 = combine(##0x40000000,#0) + } + { + r13:12 = insert(r1:0,#52,#11 -1) + r5:4 = 
asl(r3:2,#11 -1) + r28 = #-1024 + r9:8 = #1 + } + { + r7:6 = mpyu(r4,r13) + r5:4 = insert(r9:8,#2,#62) + } + + + + + { + r15:14 = mpyu(r12,r4) + r7:6 += mpyu(r12,r5) + } + { + r7:6 += lsr(r15:14,#32) + r11:10 = mpyu(r13,r5) + r5:4 = combine(##1024 +1024 -4,#0) + } + { + r11:10 += lsr(r7:6,#32) + if (!p0) jump .Lmul_abnormal + p1 = cmp.eq(r14,#0) + p1 = cmp.eq(r6,#0) + } + { + if (!p1) r10 = or(r10,r8) + r6 = extractu(r1,#11,#20) + r7 = extractu(r3,#11,#20) + } + { + r15:14 = neg(r11:10) + r6 += add(r28,r7) + r28 = xor(r1,r3) + } + { + if (!p2.new) r11:10 = r15:14 + p2 = cmp.gt(r28,#-1) + p0 = !cmp.gt(r6,r5) + p0 = cmp.gt(r6,r4) + if (!p0.new) jump:nt .Lmul_ovf_unf + } + { + r1:0 = convert_d2df(r11:10) + r6 = add(r6,#-1024 -58) + } + { + r1 += asl(r6,#20) + jumpr r31 + } + + .falign +.Lpossible_unf1: + { + p0 = cmp.eq(r0,#0) + p0 = bitsclr(r1,r4) + if (!p0.new) jumpr:t r31 + r5 = #0x7fff + } + { + p0 = bitsset(r13,r5) + r4 = USR + r5 = #0x030 + } + { + if (p0) r4 = or(r4,r5) + } + { + USR = r4 + } + { + p0 = dfcmp.eq(r1:0,r1:0) + jumpr r31 + } + .falign +.Lmul_ovf_unf: + { + r1:0 = convert_d2df(r11:10) + r13:12 = abs(r11:10) + r7 = add(r6,#-1024 -58) + } + { + r1 += asl(r7,#20) + r7 = extractu(r1,#11,#20) + r4 = ##0x7FEFFFFF + } + { + r7 += add(r6,##-1024 -58) + + r5 = #0 + } + { + p0 = cmp.gt(r7,##1024 +1024 -2) + if (p0.new) jump:nt .Lmul_ovf + } + { + p0 = cmp.gt(r7,#0) + if (p0.new) jump:nt .Lpossible_unf1 + r5 = sub(r6,r5) + r28 = #63 + } + { + r4 = #0 + r5 = sub(#5,r5) + } + { + p3 = cmp.gt(r11,#-1) + r5 = min(r5,r28) + r11:10 = r13:12 + } + { + r28 = USR + r15:14 = extractu(r11:10,r5:4) + } + { + r11:10 = asr(r11:10,r5) + r4 = #0x0030 + r1 = insert(r9,#11,#20) + } + { + p0 = cmp.gtu(r9:8,r15:14) + if (!p0.new) r10 = or(r10,r8) + r11 = setbit(r11,#20 +3) + } + { + r15:14 = neg(r11:10) + p1 = bitsclr(r10,#0x7) + if (!p1.new) r28 = or(r4,r28) + } + { + if (!p3) r11:10 = r15:14 + USR = r28 + } + { + r1:0 = convert_d2df(r11:10) + p0 = dfcmp.eq(r1:0,r1:0) + } + { + r1 = insert(r9,#11 -1,#20 +1) + jumpr r31 + } + .falign +.Lmul_ovf: + + { + r28 = USR + r13:12 = combine(##0x7fefffff,#-1) + r1:0 = r11:10 + } + { + r14 = extractu(r28,#2,#22) + r28 = or(r28,#0x28) + r5:4 = combine(##0x7ff00000,#0) + } + { + USR = r28 + r14 ^= lsr(r1,#31) + r28 = r14 + } + { + p0 = !cmp.eq(r28,#1) + p0 = !cmp.eq(r14,#2) + if (p0.new) r13:12 = r5:4 + p0 = dfcmp.eq(r1:0,r1:0) + } + { + r1:0 = insert(r13:12,#63,#0) + jumpr r31 + } + +.Lmul_abnormal: + { + r13:12 = extractu(r1:0,#63,#0) + r5:4 = extractu(r3:2,#63,#0) + } + { + p3 = cmp.gtu(r13:12,r5:4) + if (!p3.new) r1:0 = r3:2 + if (!p3.new) r3:2 = r1:0 + } + { + + p0 = dfclass(r1:0,#0x0f) + if (!p0.new) jump:nt .Linvalid_nan + if (!p3) r13:12 = r5:4 + if (!p3) r5:4 = r13:12 + } + { + + p1 = dfclass(r1:0,#0x08) + p1 = dfclass(r3:2,#0x0e) + } + { + + + p0 = dfclass(r1:0,#0x08) + p0 = dfclass(r3:2,#0x01) + } + { + if (p1) jump .Ltrue_inf + p2 = dfclass(r3:2,#0x01) + } + { + if (p0) jump .Linvalid_zeroinf + if (p2) jump .Ltrue_zero + r28 = ##0x7c000000 + } + + + + + + { + p0 = bitsclr(r1,r28) + if (p0.new) jump:nt .Lmul_tiny + } + { + r28 = cl0(r5:4) + } + { + r28 = add(r28,#-11) + } + { + r5:4 = asl(r5:4,r28) + } + { + r3:2 = insert(r5:4,#63,#0) + r1 -= asl(r28,#20) + } + jump __hexagon_muldf3 +.Lmul_tiny: + { + r28 = USR + r1:0 = xor(r1:0,r3:2) + } + { + r28 = or(r28,#0x30) + r1:0 = insert(r9:8,#63,#0) + r5 = extractu(r28,#2,#22) + } + { + USR = r28 + p0 = cmp.gt(r5,#1) + if (!p0.new) r0 = #0 + r5 ^= lsr(r1,#31) + } + { + p0 = cmp.eq(r5,#3) + if (!p0.new) r0 = #0 + 
jumpr r31 + } +.Linvalid_zeroinf: + { + r28 = USR + } + { + r1:0 = #-1 + r28 = or(r28,#2) + } + { + USR = r28 + } + { + p0 = dfcmp.uo(r1:0,r1:0) + jumpr r31 + } +.Linvalid_nan: + { + p0 = dfclass(r3:2,#0x0f) + r28 = convert_df2sf(r1:0) + if (p0.new) r3:2 = r1:0 + } + { + r2 = convert_df2sf(r3:2) + r1:0 = #-1 + jumpr r31 + } + .falign +.Ltrue_zero: + { + r1:0 = r3:2 + r3:2 = r1:0 + } +.Ltrue_inf: + { + r3 = extract(r3,#1,#31) + } + { + r1 ^= asl(r3,#31) + jumpr r31 + } +.size __hexagon_muldf3,.-__hexagon_muldf3 diff --git a/library/compiler-builtins/compiler-builtins/src/hexagon/dfsqrt.s b/library/compiler-builtins/compiler-builtins/src/hexagon/dfsqrt.s new file mode 100644 index 0000000000000..14f584a11339d --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/hexagon/dfsqrt.s @@ -0,0 +1,277 @@ + .text + .global __hexagon_sqrtdf2 + .type __hexagon_sqrtdf2,@function + .global __hexagon_sqrt + .type __hexagon_sqrt,@function + .global __qdsp_sqrtdf2 ; .set __qdsp_sqrtdf2, __hexagon_sqrtdf2; .type __qdsp_sqrtdf2,@function + .global __qdsp_sqrt ; .set __qdsp_sqrt, __hexagon_sqrt; .type __qdsp_sqrt,@function + .global __hexagon_fast_sqrtdf2 ; .set __hexagon_fast_sqrtdf2, __hexagon_sqrtdf2; .type __hexagon_fast_sqrtdf2,@function + .global __hexagon_fast_sqrt ; .set __hexagon_fast_sqrt, __hexagon_sqrt; .type __hexagon_fast_sqrt,@function + .global __hexagon_fast2_sqrtdf2 ; .set __hexagon_fast2_sqrtdf2, __hexagon_sqrtdf2; .type __hexagon_fast2_sqrtdf2,@function + .global __hexagon_fast2_sqrt ; .set __hexagon_fast2_sqrt, __hexagon_sqrt; .type __hexagon_fast2_sqrt,@function + .type sqrt,@function + .p2align 5 +__hexagon_sqrtdf2: +__hexagon_sqrt: + { + r15:14 = extractu(r1:0,#23 +1,#52 -23) + r28 = extractu(r1,#11,#52 -32) + r5:4 = combine(##0x3f000004,#1) + } + { + p2 = dfclass(r1:0,#0x02) + p2 = cmp.gt(r1,#-1) + if (!p2.new) jump:nt .Lsqrt_abnormal + r9 = or(r5,r14) + } + +.Ldenormal_restart: + { + r11:10 = r1:0 + r7,p0 = sfinvsqrta(r9) + r5 = and(r5,#-16) + r3:2 = #0 + } + { + r3 += sfmpy(r7,r9):lib + r2 += sfmpy(r7,r5):lib + r6 = r5 + + + r9 = and(r28,#1) + } + { + r6 -= sfmpy(r3,r2):lib + r11 = insert(r4,#11 +1,#52 -32) + p1 = cmp.gtu(r9,#0) + } + { + r3 += sfmpy(r3,r6):lib + r2 += sfmpy(r2,r6):lib + r6 = r5 + r9 = mux(p1,#8,#9) + } + { + r6 -= sfmpy(r3,r2):lib + r11:10 = asl(r11:10,r9) + r9 = mux(p1,#3,#2) + } + { + r2 += sfmpy(r2,r6):lib + + r15:14 = asl(r11:10,r9) + } + { + r2 = and(r2,##0x007fffff) + } + { + r2 = add(r2,##0x00800000 - 3) + r9 = mux(p1,#7,#8) + } + { + r8 = asl(r2,r9) + r9 = mux(p1,#15-(1+1),#15-(1+0)) + } + { + r13:12 = mpyu(r8,r15) + } + { + r1:0 = asl(r11:10,#15) + r15:14 = mpyu(r13,r13) + p1 = cmp.eq(r0,r0) + } + { + r1:0 -= asl(r15:14,#15) + r15:14 = mpyu(r13,r12) + p2 = cmp.eq(r0,r0) + } + { + r1:0 -= lsr(r15:14,#16) + p3 = cmp.eq(r0,r0) + } + { + r1:0 = mpyu(r1,r8) + } + { + r13:12 += lsr(r1:0,r9) + r9 = add(r9,#16) + r1:0 = asl(r11:10,#31) + } + + { + r15:14 = mpyu(r13,r13) + r1:0 -= mpyu(r13,r12) + } + { + r1:0 -= asl(r15:14,#31) + r15:14 = mpyu(r12,r12) + } + { + r1:0 -= lsr(r15:14,#33) + } + { + r1:0 = mpyu(r1,r8) + } + { + r13:12 += lsr(r1:0,r9) + r9 = add(r9,#16) + r1:0 = asl(r11:10,#47) + } + + { + r15:14 = mpyu(r13,r13) + } + { + r1:0 -= asl(r15:14,#47) + r15:14 = mpyu(r13,r12) + } + { + r1:0 -= asl(r15:14,#16) + r15:14 = mpyu(r12,r12) + } + { + r1:0 -= lsr(r15:14,#17) + } + { + r1:0 = mpyu(r1,r8) + } + { + r13:12 += lsr(r1:0,r9) + } + { + r3:2 = mpyu(r13,r12) + r5:4 = mpyu(r12,r12) + r15:14 = #0 + r1:0 = #0 + } + { + r3:2 += lsr(r5:4,#33) + r5:4 
+= asl(r3:2,#33) + p1 = cmp.eq(r0,r0) + } + { + r7:6 = mpyu(r13,r13) + r1:0 = sub(r1:0,r5:4,p1):carry + r9:8 = #1 + } + { + r7:6 += lsr(r3:2,#31) + r9:8 += asl(r13:12,#1) + } + + + + + + { + r15:14 = sub(r11:10,r7:6,p1):carry + r5:4 = sub(r1:0,r9:8,p2):carry + + + + + r7:6 = #1 + r11:10 = #0 + } + { + r3:2 = sub(r15:14,r11:10,p2):carry + r7:6 = add(r13:12,r7:6) + r28 = add(r28,#-0x3ff) + } + { + + if (p2) r13:12 = r7:6 + if (p2) r1:0 = r5:4 + if (p2) r15:14 = r3:2 + } + { + r5:4 = sub(r1:0,r9:8,p3):carry + r7:6 = #1 + r28 = asr(r28,#1) + } + { + r3:2 = sub(r15:14,r11:10,p3):carry + r7:6 = add(r13:12,r7:6) + } + { + if (p3) r13:12 = r7:6 + if (p3) r1:0 = r5:4 + + + + + + r2 = #1 + } + { + p0 = cmp.eq(r1:0,r11:10) + if (!p0.new) r12 = or(r12,r2) + r3 = cl0(r13:12) + r28 = add(r28,#-63) + } + + + + { + r1:0 = convert_ud2df(r13:12) + r28 = add(r28,r3) + } + { + r1 += asl(r28,#52 -32) + jumpr r31 + } +.Lsqrt_abnormal: + { + p0 = dfclass(r1:0,#0x01) + if (p0.new) jumpr:t r31 + } + { + p0 = dfclass(r1:0,#0x10) + if (p0.new) jump:nt .Lsqrt_nan + } + { + p0 = cmp.gt(r1,#-1) + if (!p0.new) jump:nt .Lsqrt_invalid_neg + if (!p0.new) r28 = ##0x7F800001 + } + { + p0 = dfclass(r1:0,#0x08) + if (p0.new) jumpr:nt r31 + } + + + { + r1:0 = extractu(r1:0,#52,#0) + } + { + r28 = add(clb(r1:0),#-11) + } + { + r1:0 = asl(r1:0,r28) + r28 = sub(#1,r28) + } + { + r1 = insert(r28,#1,#52 -32) + } + { + r3:2 = extractu(r1:0,#23 +1,#52 -23) + r5 = ##0x3f000004 + } + { + r9 = or(r5,r2) + r5 = and(r5,#-16) + jump .Ldenormal_restart + } +.Lsqrt_nan: + { + r28 = convert_df2sf(r1:0) + r1:0 = #-1 + jumpr r31 + } +.Lsqrt_invalid_neg: + { + r1:0 = convert_sf2df(r28) + jumpr r31 + } +.size __hexagon_sqrt,.-__hexagon_sqrt +.size __hexagon_sqrtdf2,.-__hexagon_sqrtdf2 diff --git a/library/compiler-builtins/compiler-builtins/src/hexagon/divdi3.s b/library/compiler-builtins/compiler-builtins/src/hexagon/divdi3.s new file mode 100644 index 0000000000000..0fee6e70f0630 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/hexagon/divdi3.s @@ -0,0 +1,64 @@ + +FUNCTION_BEGIN __hexagon_divdi3 + { + p2 = tstbit(r1,#31) + p3 = tstbit(r3,#31) + } + { + r1:0 = abs(r1:0) + r3:2 = abs(r3:2) + } + { + r6 = cl0(r1:0) + r7 = cl0(r3:2) + r5:4 = r3:2 + r3:2 = r1:0 + } + { + p3 = xor(p2,p3) + r10 = sub(r7,r6) + r1:0 = #0 + r15:14 = #1 + } + { + r11 = add(r10,#1) + r13:12 = lsl(r5:4,r10) + r15:14 = lsl(r15:14,r10) + } + { + p0 = cmp.gtu(r5:4,r3:2) + loop0(1f,r11) + } + { + if (p0) jump .hexagon_divdi3_return + } + .falign +1: + { + p0 = cmp.gtu(r13:12,r3:2) + } + { + r7:6 = sub(r3:2, r13:12) + r9:8 = add(r1:0, r15:14) + } + { + r1:0 = vmux(p0, r1:0, r9:8) + r3:2 = vmux(p0, r3:2, r7:6) + } + { + r15:14 = lsr(r15:14, #1) + r13:12 = lsr(r13:12, #1) + }:endloop0 + +.hexagon_divdi3_return: + { + r3:2 = neg(r1:0) + } + { + r1:0 = vmux(p3,r3:2,r1:0) + jumpr r31 + } +FUNCTION_END __hexagon_divdi3 + + .globl __qdsp_divdi3 + .set __qdsp_divdi3, __hexagon_divdi3 diff --git a/library/compiler-builtins/compiler-builtins/src/hexagon/divsi3.s b/library/compiler-builtins/compiler-builtins/src/hexagon/divsi3.s new file mode 100644 index 0000000000000..fc957a4314600 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/hexagon/divsi3.s @@ -0,0 +1,53 @@ + +FUNCTION_BEGIN __hexagon_divsi3 + { + p0 = cmp.ge(r0,#0) + p1 = cmp.ge(r1,#0) + r1 = abs(r0) + r2 = abs(r1) + } + { + r3 = cl0(r1) + r4 = cl0(r2) + r5 = sub(r1,r2) + p2 = cmp.gtu(r2,r1) + } + { + r0 = #0 + p1 = xor(p0,p1) + p0 = cmp.gtu(r2,r5) + if (p2) jumpr r31 + } + + { + r0 = 
mux(p1,#-1,#1) + if (p0) jumpr r31 + r4 = sub(r4,r3) + r3 = #1 + } + { + r0 = #0 + r3:2 = vlslw(r3:2,r4) + loop0(1f,r4) + } + .falign +1: + { + p0 = cmp.gtu(r2,r1) + if (!p0.new) r1 = sub(r1,r2) + if (!p0.new) r0 = add(r0,r3) + r3:2 = vlsrw(r3:2,#1) + }:endloop0 + { + p0 = cmp.gtu(r2,r1) + if (!p0.new) r0 = add(r0,r3) + if (!p1) jumpr r31 + } + { + r0 = neg(r0) + jumpr r31 + } +FUNCTION_END __hexagon_divsi3 + + .globl __qdsp_divsi3 + .set __qdsp_divsi3, __hexagon_divsi3 diff --git a/library/compiler-builtins/compiler-builtins/src/hexagon/fastmath2_dlib_asm.s b/library/compiler-builtins/compiler-builtins/src/hexagon/fastmath2_dlib_asm.s new file mode 100644 index 0000000000000..e77b7db033242 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/hexagon/fastmath2_dlib_asm.s @@ -0,0 +1,266 @@ + .text + .global __hexagon_fast2_dadd_asm + .type __hexagon_fast2_dadd_asm, @function +__hexagon_fast2_dadd_asm: + .falign + { + R7:6 = VABSDIFFH(R1:0, R3:2) + R9 = #62 + R4 = SXTH(R0) + R5 = SXTH(R2) + } { + R6 = SXTH(R6) + P0 = CMP.GT(R4, R5); + if ( P0.new) R8 = add(R4, #1) + if (!P0.new) R8 = add(R5, #1) + } { + if ( P0) R4 = #1 + if (!P0) R5 = #1 + R0.L = #0 + R6 = MIN(R6, R9) + } { + if (!P0) R4 = add(R6, #1) + if ( P0) R5 = add(R6, #1) + R2.L = #0 + R11:10 = #0 + } { + R1:0 = ASR(R1:0, R4) + R3:2 = ASR(R3:2, R5) + } { + R1:0 = add(R1:0, R3:2) + R10.L = #0x8001 + } { + R4 = clb(R1:0) + R9 = #58 + } { + R4 = add(R4, #-1) + p0 = cmp.gt(R4, R9) + } { + R1:0 = ASL(R1:0, R4) + R8 = SUB(R8, R4) + if(p0) jump .Ldenorma + } { + R0 = insert(R8, #16, #0) + jumpr r31 + } +.Ldenorma: + { + R1:0 = R11:10 + jumpr r31 + } + .text + .global __hexagon_fast2_dsub_asm + .type __hexagon_fast2_dsub_asm, @function +__hexagon_fast2_dsub_asm: + .falign + { + R7:6 = VABSDIFFH(R1:0, R3:2) + R9 = #62 + R4 = SXTH(R0) + R5 = SXTH(R2) + } { + R6 = SXTH(R6) + P0 = CMP.GT(R4, R5); + if ( P0.new) R8 = add(R4, #1) + if (!P0.new) R8 = add(R5, #1) + } { + if ( P0) R4 = #1 + if (!P0) R5 = #1 + R0.L = #0 + R6 = MIN(R6, R9) + } { + if (!P0) R4 = add(R6, #1) + if ( P0) R5 = add(R6, #1) + R2.L = #0 + R11:10 = #0 + } { + R1:0 = ASR(R1:0, R4) + R3:2 = ASR(R3:2, R5) + } { + R1:0 = sub(R1:0, R3:2) + R10.L = #0x8001 + } { + R4 = clb(R1:0) + R9 = #58 + } { + R4 = add(R4, #-1) + p0 = cmp.gt(R4, R9) + } { + R1:0 = ASL(R1:0, R4) + R8 = SUB(R8, R4) + if(p0) jump .Ldenorm + } { + R0 = insert(R8, #16, #0) + jumpr r31 + } +.Ldenorm: + { + R1:0 = R11:10 + jumpr r31 + } + .text + .global __hexagon_fast2_dmpy_asm + .type __hexagon_fast2_dmpy_asm, @function +__hexagon_fast2_dmpy_asm: + .falign + { + R13= lsr(R2, #16) + R5 = sxth(R2) + R4 = sxth(R0) + R12= lsr(R0, #16) + } + { + R11:10 = mpy(R1, R3) + R7:6 = mpy(R1, R13) + R0.L = #0x0 + R15:14 = #0 + } + { + R11:10 = add(R11:10, R11:10) + R7:6 += mpy(R3, R12) + R2.L = #0x0 + R15.H = #0x8000 + } + { + R7:6 = asr(R7:6, #15) + R12.L = #0x8001 + p1 = cmp.eq(R1:0, R3:2) + } + { + R7:6 = add(R7:6, R11:10) + R8 = add(R4, R5) + p2 = cmp.eq(R1:0, R15:14) + } + { + R9 = clb(R7:6) + R3:2 = abs(R7:6) + R11 = #58 + } + { + p1 = and(p1, p2) + R8 = sub(R8, R9) + R9 = add(R9, #-1) + p0 = cmp.gt(R9, R11) + } + { + R8 = add(R8, #1) + R1:0 = asl(R7:6, R9) + if(p1) jump .Lsat + } + { + R0 = insert(R8,#16, #0) + if(!p0) jumpr r31 + } + { + R0 = insert(R12,#16, #0) + jumpr r31 + } +.Lsat: + { + R1:0 = #-1 + } + { + R1:0 = lsr(R1:0, #1) + } + { + R0 = insert(R8,#16, #0) + jumpr r31 + } + .text + .global __hexagon_fast2_qd2f_asm + .type __hexagon_fast2_qd2f_asm, @function +__hexagon_fast2_qd2f_asm: + .falign + { + 
R3 = abs(R1):sat + R4 = sxth(R0) + R5 = #0x40 + R6.L = #0xffc0 + } + { + R0 = extractu(R3, #8, #0) + p2 = cmp.gt(R4, #126) + p3 = cmp.ge(R4, #-126) + R6.H = #0x7fff + } + { + p1 = cmp.eq(R0,#0x40) + if(p1.new) R5 = #0 + R4 = add(R4, #126) + if(!p3) jump .Lmin + } + { + p0 = bitsset(R3, R6) + R0.L = #0x0000 + R2 = add(R3, R5) + R7 = lsr(R6, #8) + } + { + if(p0) R4 = add(R4, #1) + if(p0) R3 = #0 + R2 = lsr(R2, #7) + R0.H = #0x8000 + } + { + R0 = and(R0, R1) + R6 &= asl(R4, #23) + if(!p0) R3 = and(R2, R7) + if(p2) jump .Lmax + } + { + R0 += add(R6, R3) + jumpr r31 + } +.Lmax: + { + R0.L = #0xffff; + } + { + R0.H = #0x7f7f; + jumpr r31 + } +.Lmin: + { + R0 = #0x0 + jumpr r31 + } + .text + .global __hexagon_fast2_f2qd_asm + .type __hexagon_fast2_f2qd_asm, @function +__hexagon_fast2_f2qd_asm: + + + + + + + + .falign + { + R1 = asl(R0, #7) + p0 = tstbit(R0, #31) + R5:4 = #0 + R3 = add(R0,R0) + } + { + R1 = setbit(R1, #30) + R0= extractu(R0,#8,#23) + R4.L = #0x8001 + p1 = cmp.eq(R3, #0) + } + { + R1= extractu(R1, #31, #0) + R0= add(R0, #-126) + R2 = #0 + if(p1) jump .Lminqd + } + { + R0 = zxth(R0) + if(p0) R1= sub(R2, R1) + jumpr r31 + } +.Lminqd: + { + R1:0 = R5:4 + jumpr r31 + } diff --git a/library/compiler-builtins/compiler-builtins/src/hexagon/fastmath2_ldlib_asm.s b/library/compiler-builtins/compiler-builtins/src/hexagon/fastmath2_ldlib_asm.s new file mode 100644 index 0000000000000..3251057d78c98 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/hexagon/fastmath2_ldlib_asm.s @@ -0,0 +1,187 @@ + .text + .global __hexagon_fast2ldadd_asm + .type __hexagon_fast2ldadd_asm, @function +__hexagon_fast2ldadd_asm: + .falign + { + R4 = memw(r29+#8) + R5 = memw(r29+#24) + r7 = r0 + } + { + R6 = sub(R4, R5):sat + P0 = CMP.GT(R4, R5); + if ( P0.new) R8 = add(R4, #1) + if (!P0.new) R8 = add(R5, #1) + } { + R6 = abs(R6):sat + if ( P0) R4 = #1 + if (!P0) R5 = #1 + R9 = #62 + } { + R6 = MIN(R6, R9) + R1:0 = memd(r29+#0) + R3:2 = memd(r29+#16) + } { + if (!P0) R4 = add(R6, #1) + if ( P0) R5 = add(R6, #1) + } { + R1:0 = ASR(R1:0, R4) + R3:2 = ASR(R3:2, R5) + } { + R1:0 = add(R1:0, R3:2) + R3:2 = #0 + } { + R4 = clb(R1:0) + R9.L =#0x0001 + } { + R8 -= add(R4, #-1) + R4 = add(R4, #-1) + p0 = cmp.gt(R4, #58) + R9.H =#0x8000 + } { + if(!p0)memw(r7+#8) = R8 + R1:0 = ASL(R1:0, R4) + if(p0) jump .Ldenorma1 + } { + memd(r7+#0) = R1:0 + jumpr r31 + } +.Ldenorma1: + memd(r7+#0) = R3:2 + { + memw(r7+#8) = R9 + jumpr r31 + } + .text + .global __hexagon_fast2ldsub_asm + .type __hexagon_fast2ldsub_asm, @function +__hexagon_fast2ldsub_asm: + .falign + { + R4 = memw(r29+#8) + R5 = memw(r29+#24) + r7 = r0 + } + { + R6 = sub(R4, R5):sat + P0 = CMP.GT(R4, R5); + if ( P0.new) R8 = add(R4, #1) + if (!P0.new) R8 = add(R5, #1) + } { + R6 = abs(R6):sat + if ( P0) R4 = #1 + if (!P0) R5 = #1 + R9 = #62 + } { + R6 = min(R6, R9) + R1:0 = memd(r29+#0) + R3:2 = memd(r29+#16) + } { + if (!P0) R4 = add(R6, #1) + if ( P0) R5 = add(R6, #1) + } { + R1:0 = ASR(R1:0, R4) + R3:2 = ASR(R3:2, R5) + } { + R1:0 = sub(R1:0, R3:2) + R3:2 = #0 + } { + R4 = clb(R1:0) + R9.L =#0x0001 + } { + R8 -= add(R4, #-1) + R4 = add(R4, #-1) + p0 = cmp.gt(R4, #58) + R9.H =#0x8000 + } { + if(!p0)memw(r7+#8) = R8 + R1:0 = asl(R1:0, R4) + if(p0) jump .Ldenorma_s + } { + memd(r7+#0) = R1:0 + jumpr r31 + } +.Ldenorma_s: + memd(r7+#0) = R3:2 + { + memw(r7+#8) = R9 + jumpr r31 + } + .text + .global __hexagon_fast2ldmpy_asm + .type __hexagon_fast2ldmpy_asm, @function +__hexagon_fast2ldmpy_asm: + .falign + { + R15:14 = memd(r29+#0) + R3:2 = memd(r29+#16) + 
R13:12 = #0 + } + { + R8= extractu(R2, #31, #1) + R9= extractu(R14, #31, #1) + R13.H = #0x8000 + } + { + R11:10 = mpy(R15, R3) + R7:6 = mpy(R15, R8) + R4 = memw(r29+#8) + R5 = memw(r29+#24) + } + { + R11:10 = add(R11:10, R11:10) + R7:6 += mpy(R3, R9) + } + { + R7:6 = asr(R7:6, #30) + R8.L = #0x0001 + p1 = cmp.eq(R15:14, R3:2) + } + { + R7:6 = add(R7:6, R11:10) + R4= add(R4, R5) + p2 = cmp.eq(R3:2, R13:12) + } + { + R9 = clb(R7:6) + R8.H = #0x8000 + p1 = and(p1, p2) + } + { + R4-= add(R9, #-1) + R9 = add(R9, #-1) + if(p1) jump .Lsat1 + } + { + R7:6 = asl(R7:6, R9) + memw(R0+#8) = R4 + p0 = cmp.gt(R9, #58) + if(p0.new) jump:NT .Ldenorm1 + } + { + memd(R0+#0) = R7:6 + jumpr r31 + } +.Lsat1: + { + R13:12 = #0 + R4+= add(R9, #1) + } + { + R13.H = #0x4000 + memw(R0+#8) = R4 + } + { + memd(R0+#0) = R13:12 + jumpr r31 + } +.Ldenorm1: + { + memw(R0+#8) = R8 + R15:14 = #0 + } + { + memd(R0+#0) = R15:14 + jumpr r31 + } diff --git a/library/compiler-builtins/compiler-builtins/src/hexagon/func_macro.s b/library/compiler-builtins/compiler-builtins/src/hexagon/func_macro.s new file mode 100644 index 0000000000000..9a1e11aebcb50 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/hexagon/func_macro.s @@ -0,0 +1,12 @@ + .macro FUNCTION_BEGIN name + .text + .p2align 5 + .globl \name + .type \name, @function +\name: + .endm + + .macro FUNCTION_END name + .size \name, . - \name + .endm + diff --git a/library/compiler-builtins/compiler-builtins/src/hexagon/memcpy_forward_vp4cp4n2.s b/library/compiler-builtins/compiler-builtins/src/hexagon/memcpy_forward_vp4cp4n2.s new file mode 100644 index 0000000000000..89f69010aa43b --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/hexagon/memcpy_forward_vp4cp4n2.s @@ -0,0 +1,91 @@ + .text + + + + + + + .globl hexagon_memcpy_forward_vp4cp4n2 + .balign 32 + .type hexagon_memcpy_forward_vp4cp4n2,@function +hexagon_memcpy_forward_vp4cp4n2: + + + + + { + r3 = sub(##4096, r1) + r5 = lsr(r2, #3) + } + { + + + r3 = extractu(r3, #10, #2) + r4 = extractu(r3, #7, #5) + } + { + r3 = minu(r2, r3) + r4 = minu(r5, r4) + } + { + r4 = or(r4, ##2105344) + p0 = cmp.eq(r3, #0) + if (p0.new) jump:nt .Lskipprolog + } + l2fetch(r1, r4) + { + loop0(.Lprolog, r3) + r2 = sub(r2, r3) + } + .falign +.Lprolog: + { + r4 = memw(r1++#4) + memw(r0++#4) = r4.new + } :endloop0 +.Lskipprolog: + { + + r3 = lsr(r2, #10) + if (cmp.eq(r3.new, #0)) jump:nt .Lskipmain + } + { + loop1(.Lout, r3) + r2 = extractu(r2, #10, #0) + r3 = ##2105472 + } + + .falign +.Lout: + + l2fetch(r1, r3) + loop0(.Lpage, #512) + .falign +.Lpage: + r5:4 = memd(r1++#8) + { + memw(r0++#8) = r4 + memw(r0+#4) = r5 + } :endloop0:endloop1 +.Lskipmain: + { + r3 = ##2105344 + r4 = lsr(r2, #3) + p0 = cmp.eq(r2, #0) + if (p0.new) jumpr:nt r31 + } + { + r3 = or(r3, r4) + loop0(.Lepilog, r2) + } + l2fetch(r1, r3) + .falign +.Lepilog: + { + r4 = memw(r1++#4) + memw(r0++#4) = r4.new + } :endloop0 + + jumpr r31 + +.size hexagon_memcpy_forward_vp4cp4n2, . 
- hexagon_memcpy_forward_vp4cp4n2 diff --git a/library/compiler-builtins/compiler-builtins/src/hexagon/memcpy_likely_aligned.s b/library/compiler-builtins/compiler-builtins/src/hexagon/memcpy_likely_aligned.s new file mode 100644 index 0000000000000..7e9b62f6a791c --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/hexagon/memcpy_likely_aligned.s @@ -0,0 +1,42 @@ + +FUNCTION_BEGIN __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes + { + p0 = bitsclr(r1,#7) + p0 = bitsclr(r0,#7) + if (p0.new) r5:4 = memd(r1) + r3 = #-3 + } + { + if (!p0) jump .Lmemcpy_call + if (p0) memd(r0++#8) = r5:4 + if (p0) r5:4 = memd(r1+#8) + r3 += lsr(r2,#3) + } + { + memd(r0++#8) = r5:4 + r5:4 = memd(r1+#16) + r1 = add(r1,#24) + loop0(1f,r3) + } + .falign +1: + { + memd(r0++#8) = r5:4 + r5:4 = memd(r1++#8) + }:endloop0 + { + memd(r0) = r5:4 + r0 -= add(r2,#-8) + jumpr r31 + } +FUNCTION_END __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes + +.Lmemcpy_call: + + jump memcpy@PLT + + + + + .globl __qdsp_memcpy_likely_aligned_min32bytes_mult8bytes + .set __qdsp_memcpy_likely_aligned_min32bytes_mult8bytes, __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes diff --git a/library/compiler-builtins/compiler-builtins/src/hexagon/moddi3.s b/library/compiler-builtins/compiler-builtins/src/hexagon/moddi3.s new file mode 100644 index 0000000000000..53ea6d52a58b4 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/hexagon/moddi3.s @@ -0,0 +1,63 @@ + + +FUNCTION_BEGIN __hexagon_moddi3 + { + p3 = tstbit(r1,#31) + } + { + r1:0 = abs(r1:0) + r3:2 = abs(r3:2) + } + { + r6 = cl0(r1:0) + r7 = cl0(r3:2) + r5:4 = r3:2 + r3:2 = r1:0 + } + { + r10 = sub(r7,r6) + r1:0 = #0 + r15:14 = #1 + } + { + r11 = add(r10,#1) + r13:12 = lsl(r5:4,r10) + r15:14 = lsl(r15:14,r10) + } + { + p0 = cmp.gtu(r5:4,r3:2) + loop0(1f,r11) + } + { + if (p0) jump .hexagon_moddi3_return + } + .falign +1: + { + p0 = cmp.gtu(r13:12,r3:2) + } + { + r7:6 = sub(r3:2, r13:12) + r9:8 = add(r1:0, r15:14) + } + { + r1:0 = vmux(p0, r1:0, r9:8) + r3:2 = vmux(p0, r3:2, r7:6) + } + { + r15:14 = lsr(r15:14, #1) + r13:12 = lsr(r13:12, #1) + }:endloop0 + +.hexagon_moddi3_return: + { + r1:0 = neg(r3:2) + } + { + r1:0 = vmux(p3,r1:0,r3:2) + jumpr r31 + } +FUNCTION_END __hexagon_moddi3 + + .globl __qdsp_moddi3 + .set __qdsp_moddi3, __hexagon_moddi3 diff --git a/library/compiler-builtins/compiler-builtins/src/hexagon/modsi3.s b/library/compiler-builtins/compiler-builtins/src/hexagon/modsi3.s new file mode 100644 index 0000000000000..c4ae7e59edc9a --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/hexagon/modsi3.s @@ -0,0 +1,44 @@ + + +FUNCTION_BEGIN __hexagon_modsi3 + { + p2 = cmp.ge(r0,#0) + r2 = abs(r0) + r1 = abs(r1) + } + { + r3 = cl0(r2) + r4 = cl0(r1) + p0 = cmp.gtu(r1,r2) + } + { + r3 = sub(r4,r3) + if (p0) jumpr r31 + } + { + p1 = cmp.eq(r3,#0) + loop0(1f,r3) + r0 = r2 + r2 = lsl(r1,r3) + } + .falign +1: + { + p0 = cmp.gtu(r2,r0) + if (!p0.new) r0 = sub(r0,r2) + r2 = lsr(r2,#1) + if (p1) r1 = #0 + }:endloop0 + { + p0 = cmp.gtu(r2,r0) + if (!p0.new) r0 = sub(r0,r1) + if (p2) jumpr r31 + } + { + r0 = neg(r0) + jumpr r31 + } +FUNCTION_END __hexagon_modsi3 + + .globl __qdsp_modsi3 + .set __qdsp_modsi3, __hexagon_modsi3 diff --git a/library/compiler-builtins/compiler-builtins/src/hexagon/sfdiv_opt.s b/library/compiler-builtins/compiler-builtins/src/hexagon/sfdiv_opt.s new file mode 100644 index 0000000000000..26c91f15cbb05 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/hexagon/sfdiv_opt.s @@ -0,0 +1,42 @@ + 
+FUNCTION_BEGIN __hexagon_divsf3 + { + r2,p0 = sfrecipa(r0,r1) + r4 = sffixupd(r0,r1) + r3 = ##0x3f800000 + } + { + r5 = sffixupn(r0,r1) + r3 -= sfmpy(r4,r2):lib + r6 = ##0x80000000 + r7 = r3 + } + { + r2 += sfmpy(r3,r2):lib + r3 = r7 + r6 = r5 + r0 = and(r6,r5) + } + { + r3 -= sfmpy(r4,r2):lib + r0 += sfmpy(r5,r2):lib + } + { + r2 += sfmpy(r3,r2):lib + r6 -= sfmpy(r0,r4):lib + } + { + r0 += sfmpy(r6,r2):lib + } + { + r5 -= sfmpy(r0,r4):lib + } + { + r0 += sfmpy(r5,r2,p0):scale + jumpr r31 + } +FUNCTION_END __hexagon_divsf3 + +.global __qdsp_divsf3 ; .set __qdsp_divsf3, __hexagon_divsf3 +.global __hexagon_fast_divsf3 ; .set __hexagon_fast_divsf3, __hexagon_divsf3 +.global __hexagon_fast2_divsf3 ; .set __hexagon_fast2_divsf3, __hexagon_divsf3 diff --git a/library/compiler-builtins/compiler-builtins/src/hexagon/sfsqrt_opt.s b/library/compiler-builtins/compiler-builtins/src/hexagon/sfsqrt_opt.s new file mode 100644 index 0000000000000..c90af17975412 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/hexagon/sfsqrt_opt.s @@ -0,0 +1,49 @@ +FUNCTION_BEGIN __hexagon_sqrtf + { + r3,p0 = sfinvsqrta(r0) + r5 = sffixupr(r0) + r4 = ##0x3f000000 + r1:0 = combine(#0,#0) + } + { + r0 += sfmpy(r3,r5):lib + r1 += sfmpy(r3,r4):lib + r2 = r4 + r3 = r5 + } + { + r2 -= sfmpy(r0,r1):lib + p1 = sfclass(r5,#1) + + } + { + r0 += sfmpy(r0,r2):lib + r1 += sfmpy(r1,r2):lib + r2 = r4 + r3 = r5 + } + { + r2 -= sfmpy(r0,r1):lib + r3 -= sfmpy(r0,r0):lib + } + { + r0 += sfmpy(r1,r3):lib + r1 += sfmpy(r1,r2):lib + r2 = r4 + r3 = r5 + } + { + + r3 -= sfmpy(r0,r0):lib + if (p1) r0 = or(r0,r5) + } + { + r0 += sfmpy(r1,r3,p0):scale + jumpr r31 + } + +FUNCTION_END __hexagon_sqrtf + +.global __qdsp_sqrtf ; .set __qdsp_sqrtf, __hexagon_sqrtf +.global __hexagon_fast_sqrtf ; .set __hexagon_fast_sqrtf, __hexagon_sqrtf +.global __hexagon_fast2_sqrtf ; .set __hexagon_fast2_sqrtf, __hexagon_sqrtf diff --git a/library/compiler-builtins/compiler-builtins/src/hexagon/udivdi3.s b/library/compiler-builtins/compiler-builtins/src/hexagon/udivdi3.s new file mode 100644 index 0000000000000..f0fffc23df006 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/hexagon/udivdi3.s @@ -0,0 +1,50 @@ + + +FUNCTION_BEGIN __hexagon_udivdi3 + { + r6 = cl0(r1:0) + r7 = cl0(r3:2) + r5:4 = r3:2 + r3:2 = r1:0 + } + { + r10 = sub(r7,r6) + r1:0 = #0 + r15:14 = #1 + } + { + r11 = add(r10,#1) + r13:12 = lsl(r5:4,r10) + r15:14 = lsl(r15:14,r10) + } + { + p0 = cmp.gtu(r5:4,r3:2) + loop0(1f,r11) + } + { + if (p0) jumpr r31 + } + .falign +1: + { + p0 = cmp.gtu(r13:12,r3:2) + } + { + r7:6 = sub(r3:2, r13:12) + r9:8 = add(r1:0, r15:14) + } + { + r1:0 = vmux(p0, r1:0, r9:8) + r3:2 = vmux(p0, r3:2, r7:6) + } + { + r15:14 = lsr(r15:14, #1) + r13:12 = lsr(r13:12, #1) + }:endloop0 + { + jumpr r31 + } +FUNCTION_END __hexagon_udivdi3 + + .globl __qdsp_udivdi3 + .set __qdsp_udivdi3, __hexagon_udivdi3 diff --git a/library/compiler-builtins/compiler-builtins/src/hexagon/udivmoddi4.s b/library/compiler-builtins/compiler-builtins/src/hexagon/udivmoddi4.s new file mode 100644 index 0000000000000..cbfb3987dd2be --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/hexagon/udivmoddi4.s @@ -0,0 +1,50 @@ + + +FUNCTION_BEGIN __hexagon_udivmoddi4 + { + r6 = cl0(r1:0) + r7 = cl0(r3:2) + r5:4 = r3:2 + r3:2 = r1:0 + } + { + r10 = sub(r7,r6) + r1:0 = #0 + r15:14 = #1 + } + { + r11 = add(r10,#1) + r13:12 = lsl(r5:4,r10) + r15:14 = lsl(r15:14,r10) + } + { + p0 = cmp.gtu(r5:4,r3:2) + loop0(1f,r11) + } + { + if (p0) jumpr r31 + } + .falign +1: + { + p0 
= cmp.gtu(r13:12,r3:2) + } + { + r7:6 = sub(r3:2, r13:12) + r9:8 = add(r1:0, r15:14) + } + { + r1:0 = vmux(p0, r1:0, r9:8) + r3:2 = vmux(p0, r3:2, r7:6) + } + { + r15:14 = lsr(r15:14, #1) + r13:12 = lsr(r13:12, #1) + }:endloop0 + { + jumpr r31 + } +FUNCTION_END __hexagon_udivmoddi4 + + .globl __qdsp_udivmoddi4 + .set __qdsp_udivmoddi4, __hexagon_udivmoddi4 diff --git a/library/compiler-builtins/compiler-builtins/src/hexagon/udivmodsi4.s b/library/compiler-builtins/compiler-builtins/src/hexagon/udivmodsi4.s new file mode 100644 index 0000000000000..83489c5143151 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/hexagon/udivmodsi4.s @@ -0,0 +1,39 @@ + + +FUNCTION_BEGIN __hexagon_udivmodsi4 + { + r2 = cl0(r0) + r3 = cl0(r1) + r5:4 = combine(#1,#0) + p0 = cmp.gtu(r1,r0) + } + { + r6 = sub(r3,r2) + r4 = r1 + r1:0 = combine(r0,r4) + if (p0) jumpr r31 + } + { + r3:2 = vlslw(r5:4,r6) + loop0(1f,r6) + p0 = cmp.eq(r6,#0) + if (p0.new) r4 = #0 + } + .falign +1: + { + p0 = cmp.gtu(r2,r1) + if (!p0.new) r1 = sub(r1,r2) + if (!p0.new) r0 = add(r0,r3) + r3:2 = vlsrw(r3:2,#1) + }:endloop0 + { + p0 = cmp.gtu(r2,r1) + if (!p0.new) r1 = sub(r1,r4) + if (!p0.new) r0 = add(r0,r3) + jumpr r31 + } +FUNCTION_END __hexagon_udivmodsi4 + + .globl __qdsp_udivmodsi4 + .set __qdsp_udivmodsi4, __hexagon_udivmodsi4 diff --git a/library/compiler-builtins/compiler-builtins/src/hexagon/udivsi3.s b/library/compiler-builtins/compiler-builtins/src/hexagon/udivsi3.s new file mode 100644 index 0000000000000..e0b94aa99826e --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/hexagon/udivsi3.s @@ -0,0 +1,36 @@ + + +FUNCTION_BEGIN __hexagon_udivsi3 + { + r2 = cl0(r0) + r3 = cl0(r1) + r5:4 = combine(#1,#0) + p0 = cmp.gtu(r1,r0) + } + { + r6 = sub(r3,r2) + r4 = r1 + r1:0 = combine(r0,r4) + if (p0) jumpr r31 + } + { + r3:2 = vlslw(r5:4,r6) + loop0(1f,r6) + } + .falign +1: + { + p0 = cmp.gtu(r2,r1) + if (!p0.new) r1 = sub(r1,r2) + if (!p0.new) r0 = add(r0,r3) + r3:2 = vlsrw(r3:2,#1) + }:endloop0 + { + p0 = cmp.gtu(r2,r1) + if (!p0.new) r0 = add(r0,r3) + jumpr r31 + } +FUNCTION_END __hexagon_udivsi3 + + .globl __qdsp_udivsi3 + .set __qdsp_udivsi3, __hexagon_udivsi3 diff --git a/library/compiler-builtins/compiler-builtins/src/hexagon/umoddi3.s b/library/compiler-builtins/compiler-builtins/src/hexagon/umoddi3.s new file mode 100644 index 0000000000000..c76011c3e7aef --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/hexagon/umoddi3.s @@ -0,0 +1,53 @@ + + +FUNCTION_BEGIN __hexagon_umoddi3 + { + r6 = cl0(r1:0) + r7 = cl0(r3:2) + r5:4 = r3:2 + r3:2 = r1:0 + } + { + r10 = sub(r7,r6) + r1:0 = #0 + r15:14 = #1 + } + { + r11 = add(r10,#1) + r13:12 = lsl(r5:4,r10) + r15:14 = lsl(r15:14,r10) + } + { + p0 = cmp.gtu(r5:4,r3:2) + loop0(1f,r11) + } + { + if (p0) jump .hexagon_umoddi3_return + } + .falign +1: + { + p0 = cmp.gtu(r13:12,r3:2) + } + { + r7:6 = sub(r3:2, r13:12) + r9:8 = add(r1:0, r15:14) + } + { + r1:0 = vmux(p0, r1:0, r9:8) + r3:2 = vmux(p0, r3:2, r7:6) + } + { + r15:14 = lsr(r15:14, #1) + r13:12 = lsr(r13:12, #1) + }:endloop0 + +.hexagon_umoddi3_return: + { + r1:0 = r3:2 + jumpr r31 + } +FUNCTION_END __hexagon_umoddi3 + + .globl __qdsp_umoddi3 + .set __qdsp_umoddi3, __hexagon_umoddi3 diff --git a/library/compiler-builtins/compiler-builtins/src/hexagon/umodsi3.s b/library/compiler-builtins/compiler-builtins/src/hexagon/umodsi3.s new file mode 100644 index 0000000000000..1b592a7c56184 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/hexagon/umodsi3.s @@ -0,0 +1,34 @@ + + 
+FUNCTION_BEGIN __hexagon_umodsi3 + { + r2 = cl0(r0) + r3 = cl0(r1) + p0 = cmp.gtu(r1,r0) + } + { + r2 = sub(r3,r2) + if (p0) jumpr r31 + } + { + loop0(1f,r2) + p1 = cmp.eq(r2,#0) + r2 = lsl(r1,r2) + } + .falign +1: + { + p0 = cmp.gtu(r2,r0) + if (!p0.new) r0 = sub(r0,r2) + r2 = lsr(r2,#1) + if (p1) r1 = #0 + }:endloop0 + { + p0 = cmp.gtu(r2,r0) + if (!p0.new) r0 = sub(r0,r1) + jumpr r31 + } +FUNCTION_END __hexagon_umodsi3 + + .globl __qdsp_umodsi3 + .set __qdsp_umodsi3, __hexagon_umodsi3 diff --git a/library/compiler-builtins/compiler-builtins/src/int/addsub.rs b/library/compiler-builtins/compiler-builtins/src/int/addsub.rs new file mode 100644 index 0000000000000..1f84e8eb1e117 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/int/addsub.rs @@ -0,0 +1,104 @@ +use crate::int::{DInt, Int, MinInt}; + +trait UAddSub: DInt + Int { + fn uadd(self, other: Self) -> Self { + let (lo, carry) = self.lo().overflowing_add(other.lo()); + let hi = self.hi().wrapping_add(other.hi()); + let carry = if carry { Self::H::ONE } else { Self::H::ZERO }; + Self::from_lo_hi(lo, hi.wrapping_add(carry)) + } + fn uadd_one(self) -> Self { + let (lo, carry) = self.lo().overflowing_add(Self::H::ONE); + let carry = if carry { Self::H::ONE } else { Self::H::ZERO }; + Self::from_lo_hi(lo, self.hi().wrapping_add(carry)) + } + fn usub(self, other: Self) -> Self { + let uneg = (!other).uadd_one(); + self.uadd(uneg) + } +} + +impl UAddSub for u128 {} + +trait AddSub: Int +where + ::UnsignedInt: UAddSub, +{ + fn add(self, other: Self) -> Self { + Self::from_unsigned(self.unsigned().uadd(other.unsigned())) + } + fn sub(self, other: Self) -> Self { + Self::from_unsigned(self.unsigned().usub(other.unsigned())) + } +} + +impl AddSub for u128 {} +impl AddSub for i128 {} + +trait Addo: AddSub +where + ::UnsignedInt: UAddSub, +{ + fn addo(self, other: Self) -> (Self, bool) { + let sum = AddSub::add(self, other); + (sum, (other < Self::ZERO) != (sum < self)) + } +} + +impl Addo for i128 {} +impl Addo for u128 {} + +trait Subo: AddSub +where + ::UnsignedInt: UAddSub, +{ + fn subo(self, other: Self) -> (Self, bool) { + let sum = AddSub::sub(self, other); + (sum, (other < Self::ZERO) != (self < sum)) + } +} + +impl Subo for i128 {} +impl Subo for u128 {} + +intrinsics! { + pub extern "C" fn __rust_i128_add(a: i128, b: i128) -> i128 { + AddSub::add(a,b) + } + + pub extern "C" fn __rust_i128_addo(a: i128, b: i128, oflow: &mut i32) -> i128 { + let (add, o) = a.addo(b); + *oflow = o.into(); + add + } + + pub extern "C" fn __rust_u128_add(a: u128, b: u128) -> u128 { + AddSub::add(a,b) + } + + pub extern "C" fn __rust_u128_addo(a: u128, b: u128, oflow: &mut i32) -> u128 { + let (add, o) = a.addo(b); + *oflow = o.into(); + add + } + + pub extern "C" fn __rust_i128_sub(a: i128, b: i128) -> i128 { + AddSub::sub(a,b) + } + + pub extern "C" fn __rust_i128_subo(a: i128, b: i128, oflow: &mut i32) -> i128 { + let (sub, o) = a.subo(b); + *oflow = o.into(); + sub + } + + pub extern "C" fn __rust_u128_sub(a: u128, b: u128) -> u128 { + AddSub::sub(a,b) + } + + pub extern "C" fn __rust_u128_subo(a: u128, b: u128, oflow: &mut i32) -> u128 { + let (sub, o) = a.subo(b); + *oflow = o.into(); + sub + } +} diff --git a/library/compiler-builtins/compiler-builtins/src/int/big.rs b/library/compiler-builtins/compiler-builtins/src/int/big.rs new file mode 100644 index 0000000000000..1402efb8ed419 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/int/big.rs @@ -0,0 +1,295 @@ +//! 
Integers used for wide operations, larger than `u128`. + +#![allow(unused)] + +use core::{fmt, ops}; + +use crate::int::{DInt, HInt, Int, MinInt}; + +const WORD_LO_MASK: u64 = 0x00000000ffffffff; +const WORD_HI_MASK: u64 = 0xffffffff00000000; +const WORD_FULL_MASK: u64 = 0xffffffffffffffff; +const U128_LO_MASK: u128 = u64::MAX as u128; +const U128_HI_MASK: u128 = (u64::MAX as u128) << 64; + +/// A 256-bit unsigned integer represented as 4 64-bit limbs. +/// +/// Each limb is a native-endian number, but the array is little-limb-endian. +#[allow(non_camel_case_types)] +#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] +pub struct u256(pub [u64; 4]); + +impl u256 { + pub const MAX: Self = Self([u64::MAX, u64::MAX, u64::MAX, u64::MAX]); + + /// Reinterpret as a signed integer + pub fn signed(self) -> i256 { + i256(self.0) + } +} + +/// A 256-bit signed integer represented as 4 64-bit limbs. +/// +/// Each limb is a native-endian number, but the array is little-limb-endian. +#[allow(non_camel_case_types)] +#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] +pub struct i256(pub [u64; 4]); + +impl i256 { + /// Reinterpret as an unsigned integer + pub fn unsigned(self) -> u256 { + u256(self.0) + } +} + +impl MinInt for u256 { + type OtherSign = i256; + + type UnsignedInt = u256; + + const SIGNED: bool = false; + const BITS: u32 = 256; + const ZERO: Self = Self([0u64; 4]); + const ONE: Self = Self([1, 0, 0, 0]); + const MIN: Self = Self([0u64; 4]); + const MAX: Self = Self([u64::MAX; 4]); +} + +impl MinInt for i256 { + type OtherSign = u256; + + type UnsignedInt = u256; + + const SIGNED: bool = false; + const BITS: u32 = 256; + const ZERO: Self = Self([0u64; 4]); + const ONE: Self = Self([1, 0, 0, 0]); + const MIN: Self = Self([0, 0, 0, 1 << 63]); + const MAX: Self = Self([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 1]); +} + +macro_rules! impl_common { + ($ty:ty) => { + impl ops::BitOr for $ty { + type Output = Self; + + fn bitor(mut self, rhs: Self) -> Self::Output { + self.0[0] |= rhs.0[0]; + self.0[1] |= rhs.0[1]; + self.0[2] |= rhs.0[2]; + self.0[3] |= rhs.0[3]; + self + } + } + + impl ops::Not for $ty { + type Output = Self; + + fn not(self) -> Self::Output { + Self([!self.0[0], !self.0[1], !self.0[2], !self.0[3]]) + } + } + + impl ops::Shl for $ty { + type Output = Self; + + fn shl(self, rhs: u32) -> Self::Output { + unimplemented!("only used to meet trait bounds") + } + } + }; +} + +impl_common!(i256); +impl_common!(u256); + +impl ops::Shr for u256 { + type Output = Self; + + fn shr(self, rhs: u32) -> Self::Output { + assert!(rhs < Self::BITS, "attempted to shift right with overflow"); + + if rhs == 0 { + return self; + } + + let mut ret = self; + let byte_shift = rhs / 64; + let bit_shift = rhs % 64; + + for idx in 0..4 { + let base_idx = idx + byte_shift as usize; + + let Some(base) = ret.0.get(base_idx) else { + ret.0[idx] = 0; + continue; + }; + + let mut new_val = base >> bit_shift; + + if let Some(new) = ret.0.get(base_idx + 1) { + new_val |= new.overflowing_shl(64 - bit_shift).0; + } + + ret.0[idx] = new_val; + } + + ret + } +} + +macro_rules! 
word { + (1, $val:expr) => { + (($val >> (32 * 3)) & Self::from(WORD_LO_MASK)) as u64 + }; + (2, $val:expr) => { + (($val >> (32 * 2)) & Self::from(WORD_LO_MASK)) as u64 + }; + (3, $val:expr) => { + (($val >> (32 * 1)) & Self::from(WORD_LO_MASK)) as u64 + }; + (4, $val:expr) => { + (($val >> (32 * 0)) & Self::from(WORD_LO_MASK)) as u64 + }; +} + +impl HInt for u128 { + type D = u256; + + fn widen(self) -> Self::D { + let w0 = self & u128::from(u64::MAX); + let w1 = (self >> u64::BITS) & u128::from(u64::MAX); + u256([w0 as u64, w1 as u64, 0, 0]) + } + + fn zero_widen(self) -> Self::D { + self.widen() + } + + fn zero_widen_mul(self, rhs: Self) -> Self::D { + let product11: u64 = word!(1, self) * word!(1, rhs); + let product12: u64 = word!(1, self) * word!(2, rhs); + let product13: u64 = word!(1, self) * word!(3, rhs); + let product14: u64 = word!(1, self) * word!(4, rhs); + let product21: u64 = word!(2, self) * word!(1, rhs); + let product22: u64 = word!(2, self) * word!(2, rhs); + let product23: u64 = word!(2, self) * word!(3, rhs); + let product24: u64 = word!(2, self) * word!(4, rhs); + let product31: u64 = word!(3, self) * word!(1, rhs); + let product32: u64 = word!(3, self) * word!(2, rhs); + let product33: u64 = word!(3, self) * word!(3, rhs); + let product34: u64 = word!(3, self) * word!(4, rhs); + let product41: u64 = word!(4, self) * word!(1, rhs); + let product42: u64 = word!(4, self) * word!(2, rhs); + let product43: u64 = word!(4, self) * word!(3, rhs); + let product44: u64 = word!(4, self) * word!(4, rhs); + + let sum0: u128 = u128::from(product44); + let sum1: u128 = u128::from(product34) + u128::from(product43); + let sum2: u128 = u128::from(product24) + u128::from(product33) + u128::from(product42); + let sum3: u128 = u128::from(product14) + + u128::from(product23) + + u128::from(product32) + + u128::from(product41); + let sum4: u128 = u128::from(product13) + u128::from(product22) + u128::from(product31); + let sum5: u128 = u128::from(product12) + u128::from(product21); + let sum6: u128 = u128::from(product11); + + let r0: u128 = + (sum0 & u128::from(WORD_FULL_MASK)) + ((sum1 & u128::from(WORD_LO_MASK)) << 32); + let r1: u128 = (sum0 >> 64) + + ((sum1 >> 32) & u128::from(WORD_FULL_MASK)) + + (sum2 & u128::from(WORD_FULL_MASK)) + + ((sum3 << 32) & u128::from(WORD_HI_MASK)); + + let (lo, carry) = r0.overflowing_add(r1 << 64); + let hi = (r1 >> 64) + + (sum1 >> 96) + + (sum2 >> 64) + + (sum3 >> 32) + + sum4 + + (sum5 << 32) + + (sum6 << 64) + + u128::from(carry); + + u256([ + (lo & U128_LO_MASK) as u64, + ((lo >> 64) & U128_LO_MASK) as u64, + (hi & U128_LO_MASK) as u64, + ((hi >> 64) & U128_LO_MASK) as u64, + ]) + } + + fn widen_mul(self, rhs: Self) -> Self::D { + self.zero_widen_mul(rhs) + } + + fn widen_hi(self) -> Self::D { + self.widen() << ::BITS + } +} + +impl HInt for i128 { + type D = i256; + + fn widen(self) -> Self::D { + let mut ret = self.unsigned().zero_widen().signed(); + if self.is_negative() { + ret.0[2] = u64::MAX; + ret.0[3] = u64::MAX; + } + ret + } + + fn zero_widen(self) -> Self::D { + self.unsigned().zero_widen().signed() + } + + fn zero_widen_mul(self, rhs: Self) -> Self::D { + self.unsigned().zero_widen_mul(rhs.unsigned()).signed() + } + + fn widen_mul(self, rhs: Self) -> Self::D { + unimplemented!("signed i128 widening multiply is not used") + } + + fn widen_hi(self) -> Self::D { + self.widen() << ::BITS + } +} + +impl DInt for u256 { + type H = u128; + + fn lo(self) -> Self::H { + let mut tmp = [0u8; 16]; + 
tmp[..8].copy_from_slice(&self.0[0].to_le_bytes()); + tmp[8..].copy_from_slice(&self.0[1].to_le_bytes()); + u128::from_le_bytes(tmp) + } + + fn hi(self) -> Self::H { + let mut tmp = [0u8; 16]; + tmp[..8].copy_from_slice(&self.0[2].to_le_bytes()); + tmp[8..].copy_from_slice(&self.0[3].to_le_bytes()); + u128::from_le_bytes(tmp) + } +} + +impl DInt for i256 { + type H = i128; + + fn lo(self) -> Self::H { + let mut tmp = [0u8; 16]; + tmp[..8].copy_from_slice(&self.0[0].to_le_bytes()); + tmp[8..].copy_from_slice(&self.0[1].to_le_bytes()); + i128::from_le_bytes(tmp) + } + + fn hi(self) -> Self::H { + let mut tmp = [0u8; 16]; + tmp[..8].copy_from_slice(&self.0[2].to_le_bytes()); + tmp[8..].copy_from_slice(&self.0[3].to_le_bytes()); + i128::from_le_bytes(tmp) + } +} diff --git a/library/compiler-builtins/compiler-builtins/src/int/bswap.rs b/library/compiler-builtins/compiler-builtins/src/int/bswap.rs new file mode 100644 index 0000000000000..3ede08882dc90 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/int/bswap.rs @@ -0,0 +1,19 @@ +intrinsics! { + #[maybe_use_optimized_c_shim] + /// Swaps bytes in 32-bit number + pub extern "C" fn __bswapsi2(x: u32) -> u32 { + x.swap_bytes() + } + + #[maybe_use_optimized_c_shim] + /// Swaps bytes in 64-bit number + pub extern "C" fn __bswapdi2(x: u64) -> u64 { + x.swap_bytes() + } + + #[maybe_use_optimized_c_shim] + /// Swaps bytes in 128-bit number + pub extern "C" fn __bswapti2(x: u128) -> u128 { + x.swap_bytes() + } +} diff --git a/library/compiler-builtins/compiler-builtins/src/int/leading_zeros.rs b/library/compiler-builtins/compiler-builtins/src/int/leading_zeros.rs new file mode 100644 index 0000000000000..112f4d0361317 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/int/leading_zeros.rs @@ -0,0 +1,164 @@ +// Note: these functions happen to produce the correct `usize::leading_zeros(0)` value +// without a explicit zero check. Zero is probably common enough that it could warrant +// adding a zero check at the beginning, but `__clzsi2` has a precondition that `x != 0`. +// Compilers will insert the check for zero in cases where it is needed. + +#[cfg(feature = "unstable-public-internals")] +pub use implementation::{leading_zeros_default, leading_zeros_riscv}; +#[cfg(not(feature = "unstable-public-internals"))] +pub(crate) use implementation::{leading_zeros_default, leading_zeros_riscv}; + +mod implementation { + use crate::int::{CastInto, Int}; + + /// Returns the number of leading binary zeros in `x`. + #[allow(dead_code)] + pub fn leading_zeros_default>(x: T) -> usize { + // The basic idea is to test if the higher bits of `x` are zero and bisect the number + // of leading zeros. It is possible for all branches of the bisection to use the same + // code path by conditionally shifting the higher parts down to let the next bisection + // step work on the higher or lower parts of `x`. Instead of starting with `z == 0` + // and adding to the number of zeros, it is slightly faster to start with + // `z == usize::MAX.count_ones()` and subtract from the potential number of zeros, + // because it simplifies the final bisection step. 
+ let mut x = x; + // the number of potential leading zeros + let mut z = T::BITS as usize; + // a temporary + let mut t: T; + + const { assert!(T::BITS <= 64) }; + if T::BITS >= 64 { + t = x >> 32; + if t != T::ZERO { + z -= 32; + x = t; + } + } + if T::BITS >= 32 { + t = x >> 16; + if t != T::ZERO { + z -= 16; + x = t; + } + } + const { assert!(T::BITS >= 16) }; + t = x >> 8; + if t != T::ZERO { + z -= 8; + x = t; + } + t = x >> 4; + if t != T::ZERO { + z -= 4; + x = t; + } + t = x >> 2; + if t != T::ZERO { + z -= 2; + x = t; + } + // the last two bisections are combined into one conditional + t = x >> 1; + if t != T::ZERO { z - 2 } else { z - x.cast() } + + // We could potentially save a few cycles by using the LUT trick from + // "https://embeddedgurus.com/state-space/2014/09/ + // fast-deterministic-and-portable-counting-leading-zeros/". + // However, 256 bytes for a LUT is too large for embedded use cases. We could remove + // the last 3 bisections and use this 16 byte LUT for the rest of the work: + //const LUT: [u8; 16] = [0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]; + //z -= LUT[x] as usize; + //z + // However, it ends up generating about the same number of instructions. When benchmarked + // on x86_64, it is slightly faster to use the LUT, but this is probably because of OOO + // execution effects. Changing to using a LUT and branching is risky for smaller cores. + } + + // The above method does not compile well on RISC-V (because of the lack of predicated + // instructions), producing code with many branches or using an excessively long + // branchless solution. This method takes advantage of the set-if-less-than instruction on + // RISC-V that allows `(x >= power-of-two) as usize` to be branchless. + + /// Returns the number of leading binary zeros in `x`. + #[allow(dead_code)] + pub fn leading_zeros_riscv>(x: T) -> usize { + let mut x = x; + // the number of potential leading zeros + let mut z = T::BITS; + // a temporary + let mut t: u32; + + // RISC-V does not have a set-if-greater-than-or-equal instruction and + // `(x >= power-of-two) as usize` will get compiled into two instructions, but this is + // still the most optimal method. A conditional set can only be turned into a single + // immediate instruction if `x` is compared with an immediate `imm` (that can fit into + // 12 bits) like `x < imm` but not `imm < x` (because the immediate is always on the + // right). If we try to save an instruction by using `x < imm` for each bisection, we + // have to shift `x` left and compare with powers of two approaching `usize::MAX + 1`, + // but the immediate will never fit into 12 bits and never save an instruction. + const { assert!(T::BITS <= 64) }; + if T::BITS >= 64 { + // If the upper 32 bits of `x` are not all 0, `t` is set to `1 << 5`, otherwise + // `t` is set to 0. + t = ((x >= (T::ONE << 32)) as u32) << 5; + // If `t` was set to `1 << 5`, then the upper 32 bits are shifted down for the + // next step to process. 
+ x >>= t; + // If `t` was set to `1 << 5`, then we subtract 32 from the number of potential + // leading zeros + z -= t; + } + if T::BITS >= 32 { + t = ((x >= (T::ONE << 16)) as u32) << 4; + x >>= t; + z -= t; + } + const { assert!(T::BITS >= 16) }; + t = ((x >= (T::ONE << 8)) as u32) << 3; + x >>= t; + z -= t; + t = ((x >= (T::ONE << 4)) as u32) << 2; + x >>= t; + z -= t; + t = ((x >= (T::ONE << 2)) as u32) << 1; + x >>= t; + z -= t; + t = (x >= (T::ONE << 1)) as u32; + x >>= t; + z -= t; + // All bits except the LSB are guaranteed to be zero for this final bisection step. + // If `x != 0` then `x == 1` and subtracts one potential zero from `z`. + z as usize - x.cast() + } +} + +intrinsics! { + /// Returns the number of leading binary zeros in `x` + pub extern "C" fn __clzsi2(x: u32) -> usize { + if cfg!(any(target_arch = "riscv32", target_arch = "riscv64")) { + leading_zeros_riscv(x) + } else { + leading_zeros_default(x) + } + } + + /// Returns the number of leading binary zeros in `x` + pub extern "C" fn __clzdi2(x: u64) -> usize { + if cfg!(any(target_arch = "riscv32", target_arch = "riscv64")) { + leading_zeros_riscv(x) + } else { + leading_zeros_default(x) + } + } + + /// Returns the number of leading binary zeros in `x` + pub extern "C" fn __clzti2(x: u128) -> usize { + let hi = (x >> 64) as u64; + if hi == 0 { + 64 + __clzdi2(x as u64) + } else { + __clzdi2(hi) + } + } +} diff --git a/library/compiler-builtins/compiler-builtins/src/int/mod.rs b/library/compiler-builtins/compiler-builtins/src/int/mod.rs new file mode 100644 index 0000000000000..518ccb23f8009 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/int/mod.rs @@ -0,0 +1,18 @@ +mod specialized_div_rem; + +pub mod addsub; +mod big; +pub mod bswap; +pub mod leading_zeros; +pub mod mul; +pub mod sdiv; +pub mod shift; +pub mod trailing_zeros; +mod traits; +pub mod udiv; + +pub use big::{i256, u256}; +#[cfg(not(feature = "unstable-public-internals"))] +pub(crate) use traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt}; +#[cfg(feature = "unstable-public-internals")] +pub use traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt}; diff --git a/library/compiler-builtins/compiler-builtins/src/int/mul.rs b/library/compiler-builtins/compiler-builtins/src/int/mul.rs new file mode 100644 index 0000000000000..040c69342d148 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/int/mul.rs @@ -0,0 +1,142 @@ +use crate::int::{DInt, HInt, Int}; + +trait Mul: DInt + Int +where + Self::H: DInt, +{ + fn mul(self, rhs: Self) -> Self { + // In order to prevent infinite recursion, we cannot use the `widen_mul` in this: + //self.lo().widen_mul(rhs.lo()) + // .wrapping_add(self.lo().wrapping_mul(rhs.hi()).widen_hi()) + // .wrapping_add(self.hi().wrapping_mul(rhs.lo()).widen_hi()) + + let lhs_lo = self.lo(); + let rhs_lo = rhs.lo(); + // construct the widening multiplication using only `Self::H` sized multiplications + let tmp_0 = lhs_lo.lo().zero_widen_mul(rhs_lo.lo()); + let tmp_1 = lhs_lo.lo().zero_widen_mul(rhs_lo.hi()); + let tmp_2 = lhs_lo.hi().zero_widen_mul(rhs_lo.lo()); + let tmp_3 = lhs_lo.hi().zero_widen_mul(rhs_lo.hi()); + // sum up all widening partials + let mul = Self::from_lo_hi(tmp_0, tmp_3) + .wrapping_add(tmp_1.zero_widen() << (Self::BITS / 4)) + .wrapping_add(tmp_2.zero_widen() << (Self::BITS / 4)); + // add the higher partials + mul.wrapping_add(lhs_lo.wrapping_mul(rhs.hi()).widen_hi()) + .wrapping_add(self.hi().wrapping_mul(rhs_lo).widen_hi()) + } +} + +impl Mul for u64 {} +impl Mul for i128 {} + 
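// A minimal, self-contained sketch of the schoolbook decomposition that the
// generic `Mul::mul` above relies on, written out for concrete `u64` operands
// split into 32-bit halves (the helper name and the chosen splitting width are
// illustrative only, not taken from the vendored sources). With
// a = a1*2^32 + a0 and b = b1*2^32 + b0, the wrapping product is
//   a*b mod 2^64 = (a0*b0 + ((a0*b1 + a1*b0) << 32)) mod 2^64,
// since the a1*b1 term lies entirely above bit 63 and drops out.
fn wrapping_mul_u64_sketch(a: u64, b: u64) -> u64 {
    // Low and high 32-bit halves of each operand, kept in u64 so the
    // half-width partial products below cannot overflow.
    let (a0, a1) = (a as u32 as u64, a >> 32);
    let (b0, b1) = (b as u32 as u64, b >> 32);
    // a0*b0 fits in 64 bits; the cross terms may exceed 64 bits in total,
    // but only their low 32 bits survive the shift, so wrapping_add is enough.
    let lo = a0 * b0;
    let mid = a0.wrapping_mul(b1).wrapping_add(a1.wrapping_mul(b0));
    lo.wrapping_add(mid << 32)
}
// Spot check: wrapping_mul_u64_sketch(u64::MAX, 3) == u64::MAX.wrapping_mul(3).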
+pub(crate) trait UMulo: DInt + Int { + fn mulo(self, rhs: Self) -> (Self, bool) { + match (self.hi().is_zero(), rhs.hi().is_zero()) { + // overflow is guaranteed + (false, false) => (self.wrapping_mul(rhs), true), + (true, false) => { + let mul_lo = self.lo().widen_mul(rhs.lo()); + let mul_hi = self.lo().widen_mul(rhs.hi()); + let (mul, o) = mul_lo.overflowing_add(mul_hi.lo().widen_hi()); + (mul, o || !mul_hi.hi().is_zero()) + } + (false, true) => { + let mul_lo = rhs.lo().widen_mul(self.lo()); + let mul_hi = rhs.lo().widen_mul(self.hi()); + let (mul, o) = mul_lo.overflowing_add(mul_hi.lo().widen_hi()); + (mul, o || !mul_hi.hi().is_zero()) + } + // overflow is guaranteed to not happen, and use a smaller widening multiplication + (true, true) => (self.lo().widen_mul(rhs.lo()), false), + } + } +} + +impl UMulo for u32 {} +impl UMulo for u64 {} +impl UMulo for u128 {} + +macro_rules! impl_signed_mulo { + ($fn:ident, $iD:ident, $uD:ident) => { + fn $fn(lhs: $iD, rhs: $iD) -> ($iD, bool) { + let mut lhs = lhs; + let mut rhs = rhs; + // the test against `mul_neg` below fails without this early return + if lhs == 0 || rhs == 0 { + return (0, false); + } + + let lhs_neg = lhs < 0; + let rhs_neg = rhs < 0; + if lhs_neg { + lhs = lhs.wrapping_neg(); + } + if rhs_neg { + rhs = rhs.wrapping_neg(); + } + let mul_neg = lhs_neg != rhs_neg; + + let (mul, o) = (lhs as $uD).mulo(rhs as $uD); + let mut mul = mul as $iD; + + if mul_neg { + mul = mul.wrapping_neg(); + } + if (mul < 0) != mul_neg { + // this one check happens to catch all edge cases related to `$iD::MIN` + (mul, true) + } else { + (mul, o) + } + } + }; +} + +impl_signed_mulo!(i32_overflowing_mul, i32, u32); +impl_signed_mulo!(i64_overflowing_mul, i64, u64); +impl_signed_mulo!(i128_overflowing_mul, i128, u128); + +intrinsics! { + #[maybe_use_optimized_c_shim] + #[arm_aeabi_alias = __aeabi_lmul] + #[cfg(any(not(any(target_arch = "riscv32", target_arch = "riscv64")), target_feature = "m"))] + pub extern "C" fn __muldi3(a: u64, b: u64) -> u64 { + a.mul(b) + } + + pub extern "C" fn __multi3(a: i128, b: i128) -> i128 { + a.mul(b) + } + + pub extern "C" fn __mulosi4(a: i32, b: i32, oflow: &mut i32) -> i32 { + let (mul, o) = i32_overflowing_mul(a, b); + *oflow = o as i32; + mul + } + + pub extern "C" fn __mulodi4(a: i64, b: i64, oflow: &mut i32) -> i64 { + let (mul, o) = i64_overflowing_mul(a, b); + *oflow = o as i32; + mul + } + + #[unadjusted_on_win64] + pub extern "C" fn __muloti4(a: i128, b: i128, oflow: &mut i32) -> i128 { + let (mul, o) = i128_overflowing_mul(a, b); + *oflow = o as i32; + mul + } + + pub extern "C" fn __rust_i128_mulo(a: i128, b: i128, oflow: &mut i32) -> i128 { + let (mul, o) = i128_overflowing_mul(a, b); + *oflow = o.into(); + mul + } + + pub extern "C" fn __rust_u128_mulo(a: u128, b: u128, oflow: &mut i32) -> u128 { + let (mul, o) = a.mulo(b); + *oflow = o.into(); + mul + } +} diff --git a/library/compiler-builtins/compiler-builtins/src/int/sdiv.rs b/library/compiler-builtins/compiler-builtins/src/int/sdiv.rs new file mode 100644 index 0000000000000..6a9029de7f287 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/int/sdiv.rs @@ -0,0 +1,205 @@ +use crate::int::udiv::*; + +macro_rules! 
sdivmod { + ( + $unsigned_fn:ident, // name of the unsigned division function + $signed_fn:ident, // name of the signed division function + $uX:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name` + $iX:ident, // signed integer type for the inputs and outputs of `$signed_name` + $($attr:tt),* // attributes + ) => { + intrinsics! { + $( + #[$attr] + )* + /// Returns `n / d` and sets `*rem = n % d` + pub extern "C" fn $signed_fn(a: $iX, b: $iX, rem: &mut $iX) -> $iX { + let a_neg = a < 0; + let b_neg = b < 0; + let mut a = a; + let mut b = b; + + if a_neg { + a = a.wrapping_neg(); + } + if b_neg { + b = b.wrapping_neg(); + } + + let mut r = *rem as $uX; + let t = $unsigned_fn(a as $uX, b as $uX, Some(&mut r)) as $iX; + let mut r = r as $iX; + + if a_neg { + r = r.wrapping_neg(); + } + *rem = r; + if a_neg != b_neg { + t.wrapping_neg() + } else { + t + } + } + } + } +} + +macro_rules! sdiv { + ( + $unsigned_fn:ident, // name of the unsigned division function + $signed_fn:ident, // name of the signed division function + $uX:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name` + $iX:ident, // signed integer type for the inputs and outputs of `$signed_name` + $($attr:tt),* // attributes + ) => { + intrinsics! { + $( + #[$attr] + )* + /// Returns `n / d` + pub extern "C" fn $signed_fn(a: $iX, b: $iX) -> $iX { + let a_neg = a < 0; + let b_neg = b < 0; + let mut a = a; + let mut b = b; + if a_neg { + a = a.wrapping_neg(); + } + if b_neg { + b = b.wrapping_neg(); + } + let t = $unsigned_fn(a as $uX, b as $uX) as $iX; + if a_neg != b_neg { + t.wrapping_neg() + } else { + t + } + } + } + } +} + +macro_rules! smod { + ( + $unsigned_fn:ident, // name of the unsigned division function + $signed_fn:ident, // name of the signed division function + $uX:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name` + $iX:ident, // signed integer type for the inputs and outputs of `$signed_name` + $($attr:tt),* // attributes + ) => { + intrinsics! { + $( + #[$attr] + )* + /// Returns `n % d` + pub extern "C" fn $signed_fn(a: $iX, b: $iX) -> $iX { + let a_neg = a < 0; + let b_neg = b < 0; + let mut a = a; + let mut b = b; + if a_neg { + a = a.wrapping_neg(); + } + if b_neg { + b = b.wrapping_neg(); + } + let r = $unsigned_fn(a as $uX, b as $uX) as $iX; + if a_neg { + r.wrapping_neg() + } else { + r + } + } + } + } +} + +#[cfg(not(target_arch = "avr"))] +sdivmod!( + __udivmodsi4, + __divmodsi4, + u32, + i32, + maybe_use_optimized_c_shim +); + +#[cfg(target_arch = "avr")] +intrinsics! { + /// Returns `a / b` and `a % b` packed together. + /// + /// Ideally we'd use `-> (u32, u32)` or some kind of a packed struct, but + /// both force a stack allocation, while our result has to be in R18:R26. + pub extern "C" fn __divmodsi4(a: i32, b: i32) -> u64 { + let a_neg = a < 0; + let b_neg = b < 0; + let mut a = a; + let mut b = b; + + if a_neg { + a = a.wrapping_neg(); + } + if b_neg { + b = b.wrapping_neg(); + } + + let tr = __udivmodsi4(a as u32, b as u32); + let mut t = tr as u32 as i32; + let mut r = (tr >> 32) as u32 as i32; + + if a_neg { + r = r.wrapping_neg(); + } + if a_neg != b_neg { + t = t.wrapping_neg(); + } + + ((r as u32 as u64) << 32) | (t as u32 as u64) + } +} + +// The `#[arm_aeabi_alias = __aeabi_idiv]` attribute cannot be made to work with `intrinsics!` in macros +intrinsics! 
{ + #[maybe_use_optimized_c_shim] + #[arm_aeabi_alias = __aeabi_idiv] + /// Returns `n / d` + pub extern "C" fn __divsi3(a: i32, b: i32) -> i32 { + let a_neg = a < 0; + let b_neg = b < 0; + let mut a = a; + let mut b = b; + if a_neg { + a = a.wrapping_neg(); + } + if b_neg { + b = b.wrapping_neg(); + } + let t = __udivsi3(a as u32, b as u32) as i32; + if a_neg != b_neg { + t.wrapping_neg() + } else { + t + } + } +} +smod!(__umodsi3, __modsi3, u32, i32, maybe_use_optimized_c_shim); + +sdivmod!( + __udivmoddi4, + __divmoddi4, + u64, + i64, + maybe_use_optimized_c_shim +); +sdiv!(__udivdi3, __divdi3, u64, i64, maybe_use_optimized_c_shim); +smod!(__umoddi3, __moddi3, u64, i64, maybe_use_optimized_c_shim); + +// LLVM does not currently have a `__divmodti4` function, but GCC does +sdivmod!( + __udivmodti4, + __divmodti4, + u128, + i128, + maybe_use_optimized_c_shim +); +sdiv!(__udivti3, __divti3, u128, i128,); +smod!(__umodti3, __modti3, u128, i128,); diff --git a/library/compiler-builtins/compiler-builtins/src/int/shift.rs b/library/compiler-builtins/compiler-builtins/src/int/shift.rs new file mode 100644 index 0000000000000..a85c1b33d6714 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/int/shift.rs @@ -0,0 +1,116 @@ +use crate::int::{DInt, HInt, Int, MinInt}; + +trait Ashl: DInt { + /// Returns `a << b`, requires `b < Self::BITS` + fn ashl(self, shl: u32) -> Self { + let n_h = Self::H::BITS; + if shl & n_h != 0 { + // we only need `self.lo()` because `self.hi()` will be shifted out entirely + self.lo().wrapping_shl(shl - n_h).widen_hi() + } else if shl == 0 { + self + } else { + Self::from_lo_hi( + self.lo().wrapping_shl(shl), + self.lo().logical_shr(n_h.wrapping_sub(shl)) | self.hi().wrapping_shl(shl), + ) + } + } +} + +impl Ashl for u32 {} +impl Ashl for u64 {} +impl Ashl for u128 {} + +trait Ashr: DInt { + /// Returns arithmetic `a >> b`, requires `b < Self::BITS` + fn ashr(self, shr: u32) -> Self { + let n_h = Self::H::BITS; + if shr & n_h != 0 { + Self::from_lo_hi( + self.hi().wrapping_shr(shr - n_h), + // smear the sign bit + self.hi().wrapping_shr(n_h - 1), + ) + } else if shr == 0 { + self + } else { + Self::from_lo_hi( + self.lo().logical_shr(shr) | self.hi().wrapping_shl(n_h.wrapping_sub(shr)), + self.hi().wrapping_shr(shr), + ) + } + } +} + +impl Ashr for i32 {} +impl Ashr for i64 {} +impl Ashr for i128 {} + +trait Lshr: DInt { + /// Returns logical `a >> b`, requires `b < Self::BITS` + fn lshr(self, shr: u32) -> Self { + let n_h = Self::H::BITS; + if shr & n_h != 0 { + self.hi().logical_shr(shr - n_h).zero_widen() + } else if shr == 0 { + self + } else { + Self::from_lo_hi( + self.lo().logical_shr(shr) | self.hi().wrapping_shl(n_h.wrapping_sub(shr)), + self.hi().logical_shr(shr), + ) + } + } +} + +impl Lshr for u32 {} +impl Lshr for u64 {} +impl Lshr for u128 {} + +intrinsics! 
{ + #[maybe_use_optimized_c_shim] + pub extern "C" fn __ashlsi3(a: u32, b: u32) -> u32 { + a.ashl(b) + } + + #[maybe_use_optimized_c_shim] + #[arm_aeabi_alias = __aeabi_llsl] + pub extern "C" fn __ashldi3(a: u64, b: core::ffi::c_uint) -> u64 { + a.ashl(b as u32) + } + + pub extern "C" fn __ashlti3(a: u128, b: u32) -> u128 { + a.ashl(b) + } + + #[maybe_use_optimized_c_shim] + pub extern "C" fn __ashrsi3(a: i32, b: u32) -> i32 { + a.ashr(b) + } + + #[maybe_use_optimized_c_shim] + #[arm_aeabi_alias = __aeabi_lasr] + pub extern "C" fn __ashrdi3(a: i64, b: core::ffi::c_uint) -> i64 { + a.ashr(b as u32) + } + + pub extern "C" fn __ashrti3(a: i128, b: u32) -> i128 { + a.ashr(b) + } + + #[maybe_use_optimized_c_shim] + pub extern "C" fn __lshrsi3(a: u32, b: u32) -> u32 { + a.lshr(b) + } + + #[maybe_use_optimized_c_shim] + #[arm_aeabi_alias = __aeabi_llsr] + pub extern "C" fn __lshrdi3(a: u64, b: core::ffi::c_uint) -> u64 { + a.lshr(b as u32) + } + + pub extern "C" fn __lshrti3(a: u128, b: u32) -> u128 { + a.lshr(b) + } +} diff --git a/library/compiler-builtins/compiler-builtins/src/int/specialized_div_rem/asymmetric.rs b/library/compiler-builtins/compiler-builtins/src/int/specialized_div_rem/asymmetric.rs new file mode 100644 index 0000000000000..56ce188a37378 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/int/specialized_div_rem/asymmetric.rs @@ -0,0 +1,69 @@ +/// Creates an unsigned division function optimized for dividing integers with the same +/// bitwidth as the largest operand in an asymmetrically sized division. For example, x86-64 has an +/// assembly instruction that can divide a 128 bit integer by a 64 bit integer if the quotient fits +/// in 64 bits. The 128 bit version of this algorithm would use that fast hardware division to +/// construct a full 128 bit by 128 bit division. +#[allow(unused_macros)] +macro_rules! impl_asymmetric { + ( + $fn:ident, // name of the unsigned division function + $zero_div_fn:ident, // function called when division by zero is attempted + $half_division:ident, // function for division of a $uX by a $uX + $asymmetric_division:ident, // function for division of a $uD by a $uX + $n_h:expr, // the number of bits in a $iH or $uH + $uH:ident, // unsigned integer with half the bit width of $uX + $uX:ident, // unsigned integer with half the bit width of $uD + $uD:ident // unsigned integer type for the inputs and outputs of `$fn` + ) => { + /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a + /// tuple. + pub fn $fn(duo: $uD, div: $uD) -> ($uD, $uD) { + let n: u32 = $n_h * 2; + + let duo_lo = duo as $uX; + let duo_hi = (duo >> n) as $uX; + let div_lo = div as $uX; + let div_hi = (div >> n) as $uX; + if div_hi == 0 { + if div_lo == 0 { + $zero_div_fn() + } + if duo_hi < div_lo { + // `$uD` by `$uX` division with a quotient that will fit into a `$uX` + let (quo, rem) = unsafe { $asymmetric_division(duo, div_lo) }; + return (quo as $uD, rem as $uD); + } else { + // Short division using the $uD by $uX division + let (quo_hi, rem_hi) = $half_division(duo_hi, div_lo); + let tmp = unsafe { + $asymmetric_division((duo_lo as $uD) | ((rem_hi as $uD) << n), div_lo) + }; + return ((tmp.0 as $uD) | ((quo_hi as $uD) << n), tmp.1 as $uD); + } + } + + // This has been adapted from + // https://www.codeproject.com/tips/785014/uint-division-modulus which was in turn + // adapted from Hacker's Delight. 
This is similar to the two possibility algorithm + // in that it uses only more significant parts of `duo` and `div` to divide a large + // integer with a smaller division instruction. + let div_lz = div_hi.leading_zeros(); + let div_extra = n - div_lz; + let div_sig_n = (div >> div_extra) as $uX; + let tmp = unsafe { $asymmetric_division(duo >> 1, div_sig_n) }; + + let mut quo = tmp.0 >> ((n - 1) - div_lz); + if quo != 0 { + quo -= 1; + } + + // Note that this is a full `$uD` multiplication being used here + let mut rem = duo - (quo as $uD).wrapping_mul(div); + if div <= rem { + quo += 1; + rem -= div; + } + return (quo as $uD, rem); + } + }; +} diff --git a/library/compiler-builtins/compiler-builtins/src/int/specialized_div_rem/binary_long.rs b/library/compiler-builtins/compiler-builtins/src/int/specialized_div_rem/binary_long.rs new file mode 100644 index 0000000000000..2c61a45e06e0a --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/int/specialized_div_rem/binary_long.rs @@ -0,0 +1,552 @@ +/// Creates an unsigned division function that uses binary long division, designed for +/// computer architectures without division instructions. These functions have good performance for +/// microarchitectures with large branch miss penalties and architectures without the ability to +/// predicate instructions. For architectures with predicated instructions, one of the algorithms +/// described in the documentation of these functions probably has higher performance, and a custom +/// assembly routine should be used instead. +#[allow(unused_macros)] +macro_rules! impl_binary_long { + ( + $fn:ident, // name of the unsigned division function + $zero_div_fn:ident, // function called when division by zero is attempted + $normalization_shift:ident, // function for finding the normalization shift + $n:tt, // the number of bits in a $iX or $uX + $uX:ident, // unsigned integer type for the inputs and outputs of `$fn` + $iX:ident // signed integer type with same bitwidth as `$uX` + $(, $fun_attr:meta)* // attributes for the function + ) => { + /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a + /// tuple. + $( + #[$fun_attr] + )* + pub fn $fn(duo: $uX, div: $uX) -> ($uX, $uX) { + let mut duo = duo; + // handle edge cases before calling `$normalization_shift` + if div == 0 { + $zero_div_fn() + } + if duo < div { + return (0, duo); + } + + // There are many variations of binary division algorithm that could be used. This + // documentation gives a tour of different methods so that future readers wanting to + // optimize further do not have to painstakingly derive them. The SWAR variation is + // especially hard to understand without reading the less convoluted methods first. + + // You may notice that a `duo < div_original` check is included in many these + // algorithms. A critical optimization that many algorithms miss is handling of + // quotients that will turn out to have many trailing zeros or many leading zeros. This + // happens in cases of exact or close-to-exact divisions, divisions by power of two, and + // in cases where the quotient is small. The `duo < div_original` check handles these + // cases of early returns and ends up replacing other kinds of mundane checks that + // normally terminate a binary division algorithm. + // + // Something you may see in other algorithms that is not special-cased here is checks + // for division by powers of two. 
The `duo < div_original` check handles this case and + // more, however it can be checked up front before the bisection using the + // `((div > 0) && ((div & (div - 1)) == 0))` trick. This is not special-cased because + // compilers should handle most cases where divisions by power of two occur, and we do + // not want to add on a few cycles for every division operation just to save a few + // cycles rarely. + + // The following example is the most straightforward translation from the way binary + // long division is typically visualized: + // Dividing 178u8 (0b10110010) by 6u8 (0b110). `div` is shifted left by 5, according to + // the result from `$normalization_shift(duo, div, false)`. + // + // Step 0: `sub` is negative, so there is not full normalization, so no `quo` bit is set + // and `duo` is kept unchanged. + // duo:10110010, div_shifted:11000000, sub:11110010, quo:00000000, shl:5 + // + // Step 1: `sub` is positive, set a `quo` bit and update `duo` for next step. + // duo:10110010, div_shifted:01100000, sub:01010010, quo:00010000, shl:4 + // + // Step 2: Continue based on `sub`. The `quo` bits start accumulating. + // duo:01010010, div_shifted:00110000, sub:00100010, quo:00011000, shl:3 + // duo:00100010, div_shifted:00011000, sub:00001010, quo:00011100, shl:2 + // duo:00001010, div_shifted:00001100, sub:11111110, quo:00011100, shl:1 + // duo:00001010, div_shifted:00000110, sub:00000100, quo:00011100, shl:0 + // The `duo < div_original` check terminates the algorithm with the correct quotient of + // 29u8 and remainder of 4u8 + /* + let div_original = div; + let mut shl = $normalization_shift(duo, div, false); + let mut quo = 0; + loop { + let div_shifted = div << shl; + let sub = duo.wrapping_sub(div_shifted); + // it is recommended to use `println!`s like this if functionality is unclear + /* + println!("duo:{:08b}, div_shifted:{:08b}, sub:{:08b}, quo:{:08b}, shl:{}", + duo, + div_shifted, + sub, + quo, + shl + ); + */ + if 0 <= (sub as $iX) { + duo = sub; + quo += 1 << shl; + if duo < div_original { + // this branch is optional + return (quo, duo) + } + } + if shl == 0 { + return (quo, duo) + } + shl -= 1; + } + */ + + // This restoring binary long division algorithm reduces the number of operations + // overall via: + // - `pow` can be shifted right instead of recalculating from `shl` + // - starting `div` shifted left and shifting it right for each step instead of + // recalculating from `shl` + // - The `duo < div_original` branch is used to terminate the algorithm instead of the + // `shl == 0` branch. This check is strong enough to prevent set bits of `pow` and + // `div` from being shifted off the end. This check also only occurs on half of steps + // on average, since it is behind the `(sub as $iX) >= 0` branch. + // - `shl` is now not needed by any aspect of of the loop and thus only 3 variables are + // being updated between steps + // + // There are many variations of this algorithm, but this encompases the largest number + // of architectures and does not rely on carry flags, add-with-carry, or SWAR + // complications to be decently fast. 
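Before the commented-out variant below, it may help to see the 3-variable restoring loop in isolation. The following standalone sketch (illustrative only; division by zero is simply asserted away, and the early-return tricks are omitted) applies the shift-and-subtract scheme described above to `u32` and checks the result against the native `/` and `%` operators:

```rust
// Minimal sketch of restoring binary long division for `u32`, assuming `div != 0`.
fn restoring_div_rem(mut duo: u32, div: u32) -> (u32, u32) {
    assert!(div != 0, "sketch does not handle division by zero");
    if duo < div {
        return (0, duo);
    }
    // align the most significant set bits of `div` and `duo`
    let shl = div.leading_zeros() - duo.leading_zeros();
    let mut div = div << shl;
    let mut pow = 1u32 << shl;
    let mut quo = 0u32;
    loop {
        if duo >= div {
            duo -= div;
            quo |= pow;
        }
        if pow == 1 {
            return (quo, duo);
        }
        div >>= 1;
        pow >>= 1;
    }
}

fn main() {
    for &(duo, div) in &[(178u32, 6u32), (u32::MAX, 7), (1 << 31, 3), (5, 9)] {
        assert_eq!(restoring_div_rem(duo, div), (duo / div, duo % div));
    }
}
```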
+ /* + let div_original = div; + let shl = $normalization_shift(duo, div, false); + let mut div: $uX = div << shl; + let mut pow: $uX = 1 << shl; + let mut quo: $uX = 0; + loop { + let sub = duo.wrapping_sub(div); + if 0 <= (sub as $iX) { + duo = sub; + quo |= pow; + if duo < div_original { + return (quo, duo) + } + } + div >>= 1; + pow >>= 1; + } + */ + + // If the architecture has flags and predicated arithmetic instructions, it is possible + // to do binary long division without branching and in only 3 or 4 instructions. This is + // a variation of a 3 instruction central loop from + // http://www.chiark.greenend.org.uk/~theom/riscos/docs/ultimate/a252div.txt. + // + // What allows doing division in only 3 instructions is realizing that instead of + // keeping `duo` in place and shifting `div` right to align bits, `div` can be kept in + // place and `duo` can be shifted left. This means `div` does not have to be updated, + // but causes edge case problems and makes `duo < div_original` tests harder. Some + // architectures have an option to shift an argument in an arithmetic operation, which + // means `duo` can be shifted left and subtracted from in one instruction. The other two + // instructions are updating `quo` and undoing the subtraction if it turns out things + // were not normalized. + + /* + // Perform one binary long division step on the already normalized arguments, because + // the main. Note that this does a full normalization since the central loop needs + // `duo.leading_zeros()` to be at least 1 more than `div.leading_zeros()`. The original + // variation only did normalization to the nearest 4 steps, but this makes handling edge + // cases much harder. We do a full normalization and perform a binary long division + // step. In the edge case where the msbs of `duo` and `div` are set, it clears the msb + // of `duo`, then the edge case handler shifts `div` right and does another long + // division step to always insure `duo.leading_zeros() + 1 >= div.leading_zeros()`. + let div_original = div; + let mut shl = $normalization_shift(duo, div, true); + let mut div: $uX = (div << shl); + let mut quo: $uX = 1; + duo = duo.wrapping_sub(div); + if duo < div_original { + return (1 << shl, duo); + } + let div_neg: $uX; + if (div as $iX) < 0 { + // A very ugly edge case where the most significant bit of `div` is set (after + // shifting to match `duo` when its most significant bit is at the sign bit), which + // leads to the sign bit of `div_neg` being cut off and carries not happening when + // they should. This branch performs a long division step that keeps `duo` in place + // and shifts `div` down. + div >>= 1; + div_neg = div.wrapping_neg(); + let (sub, carry) = duo.overflowing_add(div_neg); + duo = sub; + quo = quo.wrapping_add(quo).wrapping_add(carry as $uX); + if !carry { + duo = duo.wrapping_add(div); + } + shl -= 1; + } else { + div_neg = div.wrapping_neg(); + } + // The add-with-carry that updates `quo` needs to have the carry set when a normalized + // subtract happens. Using `duo.wrapping_shl(1).overflowing_sub(div)` to do the + // subtraction generates a carry when an unnormalized subtract happens, which is the + // opposite of what we want. Instead, we use + // `duo.wrapping_shl(1).overflowing_add(div_neg)`, where `div_neg` is negative `div`. 
+ let mut i = shl; + loop { + if i == 0 { + break; + } + i -= 1; + // `ADDS duo, div, duo, LSL #1` + // (add `div` to `duo << 1` and set flags) + let (sub, carry) = duo.wrapping_shl(1).overflowing_add(div_neg); + duo = sub; + // `ADC quo, quo, quo` + // (add with carry). Effectively shifts `quo` left by 1 and sets the least + // significant bit to the carry. + quo = quo.wrapping_add(quo).wrapping_add(carry as $uX); + // `ADDCC duo, duo, div` + // (add if carry clear). Undoes the subtraction if no carry was generated. + if !carry { + duo = duo.wrapping_add(div); + } + } + return (quo, duo >> shl); + */ + + // This is the SWAR (SIMD within in a register) restoring division algorithm. + // This combines several ideas of the above algorithms: + // - If `duo` is shifted left instead of shifting `div` right like in the 3 instruction + // restoring division algorithm, some architectures can do the shifting and + // subtraction step in one instruction. + // - `quo` can be constructed by adding powers-of-two to it or shifting it left by one + // and adding one. + // - Every time `duo` is shifted left, there is another unused 0 bit shifted into the + // LSB, so what if we use those bits to store `quo`? + // Through a complex setup, it is possible to manage `duo` and `quo` in the same + // register, and perform one step with 2 or 3 instructions. The only major downsides are + // that there is significant setup (it is only saves instructions if `shl` is + // approximately more than 4), `duo < div_original` checks are impractical once SWAR is + // initiated, and the number of division steps taken has to be exact (we cannot do more + // division steps than `shl`, because it introduces edge cases where quotient bits in + // `duo` start to collide with the real part of `div`. + /* + // first step. The quotient bit is stored in `quo` for now + let div_original = div; + let mut shl = $normalization_shift(duo, div, true); + let mut div: $uX = (div << shl); + duo = duo.wrapping_sub(div); + let mut quo: $uX = 1 << shl; + if duo < div_original { + return (quo, duo); + } + + let mask: $uX; + if (div as $iX) < 0 { + // deal with same edge case as the 3 instruction restoring division algorithm, but + // the quotient bit from this step also has to be stored in `quo` + div >>= 1; + shl -= 1; + let tmp = 1 << shl; + mask = tmp - 1; + let sub = duo.wrapping_sub(div); + if (sub as $iX) >= 0 { + // restore + duo = sub; + quo |= tmp; + } + if duo < div_original { + return (quo, duo); + } + } else { + mask = quo - 1; + } + // There is now room for quotient bits in `duo`. + + // Note that `div` is already shifted left and has `shl` unset bits. We subtract 1 from + // `div` and end up with the subset of `shl` bits being all being set. This subset acts + // just like a two's complement negative one. The subset of `div` containing the divisor + // had 1 subtracted from it, but a carry will always be generated from the `shl` subset + // as long as the quotient stays positive. + // + // When the modified `div` is subtracted from `duo.wrapping_shl(1)`, the `shl` subset + // adds a quotient bit to the least significant bit. + // For example, 89 (0b01011001) divided by 3 (0b11): + // + // shl:4, div:0b00110000 + // first step: + // duo:0b01011001 + // + div_neg:0b11010000 + // ____________________ + // 0b00101001 + // quo is set to 0b00010000 and mask is set to 0b00001111 for later + // + // 1 is subtracted from `div`. I will differentiate the `shl` part of `div` and the + // quotient part of `duo` with `^`s. + // chars. 
+ // div:0b00110000 + // ^^^^ + // + 0b11111111 + // ________________ + // 0b00101111 + // ^^^^ + // div_neg:0b11010001 + // + // first SWAR step: + // duo_shl1:0b01010010 + // ^ + // + div_neg:0b11010001 + // ____________________ + // 0b00100011 + // ^ + // second: + // duo_shl1:0b01000110 + // ^^ + // + div_neg:0b11010001 + // ____________________ + // 0b00010111 + // ^^ + // third: + // duo_shl1:0b00101110 + // ^^^ + // + div_neg:0b11010001 + // ____________________ + // 0b11111111 + // ^^^ + // 3 steps resulted in the quotient with 3 set bits as expected, but currently the real + // part of `duo` is negative and the third step was an unnormalized step. The restore + // branch then restores `duo`. Note that the restore branch does not shift `duo` left. + // + // duo:0b11111111 + // ^^^ + // + div:0b00101111 + // ^^^^ + // ________________ + // 0b00101110 + // ^^^ + // `duo` is now back in the `duo_shl1` state it was at in the the third step, with an + // unset quotient bit. + // + // final step (`shl` was 4, so exactly 4 steps must be taken) + // duo_shl1:0b01011100 + // ^^^^ + // + div_neg:0b11010001 + // ____________________ + // 0b00101101 + // ^^^^ + // The quotient includes the `^` bits added with the `quo` bits from the beginning that + // contained the first step and potential edge case step, + // `quo:0b00010000 + (duo:0b00101101 & mask:0b00001111) == 0b00011101 == 29u8`. + // The remainder is the bits remaining in `duo` that are not part of the quotient bits, + // `duo:0b00101101 >> shl == 0b0010 == 2u8`. + let div: $uX = div.wrapping_sub(1); + let mut i = shl; + loop { + if i == 0 { + break; + } + i -= 1; + duo = duo.wrapping_shl(1).wrapping_sub(div); + if (duo as $iX) < 0 { + // restore + duo = duo.wrapping_add(div); + } + } + // unpack the results of SWAR + return ((duo & mask) | quo, duo >> shl); + */ + + // The problem with the conditional restoring SWAR algorithm above is that, in practice, + // it requires assembly code to bring out its full unrolled potential (It seems that + // LLVM can't use unrolled conditionals optimally and ends up erasing all the benefit + // that my algorithm intends. On architectures without predicated instructions, the code + // gen is especially bad. We need a default software division algorithm that is + // guaranteed to get decent code gen for the central loop. + + // For non-SWAR algorithms, there is a way to do binary long division without + // predication or even branching. This involves creating a mask from the sign bit and + // performing different kinds of steps using that. + /* + let shl = $normalization_shift(duo, div, true); + let mut div: $uX = div << shl; + let mut pow: $uX = 1 << shl; + let mut quo: $uX = 0; + loop { + let sub = duo.wrapping_sub(div); + let sign_mask = !((sub as $iX).wrapping_shr($n - 1) as $uX); + duo -= div & sign_mask; + quo |= pow & sign_mask; + div >>= 1; + pow >>= 1; + if pow == 0 { + break; + } + } + return (quo, duo); + */ + // However, it requires about 4 extra operations (smearing the sign bit, negating the + // mask, and applying the mask twice) on top of the operations done by the actual + // algorithm. With SWAR however, just 2 extra operations are needed, making it + // practical and even the most optimal algorithm for some architectures. + + // What we do is use custom assembly for predicated architectures that need software + // division, and for the default algorithm use a mask based restoring SWAR algorithm + // without conditionals or branches. 
On almost all architectures, this Rust code is + // guaranteed to compile down to 5 assembly instructions or less for each step, and LLVM + // will unroll it in a decent way. + + // standard opening for SWAR algorithm with first step and edge case handling + let div_original = div; + let mut shl = $normalization_shift(duo, div, true); + let mut div: $uX = (div << shl); + duo = duo.wrapping_sub(div); + let mut quo: $uX = 1 << shl; + if duo < div_original { + return (quo, duo); + } + let mask: $uX; + if (div as $iX) < 0 { + div >>= 1; + shl -= 1; + let tmp = 1 << shl; + mask = tmp - 1; + let sub = duo.wrapping_sub(div); + if (sub as $iX) >= 0 { + duo = sub; + quo |= tmp; + } + if duo < div_original { + return (quo, duo); + } + } else { + mask = quo - 1; + } + + // central loop + div = div.wrapping_sub(1); + let mut i = shl; + loop { + if i == 0 { + break; + } + i -= 1; + // shift left 1 and subtract + duo = duo.wrapping_shl(1).wrapping_sub(div); + // create mask + let mask = (duo as $iX).wrapping_shr($n - 1) as $uX; + // restore + duo = duo.wrapping_add(div & mask); + } + // unpack + return ((duo & mask) | quo, duo >> shl); + + // miscellanious binary long division algorithms that might be better for specific + // architectures + + // Another kind of long division uses an interesting fact that `div` and `pow` can be + // negated when `duo` is negative to perform a "negated" division step that works in + // place of any normalization mechanism. This is a non-restoring division algorithm that + // is very similar to the non-restoring division algorithms that can be found on the + // internet, except there is only one test for `duo < 0`. The subtraction from `quo` can + // be viewed as shifting the least significant set bit right (e.x. if we enter a series + // of negated binary long division steps starting with `quo == 0b1011_0000` and + // `pow == 0b0000_1000`, `quo` will progress like this: 0b1010_1000, 0b1010_0100, + // 0b1010_0010, 0b1010_0001). + /* + let div_original = div; + let shl = $normalization_shift(duo, div, true); + let mut div: $uX = (div << shl); + let mut pow: $uX = 1 << shl; + let mut quo: $uX = pow; + duo = duo.wrapping_sub(div); + if duo < div_original { + return (quo, duo); + } + div >>= 1; + pow >>= 1; + loop { + if (duo as $iX) < 0 { + // Negated binary long division step. + duo = duo.wrapping_add(div); + quo = quo.wrapping_sub(pow); + } else { + // Normal long division step. + if duo < div_original { + return (quo, duo) + } + duo = duo.wrapping_sub(div); + quo = quo.wrapping_add(pow); + } + pow >>= 1; + div >>= 1; + } + */ + + // This is the Nonrestoring SWAR algorithm, combining the nonrestoring algorithm with + // SWAR techniques that makes the only difference between steps be negation of `div`. + // If there was an architecture with an instruction that negated inputs to an adder + // based on conditionals, and in place shifting (or a three input addition operation + // that can have `duo` as two of the inputs to effectively shift it left by 1), then a + // single instruction central loop is possible. Microarchitectures often have inputs to + // their ALU that can invert the arguments and carry in of adders, but the architectures + // unfortunately do not have an instruction to dynamically invert this input based on + // conditionals. 
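The live central loop above leans on a mask smeared from the sign bit to perform the restore without a branch. As a small illustrative sketch (the helper name `conditional_restore` is invented for the example), the trick can be isolated: an arithmetic shift right by 31 turns the would-be sign bit of a `u32` into an all-ones or all-zeros mask that gates the addition.

```rust
// Sketch of the sign-smearing trick: all ones if `x` would be negative as an
// `i32`, all zeros otherwise, used to apply or suppress `add` with no branch.
fn conditional_restore(x: u32, add: u32) -> u32 {
    let mask = ((x as i32) >> 31) as u32;
    x.wrapping_add(add & mask)
}

fn main() {
    // msb set: the addition is applied (and wraps back to zero here)
    assert_eq!(conditional_restore(0xFFFF_FFF0, 0x10), 0);
    // msb clear: the addition is suppressed
    assert_eq!(conditional_restore(0x0000_0010, 0x10), 0x10);
}
```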
+ /* + // SWAR opening + let div_original = div; + let mut shl = $normalization_shift(duo, div, true); + let mut div: $uX = (div << shl); + duo = duo.wrapping_sub(div); + let mut quo: $uX = 1 << shl; + if duo < div_original { + return (quo, duo); + } + let mask: $uX; + if (div as $iX) < 0 { + div >>= 1; + shl -= 1; + let tmp = 1 << shl; + let sub = duo.wrapping_sub(div); + if (sub as $iX) >= 0 { + // restore + duo = sub; + quo |= tmp; + } + if duo < div_original { + return (quo, duo); + } + mask = tmp - 1; + } else { + mask = quo - 1; + } + + // central loop + let div: $uX = div.wrapping_sub(1); + let mut i = shl; + loop { + if i == 0 { + break; + } + i -= 1; + // note: the `wrapping_shl(1)` can be factored out, but would require another + // restoring division step to prevent `(duo as $iX)` from overflowing + if (duo as $iX) < 0 { + // Negated binary long division step. + duo = duo.wrapping_shl(1).wrapping_add(div); + } else { + // Normal long division step. + duo = duo.wrapping_shl(1).wrapping_sub(div); + } + } + if (duo as $iX) < 0 { + // Restore. This was not needed in the original nonrestoring algorithm because of + // the `duo < div_original` checks. + duo = duo.wrapping_add(div); + } + // unpack + return ((duo & mask) | quo, duo >> shl); + */ + } + }; +} diff --git a/library/compiler-builtins/compiler-builtins/src/int/specialized_div_rem/delegate.rs b/library/compiler-builtins/compiler-builtins/src/int/specialized_div_rem/delegate.rs new file mode 100644 index 0000000000000..f5c6e50239a35 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/int/specialized_div_rem/delegate.rs @@ -0,0 +1,317 @@ +/// Creates an unsigned division function that uses a combination of hardware division and +/// binary long division to divide integers larger than what hardware division by itself can do. This +/// function is intended for microarchitectures that have division hardware, but not fast enough +/// multiplication hardware for `impl_trifecta` to be faster. +#[allow(unused_macros)] +macro_rules! impl_delegate { + ( + $fn:ident, // name of the unsigned division function + $zero_div_fn:ident, // function called when division by zero is attempted + $half_normalization_shift:ident, // function for finding the normalization shift of $uX + $half_division:ident, // function for division of a $uX by a $uX + $n_h:expr, // the number of bits in $iH or $uH + $uH:ident, // unsigned integer with half the bit width of $uX + $uX:ident, // unsigned integer with half the bit width of $uD. + $uD:ident, // unsigned integer type for the inputs and outputs of `$fn` + $iD:ident // signed integer type with the same bitwidth as `$uD` + ) => { + /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a + /// tuple. + pub fn $fn(duo: $uD, div: $uD) -> ($uD, $uD) { + // The two possibility algorithm, undersubtracting long division algorithm, or any kind + // of reciprocal based algorithm will not be fastest, because they involve large + // multiplications that we assume to not be fast enough relative to the divisions to + // outweigh setup times. 
+ + // the number of bits in a $uX + let n = $n_h * 2; + + let duo_lo = duo as $uX; + let duo_hi = (duo >> n) as $uX; + let div_lo = div as $uX; + let div_hi = (div >> n) as $uX; + + match (div_lo == 0, div_hi == 0, duo_hi == 0) { + (true, true, _) => $zero_div_fn(), + (_, false, true) => { + // `duo` < `div` + return (0, duo); + } + (false, true, true) => { + // delegate to smaller division + let tmp = $half_division(duo_lo, div_lo); + return (tmp.0 as $uD, tmp.1 as $uD); + } + (false, true, false) => { + if duo_hi < div_lo { + // `quo_hi` will always be 0. This performs a binary long division algorithm + // to zero `duo_hi` followed by a half division. + + // We can calculate the normalization shift using only `$uX` size functions. + // If we calculated the normalization shift using + // `$half_normalization_shift(duo_hi, div_lo false)`, it would break the + // assumption the function has that the first argument is more than the + // second argument. If the arguments are switched, the assumption holds true + // since `duo_hi < div_lo`. + let norm_shift = $half_normalization_shift(div_lo, duo_hi, false); + let shl = if norm_shift == 0 { + // Consider what happens if the msbs of `duo_hi` and `div_lo` align with + // no shifting. The normalization shift will always return + // `norm_shift == 0` regardless of whether it is fully normalized, + // because `duo_hi < div_lo`. In that edge case, `n - norm_shift` would + // result in shift overflow down the line. For the edge case, because + // both `duo_hi < div_lo` and we are comparing all the significant bits + // of `duo_hi` and `div`, we can make `shl = n - 1`. + n - 1 + } else { + // We also cannot just use `shl = n - norm_shift - 1` in the general + // case, because when we are not in the edge case comparing all the + // significant bits, then the full `duo < div` may not be true and thus + // breaks the division algorithm. + n - norm_shift + }; + + // The 3 variable restoring division algorithm (see binary_long.rs) is ideal + // for this task, since `pow` and `quo` can be `$uX` and the delegation + // check is simple. + let mut div: $uD = div << shl; + let mut pow_lo: $uX = 1 << shl; + let mut quo_lo: $uX = 0; + let mut duo = duo; + loop { + let sub = duo.wrapping_sub(div); + if 0 <= (sub as $iD) { + duo = sub; + quo_lo |= pow_lo; + let duo_hi = (duo >> n) as $uX; + if duo_hi == 0 { + // Delegate to get the rest of the quotient. Note that the + // `div_lo` here is the original unshifted `div`. + let tmp = $half_division(duo as $uX, div_lo); + return ((quo_lo | tmp.0) as $uD, tmp.1 as $uD); + } + } + div >>= 1; + pow_lo >>= 1; + } + } else if duo_hi == div_lo { + // `quo_hi == 1`. This branch is cheap and helps with edge cases. 
+ let tmp = $half_division(duo as $uX, div as $uX); + return ((1 << n) | (tmp.0 as $uD), tmp.1 as $uD); + } else { + // `div_lo < duo_hi` + // `rem_hi == 0` + if (div_lo >> $n_h) == 0 { + // Short division of $uD by a $uH, using $uX by $uX division + let div_0 = div_lo as $uH as $uX; + let (quo_hi, rem_3) = $half_division(duo_hi, div_0); + + let duo_mid = ((duo >> $n_h) as $uH as $uX) | (rem_3 << $n_h); + let (quo_1, rem_2) = $half_division(duo_mid, div_0); + + let duo_lo = (duo as $uH as $uX) | (rem_2 << $n_h); + let (quo_0, rem_1) = $half_division(duo_lo, div_0); + + return ( + (quo_0 as $uD) | ((quo_1 as $uD) << $n_h) | ((quo_hi as $uD) << n), + rem_1 as $uD, + ); + } + + // This is basically a short division composed of a half division for the hi + // part, specialized 3 variable binary long division in the middle, and + // another half division for the lo part. + let duo_lo = duo as $uX; + let tmp = $half_division(duo_hi, div_lo); + let quo_hi = tmp.0; + let mut duo = (duo_lo as $uD) | ((tmp.1 as $uD) << n); + // This check is required to avoid breaking the long division below. + if duo < div { + return ((quo_hi as $uD) << n, duo); + } + + // The half division handled all shift alignments down to `n`, so this + // division can continue with a shift of `n - 1`. + let mut div: $uD = div << (n - 1); + let mut pow_lo: $uX = 1 << (n - 1); + let mut quo_lo: $uX = 0; + loop { + let sub = duo.wrapping_sub(div); + if 0 <= (sub as $iD) { + duo = sub; + quo_lo |= pow_lo; + let duo_hi = (duo >> n) as $uX; + if duo_hi == 0 { + // Delegate to get the rest of the quotient. Note that the + // `div_lo` here is the original unshifted `div`. + let tmp = $half_division(duo as $uX, div_lo); + return ( + (tmp.0) as $uD | (quo_lo as $uD) | ((quo_hi as $uD) << n), + tmp.1 as $uD, + ); + } + } + div >>= 1; + pow_lo >>= 1; + } + } + } + (_, false, false) => { + // Full $uD by $uD binary long division. `quo_hi` will always be 0. + if duo < div { + return (0, duo); + } + let div_original = div; + let shl = $half_normalization_shift(duo_hi, div_hi, false); + let mut duo = duo; + let mut div: $uD = div << shl; + let mut pow_lo: $uX = 1 << shl; + let mut quo_lo: $uX = 0; + loop { + let sub = duo.wrapping_sub(div); + if 0 <= (sub as $iD) { + duo = sub; + quo_lo |= pow_lo; + if duo < div_original { + return (quo_lo as $uD, duo); + } + } + div >>= 1; + pow_lo >>= 1; + } + } + } + } + }; +} + +/// Returns `n / d` and sets `*rem = n % d`. +/// +/// This specialization exists because: +/// - The LLVM backend for 32-bit SPARC cannot compile functions that return `(u128, u128)`, +/// so we have to use an old fashioned `&mut u128` argument to return the remainder. +/// - 64-bit SPARC does not have u64 * u64 => u128 widening multiplication, which makes the +/// delegate algorithm strategy the only reasonably fast way to perform `u128` division. 
+// used on SPARC +#[allow(dead_code)] +pub fn u128_divide_sparc(duo: u128, div: u128, rem: &mut u128) -> u128 { + use super::*; + let duo_lo = duo as u64; + let duo_hi = (duo >> 64) as u64; + let div_lo = div as u64; + let div_hi = (div >> 64) as u64; + + match (div_lo == 0, div_hi == 0, duo_hi == 0) { + (true, true, _) => zero_div_fn(), + (_, false, true) => { + *rem = duo; + return 0; + } + (false, true, true) => { + let tmp = u64_by_u64_div_rem(duo_lo, div_lo); + *rem = tmp.1 as u128; + return tmp.0 as u128; + } + (false, true, false) => { + if duo_hi < div_lo { + let norm_shift = u64_normalization_shift(div_lo, duo_hi, false); + let shl = if norm_shift == 0 { + 64 - 1 + } else { + 64 - norm_shift + }; + + let mut div: u128 = div << shl; + let mut pow_lo: u64 = 1 << shl; + let mut quo_lo: u64 = 0; + let mut duo = duo; + loop { + let sub = duo.wrapping_sub(div); + if 0 <= (sub as i128) { + duo = sub; + quo_lo |= pow_lo; + let duo_hi = (duo >> 64) as u64; + if duo_hi == 0 { + let tmp = u64_by_u64_div_rem(duo as u64, div_lo); + *rem = tmp.1 as u128; + return (quo_lo | tmp.0) as u128; + } + } + div >>= 1; + pow_lo >>= 1; + } + } else if duo_hi == div_lo { + let tmp = u64_by_u64_div_rem(duo as u64, div as u64); + *rem = tmp.1 as u128; + return (1 << 64) | (tmp.0 as u128); + } else { + if (div_lo >> 32) == 0 { + let div_0 = div_lo as u32 as u64; + let (quo_hi, rem_3) = u64_by_u64_div_rem(duo_hi, div_0); + + let duo_mid = ((duo >> 32) as u32 as u64) | (rem_3 << 32); + let (quo_1, rem_2) = u64_by_u64_div_rem(duo_mid, div_0); + + let duo_lo = (duo as u32 as u64) | (rem_2 << 32); + let (quo_0, rem_1) = u64_by_u64_div_rem(duo_lo, div_0); + + *rem = rem_1 as u128; + return (quo_0 as u128) | ((quo_1 as u128) << 32) | ((quo_hi as u128) << 64); + } + + let duo_lo = duo as u64; + let tmp = u64_by_u64_div_rem(duo_hi, div_lo); + let quo_hi = tmp.0; + let mut duo = (duo_lo as u128) | ((tmp.1 as u128) << 64); + if duo < div { + *rem = duo; + return (quo_hi as u128) << 64; + } + + let mut div: u128 = div << (64 - 1); + let mut pow_lo: u64 = 1 << (64 - 1); + let mut quo_lo: u64 = 0; + loop { + let sub = duo.wrapping_sub(div); + if 0 <= (sub as i128) { + duo = sub; + quo_lo |= pow_lo; + let duo_hi = (duo >> 64) as u64; + if duo_hi == 0 { + let tmp = u64_by_u64_div_rem(duo as u64, div_lo); + *rem = tmp.1 as u128; + return (tmp.0) as u128 | (quo_lo as u128) | ((quo_hi as u128) << 64); + } + } + div >>= 1; + pow_lo >>= 1; + } + } + } + (_, false, false) => { + if duo < div { + *rem = duo; + return 0; + } + let div_original = div; + let shl = u64_normalization_shift(duo_hi, div_hi, false); + let mut duo = duo; + let mut div: u128 = div << shl; + let mut pow_lo: u64 = 1 << shl; + let mut quo_lo: u64 = 0; + loop { + let sub = duo.wrapping_sub(div); + if 0 <= (sub as i128) { + duo = sub; + quo_lo |= pow_lo; + if duo < div_original { + *rem = duo; + return quo_lo as u128; + } + } + div >>= 1; + pow_lo >>= 1; + } + } + } +} diff --git a/library/compiler-builtins/compiler-builtins/src/int/specialized_div_rem/mod.rs b/library/compiler-builtins/compiler-builtins/src/int/specialized_div_rem/mod.rs new file mode 100644 index 0000000000000..43f466e75ba4a --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/int/specialized_div_rem/mod.rs @@ -0,0 +1,320 @@ +// TODO: when `unsafe_block_in_unsafe_fn` is stabilized, remove this +#![allow(unused_unsafe)] +// The functions are complex with many branches, and explicit +// `return`s makes it clear where function exit points are +#![allow(clippy::needless_return)] 
+#![allow(clippy::comparison_chain)] +// Clippy is confused by the complex configuration +#![allow(clippy::if_same_then_else)] +#![allow(clippy::needless_bool)] + +//! This `specialized_div_rem` module is originally from version 1.0.0 of the +//! `specialized-div-rem` crate. Note that `for` loops with ranges are not used in this +//! module, since unoptimized compilation may generate references to `memcpy`. +//! +//! The purpose of these macros is to easily change the both the division algorithm used +//! for a given integer size and the half division used by that algorithm. The way +//! functions call each other is also constructed such that linkers will find the chain of +//! software and hardware divisions needed for every size of signed and unsigned division. +//! For example, most target compilations do the following: +//! +//! - Many 128 bit division functions like `u128::wrapping_div` use +//! `std::intrinsics::unchecked_div`, which gets replaced by `__udivti3` because there +//! is not a 128 bit by 128 bit hardware division function in most architectures. +//! `__udivti3` uses `u128_div_rem` (this extra level of function calls exists because +//! `__umodti3` and `__udivmodti4` also exist, and `specialized_div_rem` supplies just +//! one function to calculate both the quotient and remainder. If configuration flags +//! enable it, `impl_trifecta!` defines `u128_div_rem` to use the trifecta algorithm, +//! which requires the half sized division `u64_by_u64_div_rem`. If the architecture +//! supplies a 64 bit hardware division instruction, `u64_by_u64_div_rem` will be +//! reduced to those instructions. Note that we do not specify the half size division +//! directly to be `__udivdi3`, because hardware division would never be introduced. +//! - If the architecture does not supply a 64 bit hardware division instruction, u64 +//! divisions will use functions such as `__udivdi3`. This will call `u64_div_rem` +//! which is defined by `impl_delegate!`. The half division for this algorithm is +//! `u32_by_u32_div_rem` which in turn becomes hardware division instructions or more +//! software division algorithms. +//! - If the architecture does not supply a 32 bit hardware instruction, linkers will +//! look for `__udivsi3`. `impl_binary_long!` is used, but this algorithm uses no half +//! division, so the chain of calls ends here. +//! +//! On some architectures like x86_64, an asymmetrically sized division is supplied, in +//! which 128 bit numbers can be divided by 64 bit numbers. `impl_asymmetric!` is used to +//! extend the 128 by 64 bit division to a full 128 by 128 bit division. + +// `allow(dead_code)` is used in various places, because the configuration code would otherwise be +// ridiculously complex + +#[macro_use] +mod norm_shift; + +#[macro_use] +mod binary_long; + +#[macro_use] +mod delegate; + +// used on SPARC +#[allow(unused_imports)] +#[cfg(not(feature = "unstable-public-internals"))] +pub(crate) use self::delegate::u128_divide_sparc; +#[cfg(feature = "unstable-public-internals")] +pub use self::delegate::u128_divide_sparc; + +#[macro_use] +mod trifecta; + +#[macro_use] +mod asymmetric; + +/// The behavior of all divisions by zero is controlled by this function. This function should be +/// impossible to reach by Rust users, unless `compiler-builtins` public division functions or +/// `core/std::unchecked_div/rem` are directly used without a zero check in front. +fn zero_div_fn() -> ! 
{ + // Calling the intrinsic directly, to avoid the `assert_unsafe_precondition` that cannot be used + // here because it involves non-`inline` functions + // (https://github.com/rust-lang/compiler-builtins/issues/491). + unsafe { core::intrinsics::unreachable() } +} + +const USE_LZ: bool = { + if cfg!(target_arch = "arm") { + if cfg!(target_feature = "thumb-mode") { + // ARM thumb targets have CLZ instructions if the instruction set of ARMv6T2 is + // supported. This is needed to successfully differentiate between targets like + // `thumbv8.base` and `thumbv8.main`. + cfg!(target_feature = "v6t2") + } else { + // Regular ARM targets have CLZ instructions if the ARMv5TE instruction set is + // supported. Technically, ARMv5T was the first to have CLZ, but the "v5t" target + // feature does not seem to work. + cfg!(target_feature = "v5te") + } + } else if cfg!(any(target_arch = "sparc", target_arch = "sparc64")) { + // LZD or LZCNT on SPARC only exists for the VIS 3 extension and later. + cfg!(target_feature = "vis3") + } else if cfg!(any(target_arch = "riscv32", target_arch = "riscv64")) { + // The 'Zbb' Basic Bit-Manipulation extension on RISC-V + // determines if a CLZ assembly instruction exists + cfg!(target_feature = "zbb") + } else { + // All other common targets Rust supports should have CLZ instructions + true + } +}; + +impl_normalization_shift!( + u32_normalization_shift, + USE_LZ, + 32, + u32, + i32, + allow(dead_code) +); +impl_normalization_shift!( + u64_normalization_shift, + USE_LZ, + 64, + u64, + i64, + allow(dead_code) +); + +/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder. +/// `checked_div` and `checked_rem` are used to avoid bringing in panic function +/// dependencies. +#[inline] +fn u64_by_u64_div_rem(duo: u64, div: u64) -> (u64, u64) { + if let Some(quo) = duo.checked_div(div) { + if let Some(rem) = duo.checked_rem(div) { + return (quo, rem); + } + } + zero_div_fn() +} + +// Whether `trifecta` or `delegate` is faster for 128 bit division depends on the speed at which a +// microarchitecture can multiply and divide. We decide to be optimistic and assume `trifecta` is +// faster if the target pointer width is at least 64. Note that this +// implementation is additionally included on WebAssembly despite the typical +// pointer width there being 32 because it's typically run on a 64-bit machine +// that has access to faster 64-bit operations. +#[cfg(all( + any( + target_family = "wasm", + not(any(target_pointer_width = "16", target_pointer_width = "32")), + ), + not(all(not(feature = "no-asm"), target_arch = "x86_64")), + not(any(target_arch = "sparc", target_arch = "sparc64")) +))] +impl_trifecta!( + u128_div_rem, + zero_div_fn, + u64_by_u64_div_rem, + 32, + u32, + u64, + u128 +); + +// If the pointer width less than 64 and this isn't wasm, then the target +// architecture almost certainly does not have the fast 64 to 128 bit widening +// multiplication needed for `trifecta` to be faster. +#[cfg(all( + not(any( + target_family = "wasm", + not(any(target_pointer_width = "16", target_pointer_width = "32")), + )), + not(all(not(feature = "no-asm"), target_arch = "x86_64")), + not(any(target_arch = "sparc", target_arch = "sparc64")) +))] +impl_delegate!( + u128_div_rem, + zero_div_fn, + u64_normalization_shift, + u64_by_u64_div_rem, + 32, + u32, + u64, + u128, + i128 +); + +/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder. 
+/// +/// # Safety +/// +/// If the quotient does not fit in a `u64`, a floating point exception occurs. +/// If `div == 0`, then a division by zero exception occurs. +#[cfg(all(not(feature = "no-asm"), target_arch = "x86_64"))] +#[inline] +unsafe fn u128_by_u64_div_rem(duo: u128, div: u64) -> (u64, u64) { + let duo_lo = duo as u64; + let duo_hi = (duo >> 64) as u64; + let quo: u64; + let rem: u64; + unsafe { + // divides the combined registers rdx:rax (`duo` is split into two 64 bit parts to do this) + // by `div`. The quotient is stored in rax and the remainder in rdx. + // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. + core::arch::asm!( + "div {0}", + in(reg) div, + inlateout("rax") duo_lo => quo, + inlateout("rdx") duo_hi => rem, + options(att_syntax, pure, nomem, nostack) + ); + } + (quo, rem) +} + +// use `asymmetric` instead of `trifecta` on x86_64 +#[cfg(all(not(feature = "no-asm"), target_arch = "x86_64"))] +impl_asymmetric!( + u128_div_rem, + zero_div_fn, + u64_by_u64_div_rem, + u128_by_u64_div_rem, + 32, + u32, + u64, + u128 +); + +/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder. +/// `checked_div` and `checked_rem` are used to avoid bringing in panic function +/// dependencies. +#[inline] +#[allow(dead_code)] +fn u32_by_u32_div_rem(duo: u32, div: u32) -> (u32, u32) { + if let Some(quo) = duo.checked_div(div) { + if let Some(rem) = duo.checked_rem(div) { + return (quo, rem); + } + } + zero_div_fn() +} + +// When not on x86 and the pointer width is not 64, use `delegate` since the division size is larger +// than register size. +#[cfg(all( + not(all(not(feature = "no-asm"), target_arch = "x86")), + not(target_pointer_width = "64") +))] +impl_delegate!( + u64_div_rem, + zero_div_fn, + u32_normalization_shift, + u32_by_u32_div_rem, + 16, + u16, + u32, + u64, + i64 +); + +// When not on x86 and the pointer width is 64, use `binary_long`. +#[cfg(all( + not(all(not(feature = "no-asm"), target_arch = "x86")), + target_pointer_width = "64" +))] +impl_binary_long!( + u64_div_rem, + zero_div_fn, + u64_normalization_shift, + 64, + u64, + i64 +); + +/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder. +/// +/// # Safety +/// +/// If the quotient does not fit in a `u32`, a floating point exception occurs. +/// If `div == 0`, then a division by zero exception occurs. +#[cfg(all(not(feature = "no-asm"), target_arch = "x86"))] +#[inline] +unsafe fn u64_by_u32_div_rem(duo: u64, div: u32) -> (u32, u32) { + let duo_lo = duo as u32; + let duo_hi = (duo >> 32) as u32; + let quo: u32; + let rem: u32; + unsafe { + // divides the combined registers rdx:rax (`duo` is split into two 32 bit parts to do this) + // by `div`. The quotient is stored in rax and the remainder in rdx. + // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. 
+ core::arch::asm!( + "div {0}", + in(reg) div, + inlateout("rax") duo_lo => quo, + inlateout("rdx") duo_hi => rem, + options(att_syntax, pure, nomem, nostack) + ); + } + (quo, rem) +} + +// use `asymmetric` instead of `delegate` on x86 +#[cfg(all(not(feature = "no-asm"), target_arch = "x86"))] +impl_asymmetric!( + u64_div_rem, + zero_div_fn, + u32_by_u32_div_rem, + u64_by_u32_div_rem, + 16, + u16, + u32, + u64 +); + +// 32 bits is the smallest division used by `compiler-builtins`, so we end with binary long division +impl_binary_long!( + u32_div_rem, + zero_div_fn, + u32_normalization_shift, + 32, + u32, + i32, + allow(dead_code) +); diff --git a/library/compiler-builtins/compiler-builtins/src/int/specialized_div_rem/norm_shift.rs b/library/compiler-builtins/compiler-builtins/src/int/specialized_div_rem/norm_shift.rs new file mode 100644 index 0000000000000..61b67b6bc3dc1 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/int/specialized_div_rem/norm_shift.rs @@ -0,0 +1,106 @@ +/// Creates a function used by some division algorithms to compute the "normalization shift". +#[allow(unused_macros)] +macro_rules! impl_normalization_shift { + ( + $name:ident, // name of the normalization shift function + // boolean for if `$uX::leading_zeros` should be used (if an architecture does not have a + // hardware instruction for `usize::leading_zeros`, then this should be `true`) + $use_lz:ident, + $n:tt, // the number of bits in a $iX or $uX + $uX:ident, // unsigned integer type for the inputs of `$name` + $iX:ident, // signed integer type for the inputs of `$name` + $($unsigned_attr:meta),* // attributes for the function + ) => { + /// Finds the shift left that the divisor `div` would need to be normalized for a binary + /// long division step with the dividend `duo`. NOTE: This function assumes that these edge + /// cases have been handled before reaching it: + /// ` + /// if div == 0 { + /// panic!("attempt to divide by zero") + /// } + /// if duo < div { + /// return (0, duo) + /// } + /// ` + /// + /// Normalization is defined as (where `shl` is the output of this function): + /// ` + /// if duo.leading_zeros() != (div << shl).leading_zeros() { + /// // If the most significant bits of `duo` and `div << shl` are not in the same place, + /// // then `div << shl` has one more leading zero than `duo`. + /// assert_eq!(duo.leading_zeros() + 1, (div << shl).leading_zeros()); + /// // Also, `2*(div << shl)` is not more than `duo` (otherwise the first division step + /// // would not be able to clear the msb of `duo`) + /// assert!(duo < (div << (shl + 1))); + /// } + /// if full_normalization { + /// // Some algorithms do not need "full" normalization, which means that `duo` is + /// // larger than `div << shl` when the most significant bits are aligned. + /// assert!((div << shl) <= duo); + /// } + /// ` + /// + /// Note: If the software bisection algorithm is being used in this function, it happens + /// that full normalization always occurs, so be careful that new algorithms are not + /// invisibly depending on this invariant when `full_normalization` is set to `false`. + $( + #[$unsigned_attr] + )* + fn $name(duo: $uX, div: $uX, full_normalization: bool) -> usize { + // We have to find the leading zeros of `div` to know where its msb (most significant + // set bit) is to even begin binary long division. 
It is also good to know where the msb + // of `duo` is so that useful work can be started instead of shifting `div` for all + // possible quotients (many division steps are wasted if `duo.leading_zeros()` is large + // and `div` starts out being shifted all the way to the msb). Aligning the msbs of + // `div` and `duo` could be done by shifting `div` left by + // `div.leading_zeros() - duo.leading_zeros()`, but some CPUs without division hardware + // also do not have single instructions for calculating `leading_zeros`. Instead of + // software doing two bisections to find the two `leading_zeros`, we do one bisection to + // find `div.leading_zeros() - duo.leading_zeros()` without actually knowing either of + // the leading zeros values. + + let mut shl: usize; + if $use_lz { + shl = (div.leading_zeros() - duo.leading_zeros()) as usize; + if full_normalization { + if duo < (div << shl) { + // when the msb of `duo` and `div` are aligned, the resulting `div` may be + // larger than `duo`, so we decrease the shift by 1. + shl -= 1; + } + } + } else { + let mut test = duo; + shl = 0usize; + let mut lvl = $n >> 1; + loop { + let tmp = test >> lvl; + // It happens that a final `duo < (div << shl)` check is not needed, because the + // `div <= tmp` check insures that the msb of `test` never passes the msb of + // `div`, and any set bits shifted off the end of `test` would still keep + // `div <= tmp` true. + if div <= tmp { + test = tmp; + shl += lvl; + } + // narrow down bisection + lvl >>= 1; + if lvl == 0 { + break + } + } + } + // tests the invariants that should hold before beginning binary long division + /* + if full_normalization { + assert!((div << shl) <= duo); + } + if duo.leading_zeros() != (div << shl).leading_zeros() { + assert_eq!(duo.leading_zeros() + 1, (div << shl).leading_zeros()); + assert!(duo < (div << (shl + 1))); + } + */ + shl + } + } +} diff --git a/library/compiler-builtins/compiler-builtins/src/int/specialized_div_rem/trifecta.rs b/library/compiler-builtins/compiler-builtins/src/int/specialized_div_rem/trifecta.rs new file mode 100644 index 0000000000000..7e104053b8b9e --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/int/specialized_div_rem/trifecta.rs @@ -0,0 +1,386 @@ +/// Creates an unsigned division function optimized for division of integers with bitwidths +/// larger than the largest hardware integer division supported. These functions use large radix +/// division algorithms that require both fast division and very fast widening multiplication on the +/// target microarchitecture. Otherwise, `impl_delegate` should be used instead. +#[allow(unused_macros)] +macro_rules! impl_trifecta { + ( + $fn:ident, // name of the unsigned division function + $zero_div_fn:ident, // function called when division by zero is attempted + $half_division:ident, // function for division of a $uX by a $uX + $n_h:expr, // the number of bits in $iH or $uH + $uH:ident, // unsigned integer with half the bit width of $uX + $uX:ident, // unsigned integer with half the bit width of $uD + $uD:ident // unsigned integer type for the inputs and outputs of `$unsigned_name` + ) => { + /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a + /// tuple. + pub fn $fn(duo: $uD, div: $uD) -> ($uD, $uD) { + // This is called the trifecta algorithm because it uses three main algorithms: short + // division for small divisors, the two possibility algorithm for large divisors, and an + // undersubtracting long division algorithm for intermediate cases. 
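Before the trifecta body proper, a quick standalone check (not part of the diff) of the normalization-shift invariants documented in `norm_shift.rs` above, using the hardware `leading_zeros` path for `u32`; the function name is hypothetical.

```rust
fn normalization_shift(duo: u32, div: u32, full_normalization: bool) -> usize {
    // Assumes the edge cases from the doc comment were already handled.
    assert!(div != 0 && duo >= div);
    let mut shl = (div.leading_zeros() - duo.leading_zeros()) as usize;
    if full_normalization && duo < (div << shl) {
        // msb-aligned `div` overshot `duo`, so back off by one.
        shl -= 1;
    }
    shl
}

fn main() {
    let (duo, div) = (0b1011_0000u32, 0b101u32);
    let shl = normalization_shift(duo, div, true);
    // Full normalization: `div << shl` does not exceed `duo`,
    // and one more shift would overshoot it.
    assert!((div << shl) <= duo);
    assert!(duo < (div << (shl + 1)));
}
```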
+ + // This replicates `carrying_mul` (rust-lang rfc #2417). LLVM correctly optimizes this + // to use a widening multiply to 128 bits on the relevant architectures. + fn carrying_mul(lhs: $uX, rhs: $uX) -> ($uX, $uX) { + let tmp = (lhs as $uD).wrapping_mul(rhs as $uD); + (tmp as $uX, (tmp >> ($n_h * 2)) as $uX) + } + fn carrying_mul_add(lhs: $uX, mul: $uX, add: $uX) -> ($uX, $uX) { + let tmp = (lhs as $uD) + .wrapping_mul(mul as $uD) + .wrapping_add(add as $uD); + (tmp as $uX, (tmp >> ($n_h * 2)) as $uX) + } + + // the number of bits in a $uX + let n = $n_h * 2; + + if div == 0 { + $zero_div_fn() + } + + // Trying to use a normalization shift function will cause inelegancies in the code and + // inefficiencies for architectures with a native count leading zeros instruction. The + // undersubtracting algorithm needs both values (keeping the original `div_lz` but + // updating `duo_lz` multiple times), so we assume hardware support for fast + // `leading_zeros` calculation. + let div_lz = div.leading_zeros(); + let mut duo_lz = duo.leading_zeros(); + + // the possible ranges of `duo` and `div` at this point: + // `0 <= duo < 2^n_d` + // `1 <= div < 2^n_d` + + // quotient is 0 or 1 branch + if div_lz <= duo_lz { + // The quotient cannot be more than 1. The highest set bit of `duo` needs to be at + // least one place higher than `div` for the quotient to be more than 1. + if duo >= div { + return (1, duo - div); + } else { + return (0, duo); + } + } + + // `_sb` is the number of significant bits (from the ones place to the highest set bit) + // `{2, 2^div_sb} <= duo < 2^n_d` + // `1 <= div < {2^duo_sb, 2^(n_d - 1)}` + // smaller division branch + if duo_lz >= n { + // `duo < 2^n` so it will fit in a $uX. `div` will also fit in a $uX (because of the + // `div_lz <= duo_lz` branch) so no numerical error. + let (quo, rem) = $half_division(duo as $uX, div as $uX); + return (quo as $uD, rem as $uD); + } + + // `{2^n, 2^div_sb} <= duo < 2^n_d` + // `1 <= div < {2^duo_sb, 2^(n_d - 1)}` + // short division branch + if div_lz >= (n + $n_h) { + // `1 <= div < {2^duo_sb, 2^n_h}` + + // It is barely possible to improve the performance of this by calculating the + // reciprocal and removing one `$half_division`, but only if the CPU can do fast + // multiplications in parallel. Other reciprocal based methods can remove two + // `$half_division`s, but have multiplications that cannot be done in parallel and + // reduce performance. I have decided to use this trivial short division method and + // rely on the CPU having quick divisions. + + let duo_hi = (duo >> n) as $uX; + let div_0 = div as $uH as $uX; + let (quo_hi, rem_3) = $half_division(duo_hi, div_0); + + let duo_mid = ((duo >> $n_h) as $uH as $uX) | (rem_3 << $n_h); + let (quo_1, rem_2) = $half_division(duo_mid, div_0); + + let duo_lo = (duo as $uH as $uX) | (rem_2 << $n_h); + let (quo_0, rem_1) = $half_division(duo_lo, div_0); + + return ( + (quo_0 as $uD) | ((quo_1 as $uD) << $n_h) | ((quo_hi as $uD) << n), + rem_1 as $uD, + ); + } + + // relative leading significant bits, cannot overflow because of above branches + let lz_diff = div_lz - duo_lz; + + // `{2^n, 2^div_sb} <= duo < 2^n_d` + // `2^n_h <= div < {2^duo_sb, 2^(n_d - 1)}` + // `mul` or `mul - 1` branch + if lz_diff < $n_h { + // Two possibility division algorithm + + // The most significant bits of `duo` and `div` are within `$n_h` bits of each + // other. 
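A standalone sketch (not part of the diff) of the short-division branch above, written out for fixed widths: a u64 dividend, a divisor that fits in u16, and three u32-by-u32 divisions. The function name is hypothetical.

```rust
/// Short division of a u64 by a divisor that fits in u16, mirroring the
/// three chained half-divisions in the branch above.
fn u64_by_u16_div_rem(duo: u64, div: u16) -> (u64, u64) {
    let div = div as u32;

    let duo_hi = (duo >> 32) as u32;
    let (quo_hi, rem) = (duo_hi / div, duo_hi % div);

    // Each partial remainder is < div <= 0xFFFF, so `rem << 16` cannot overflow
    // and each partial quotient below fits in 16 bits.
    let duo_mid = ((duo >> 16) as u16 as u32) | (rem << 16);
    let (quo_1, rem) = (duo_mid / div, duo_mid % div);

    let duo_lo = (duo as u16 as u32) | (rem << 16);
    let (quo_0, rem) = (duo_lo / div, duo_lo % div);

    (
        (quo_0 as u64) | ((quo_1 as u64) << 16) | ((quo_hi as u64) << 32),
        rem as u64,
    )
}

fn main() {
    let (duo, div) = (0x1234_5678_9ABC_DEF0_u64, 0xBEEF_u16);
    assert_eq!(u64_by_u16_div_rem(duo, div), (duo / div as u64, duo % div as u64));
}
```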
If we take the `n` most significant bits of `duo` and divide them by the + // corresponding bits in `div`, it produces a quotient value `quo`. It happens that + // `quo` or `quo - 1` will always be the correct quotient for the whole number. In + // other words, the bits less significant than the `n` most significant bits of + // `duo` and `div` can only influence the quotient to be one of two values. + // Because there are only two possibilities, there only needs to be one `$uH` sized + // division, a `$uH` by `$uD` multiplication, and only one branch with a few simple + // operations. + // + // Proof that the true quotient can only be `quo` or `quo - 1`. + // All `/` operators here are floored divisions. + // + // `shift` is the number of bits not in the higher `n` significant bits of `duo`. + // (definitions) + // 0. shift = n - duo_lz + // 1. duo_sig_n == duo / 2^shift + // 2. div_sig_n == div / 2^shift + // 3. quo == duo_sig_n / div_sig_n + // + // + // We are trying to find the true quotient, `true_quo`. + // 4. true_quo = duo / div. (definition) + // + // This is true because of the bits that are cut off during the bit shift. + // 5. duo_sig_n * 2^shift <= duo < (duo_sig_n + 1) * 2^shift. + // 6. div_sig_n * 2^shift <= div < (div_sig_n + 1) * 2^shift. + // + // Dividing each bound of (5) by each bound of (6) gives 4 possibilities for what + // `true_quo == duo / div` is bounded by: + // (duo_sig_n * 2^shift) / (div_sig_n * 2^shift) + // (duo_sig_n * 2^shift) / ((div_sig_n + 1) * 2^shift) + // ((duo_sig_n + 1) * 2^shift) / (div_sig_n * 2^shift) + // ((duo_sig_n + 1) * 2^shift) / ((div_sig_n + 1) * 2^shift) + // + // Simplifying each of these four: + // duo_sig_n / div_sig_n + // duo_sig_n / (div_sig_n + 1) + // (duo_sig_n + 1) / div_sig_n + // (duo_sig_n + 1) / (div_sig_n + 1) + // + // Taking the smallest and the largest of these as the low and high bounds + // and replacing `duo / div` with `true_quo`: + // 7. duo_sig_n / (div_sig_n + 1) <= true_quo < (duo_sig_n + 1) / div_sig_n + // + // The `lz_diff < n_h` conditional on this branch makes sure that `div_sig_n` is at + // least `2^n_h`, and the `div_lz <= duo_lz` branch makes sure that the highest bit + // of `div_sig_n` is not the `2^(n - 1)` bit. + // 8. `2^(n - 1) <= duo_sig_n < 2^n` + // 9. `2^n_h <= div_sig_n < 2^(n - 1)` + // + // We want to prove that either + // `(duo_sig_n + 1) / div_sig_n == duo_sig_n / (div_sig_n + 1)` or that + // `(duo_sig_n + 1) / div_sig_n == duo_sig_n / (div_sig_n + 1) + 1`. + // + // We also want to prove that `quo` is one of these: + // `duo_sig_n / div_sig_n == duo_sig_n / (div_sig_n + 1)` or + // `duo_sig_n / div_sig_n == (duo_sig_n + 1) / div_sig_n`. + // + // When 1 is added to the numerator of `duo_sig_n / div_sig_n` to produce + // `(duo_sig_n + 1) / div_sig_n`, it is not possible that the value increases by + // more than 1 with floored integer arithmetic and `div_sig_n != 0`. Consider + // `x/y + 1 < (x + 1)/y` <=> `x/y + 1 < x/y + 1/y` <=> `1 < 1/y` <=> `y < 1`. + // `div_sig_n` is a nonzero integer. Thus, + // 10. `duo_sig_n / div_sig_n == (duo_sig_n + 1) / div_sig_n` or + // `(duo_sig_n / div_sig_n) + 1 == (duo_sig_n + 1) / div_sig_n. + // + // When 1 is added to the denominator of `duo_sig_n / div_sig_n` to produce + // `duo_sig_n / (div_sig_n + 1)`, it is not possible that the value decreases by + // more than 1 with the bounds (8) and (9). Consider `x/y - 1 <= x/(y + 1)` <=> + // `(x - y)/y < x/(y + 1)` <=> `(y + 1)*(x - y) < x*y` <=> `x*y - y*y + x - y < x*y` + // <=> `x < y*y + y`. 
The smallest value of `div_sig_n` is `2^n_h` and the largest + // value of `duo_sig_n` is `2^n - 1`. Substituting reveals `2^n - 1 < 2^n + 2^n_h`. + // Thus, + // 11. `duo_sig_n / div_sig_n == duo_sig_n / (div_sig_n + 1)` or + // `(duo_sig_n / div_sig_n) - 1` == duo_sig_n / (div_sig_n + 1)` + // + // Combining both (10) and (11), we know that + // `quo - 1 <= duo_sig_n / (div_sig_n + 1) <= true_quo + // < (duo_sig_n + 1) / div_sig_n <= quo + 1` and therefore: + // 12. quo - 1 <= true_quo < quo + 1 + // + // In a lot of division algorithms using smaller divisions to construct a larger + // division, we often encounter a situation where the approximate `quo` value + // calculated from a smaller division is multiple increments away from the true + // `quo` value. In those algorithms, multiple correction steps have to be applied. + // Those correction steps may need more multiplications to test `duo - (quo*div)` + // again. Because of the fact that our `quo` can only be one of two values, we can + // see if `duo - (quo*div)` overflows. If it did overflow, then we know that we have + // the larger of the two values (since the true quotient is unique, and any larger + // quotient will cause `duo - (quo*div)` to be negative). Also because there is only + // one correction needed, we can calculate the remainder `duo - (true_quo*div) == + // duo - ((quo - 1)*div) == duo - (quo*div - div) == duo + div - quo*div`. + // If `duo - (quo*div)` did not overflow, then we have the correct answer. + let shift = n - duo_lz; + let duo_sig_n = (duo >> shift) as $uX; + let div_sig_n = (div >> shift) as $uX; + let quo = $half_division(duo_sig_n, div_sig_n).0; + + // The larger `quo` value can overflow `$uD` in the right circumstances. This is a + // manual `carrying_mul_add` with overflow checking. + let div_lo = div as $uX; + let div_hi = (div >> n) as $uX; + let (tmp_lo, carry) = carrying_mul(quo, div_lo); + let (tmp_hi, overflow) = carrying_mul_add(quo, div_hi, carry); + let tmp = (tmp_lo as $uD) | ((tmp_hi as $uD) << n); + if (overflow != 0) || (duo < tmp) { + return ( + (quo - 1) as $uD, + // Both the addition and subtraction can overflow, but when combined end up + // as a correct positive number. + duo.wrapping_add(div).wrapping_sub(tmp), + ); + } else { + return (quo as $uD, duo - tmp); + } + } + + // Undersubtracting long division algorithm. + // Instead of clearing a minimum of 1 bit from `duo` per iteration via binary long + // division, `n_h - 1` bits are cleared per iteration with this algorithm. It is a more + // complicated version of regular long division. Most integer division algorithms tend + // to guess a part of the quotient, and may have a larger quotient than the true + // quotient (which when multiplied by `div` will "oversubtract" the original dividend). + // They then check if the quotient was in fact too large and then have to correct it. + // This long division algorithm has been carefully constructed to always underguess the + // quotient by slim margins. This allows different subalgorithms to be blindly jumped to + // without needing an extra correction step. + // + // The only problem is that this subalgorithm will not work for many ranges of `duo` and + // `div`. Fortunately, the short division, two possibility algorithm, and other simple + // cases happen to exactly fill these gaps. + // + // For an example, consider the division of 76543210 by 213 and assume that `n_h` is + // equal to two decimal digits (note: we are working with base 10 here for readability). 
+ // The first `sig_n_h` part of the divisor (21) is taken and is incremented by 1 to + // prevent oversubtraction. We also record the number of extra places not a part of + // the `sig_n` or `sig_n_h` parts. + // + // sig_n_h == 2 digits, sig_n == 4 digits + // + // vvvv <- `duo_sig_n` + // 76543210 + // ^^^^ <- extra places in duo, `duo_extra == 4` + // + // vv <- `div_sig_n_h` + // 213 + // ^ <- extra places in div, `div_extra == 1` + // + // The difference in extra places, `duo_extra - div_extra == extra_shl == 3`, is used + // for shifting partial sums in the long division. + // + // In the first step, the first `sig_n` part of duo (7654) is divided by + // `div_sig_n_h_add_1` (22), which results in a partial quotient of 347. This is + // multiplied by the whole divisor to make 73911, which is shifted left by `extra_shl` + // and subtracted from duo. The partial quotient is also shifted left by `extra_shl` to + // be added to `quo`. + // + // 347 + // ________ + // |76543210 + // -73911 + // 2632210 + // + // Variables dependent on duo have to be updated: + // + // vvvv <- `duo_sig_n == 2632` + // 2632210 + // ^^^ <- `duo_extra == 3` + // + // `extra_shl == 2` + // + // Two more steps are taken after this and then duo fits into `n` bits, and then a final + // normal long division step is made. The partial quotients are all progressively added + // to each other in the actual algorithm, but here I have left them all in a tower that + // can be added together to produce the quotient, 359357. + // + // 14 + // 443 + // 119 + // 347 + // ________ + // |76543210 + // -73911 + // 2632210 + // -25347 + // 97510 + // -94359 + // 3151 + // -2982 + // 169 <- the remainder + + let mut duo = duo; + let mut quo: $uD = 0; + + // The number of lesser significant bits not a part of `div_sig_n_h` + let div_extra = (n + $n_h) - div_lz; + + // The most significant `n_h` bits of div + let div_sig_n_h = (div >> div_extra) as $uH; + + // This needs to be a `$uX` in case of overflow from the increment + let div_sig_n_h_add1 = (div_sig_n_h as $uX) + 1; + + // `{2^n, 2^(div_sb + n_h)} <= duo < 2^n_d` + // `2^n_h <= div < {2^(duo_sb - n_h), 2^n}` + loop { + // The number of lesser significant bits not a part of `duo_sig_n` + let duo_extra = n - duo_lz; + + // The most significant `n` bits of `duo` + let duo_sig_n = (duo >> duo_extra) as $uX; + + // the two possibility algorithm requires that the difference between msbs is less + // than `n_h`, so the comparison is `<=` here. + if div_extra <= duo_extra { + // Undersubtracting long division step + let quo_part = $half_division(duo_sig_n, div_sig_n_h_add1).0 as $uD; + let extra_shl = duo_extra - div_extra; + + // Addition to the quotient. + quo += (quo_part << extra_shl); + + // Subtraction from `duo`. At least `n_h - 1` bits are cleared from `duo` here. 
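The base-10 walkthrough above can be replayed mechanically. A standalone sketch (not part of the diff) in which decimal digits stand in for bits and 22 is `div_sig_n_h + 1`:

```rust
fn digits(x: u64) -> u32 {
    x.ilog10() + 1
}

fn main() {
    let div: u64 = 213; // div_sig_n_h = 21, so partial quotients divide by 21 + 1 = 22
    let div_extra = digits(div) - 2;

    let mut duo: u64 = 76_543_210;
    let mut quo: u64 = 0;

    // Undersubtracting steps: guess with the top four digits of `duo` over 22.
    while digits(duo) > 4 {
        let duo_extra = digits(duo) - 4;
        let duo_sig_n = duo / 10u64.pow(duo_extra);
        let extra_shl = duo_extra - div_extra;
        let part = duo_sig_n / 22; // always underguesses, so it never oversubtracts
        quo += part * 10u64.pow(extra_shl);
        duo -= part * div * 10u64.pow(extra_shl);
    }
    // Final exact step once `duo` fits in four digits.
    quo += duo / div;
    duo %= div;

    // Matches the tower in the comment: 347_000 + 11_900 + 443 + 14 = 359_357, remainder 169.
    assert_eq!((quo, duo), (359_357, 169));
}
```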
+                    duo -= (div.wrapping_mul(quo_part) << extra_shl);
+                } else {
+                    // Two possibility algorithm
+                    let shift = n - duo_lz;
+                    let duo_sig_n = (duo >> shift) as $uX;
+                    let div_sig_n = (div >> shift) as $uX;
+                    let quo_part = $half_division(duo_sig_n, div_sig_n).0;
+                    let div_lo = div as $uX;
+                    let div_hi = (div >> n) as $uX;
+
+                    let (tmp_lo, carry) = carrying_mul(quo_part, div_lo);
+                    // The undersubtracting long division algorithm has already run once, so
+                    // overflow beyond `$uD` bits is not possible here
+                    let (tmp_hi, _) = carrying_mul_add(quo_part, div_hi, carry);
+                    let tmp = (tmp_lo as $uD) | ((tmp_hi as $uD) << n);
+
+                    if duo < tmp {
+                        return (
+                            quo + ((quo_part - 1) as $uD),
+                            duo.wrapping_add(div).wrapping_sub(tmp),
+                        );
+                    } else {
+                        return (quo + (quo_part as $uD), duo - tmp);
+                    }
+                }
+
+                duo_lz = duo.leading_zeros();
+
+                if div_lz <= duo_lz {
+                    // quotient can have 0 or 1 added to it
+                    if div <= duo {
+                        return (quo + 1, duo - div);
+                    } else {
+                        return (quo, duo);
+                    }
+                }
+
+                // This can only happen if `div_sd < n` (because of previous "quo = 0 or 1"
+                // branches), but it is not worth it to unroll further.
+                if n <= duo_lz {
+                    // simple division and addition
+                    let tmp = $half_division(duo as $uX, div as $uX);
+                    return (quo + (tmp.0 as $uD), tmp.1 as $uD);
+                }
+            }
+        }
+    };
+}
diff --git a/library/compiler-builtins/compiler-builtins/src/int/trailing_zeros.rs b/library/compiler-builtins/compiler-builtins/src/int/trailing_zeros.rs
new file mode 100644
index 0000000000000..c45d6b1cfe8d8
--- /dev/null
+++ b/library/compiler-builtins/compiler-builtins/src/int/trailing_zeros.rs
@@ -0,0 +1,69 @@
+#[cfg(feature = "unstable-public-internals")]
+pub use implementation::trailing_zeros;
+#[cfg(not(feature = "unstable-public-internals"))]
+pub(crate) use implementation::trailing_zeros;
+
+mod implementation {
+    use crate::int::{CastInto, Int};
+
+    /// Returns number of trailing binary zeros in `x`.
+    #[allow(dead_code)]
+    pub fn trailing_zeros<T: Int + CastInto<u32> + CastInto<u16> + CastInto<u8>>(x: T) -> usize {
+        let mut x = x;
+        let mut r: u32 = 0;
+        let mut t: u32;
+
+        const { assert!(T::BITS <= 64) };
+        if T::BITS >= 64 {
+            r += ((CastInto::<u32>::cast(x) == 0) as u32) << 5; // if (x has no 32 small bits) t = 32 else 0
+            x >>= r; // remove 32 zero bits
+        }
+
+        if T::BITS >= 32 {
+            t = ((CastInto::<u16>::cast(x) == 0) as u32) << 4; // if (x has no 16 small bits) t = 16 else 0
+            r += t;
+            x >>= t; // x = [0 - 0xFFFF] + higher garbage bits
+        }
+
+        const { assert!(T::BITS >= 16) };
+        t = ((CastInto::<u8>::cast(x) == 0) as u32) << 3;
+        x >>= t; // x = [0 - 0xFF] + higher garbage bits
+        r += t;
+
+        let mut x: u8 = x.cast();
+
+        t = (((x & 0x0F) == 0) as u32) << 2;
+        x >>= t; // x = [0 - 0xF] + higher garbage bits
+        r += t;
+
+        t = (((x & 0x3) == 0) as u32) << 1;
+        x >>= t; // x = [0 - 0x3] + higher garbage bits
+        r += t;
+
+        x &= 3;
+
+        r as usize + ((2 - (x >> 1) as usize) & (((x & 1) == 0) as usize).wrapping_neg())
+    }
+}
+
+intrinsics! {
+    /// Returns the number of trailing binary zeros in `x` (32 bit version).
+    pub extern "C" fn __ctzsi2(x: u32) -> usize {
+        trailing_zeros(x)
+    }
+
+    /// Returns the number of trailing binary zeros in `x` (64 bit version).
+    pub extern "C" fn __ctzdi2(x: u64) -> usize {
+        trailing_zeros(x)
+    }
+
+    /// Returns the number of trailing binary zeros in `x` (128 bit version).
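A standalone sketch (not part of the diff) of the bisection in `trailing_zeros` above, specialized to `u32` and checked against the primitive method; the function name is hypothetical.

```rust
fn ctz_u32(x: u32) -> u32 {
    let mut x = x;
    let mut r = 0u32;

    let t = (((x as u16) == 0) as u32) << 4; // no set bit in the low 16 bits?
    r += t;
    x >>= t;
    let t = (((x as u8) == 0) as u32) << 3; // none in the low 8?
    r += t;
    x >>= t;
    let t = (((x & 0x0F) == 0) as u32) << 2;
    r += t;
    x >>= t;
    let t = (((x & 0x3) == 0) as u32) << 1;
    r += t;
    x >>= t;
    let x = x & 3;
    // Final two bits resolved branchlessly, exactly as in the generic version.
    r + ((2 - (x >> 1)) & (((x & 1) == 0) as u32).wrapping_neg())
}

fn main() {
    for v in [1u32, 2, 3, 8, 0x8000_0000, 0xF0F0_F0F0] {
        assert_eq!(ctz_u32(v), v.trailing_zeros());
    }
}
```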
+ pub extern "C" fn __ctzti2(x: u128) -> usize { + let lo = x as u64; + if lo == 0 { + 64 + __ctzdi2((x >> 64) as u64) + } else { + __ctzdi2(lo) + } + } +} diff --git a/library/compiler-builtins/compiler-builtins/src/int/traits.rs b/library/compiler-builtins/compiler-builtins/src/int/traits.rs new file mode 100644 index 0000000000000..152cb2eee2ee6 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/int/traits.rs @@ -0,0 +1,411 @@ +use core::ops; + +/// Minimal integer implementations needed on all integer types, including wide integers. +#[allow(dead_code)] +pub trait MinInt: + Copy + + core::fmt::Debug + + ops::BitOr + + ops::Not + + ops::Shl +{ + /// Type with the same width but other signedness + type OtherSign: MinInt; + /// Unsigned version of Self + type UnsignedInt: MinInt; + + /// If `Self` is a signed integer + const SIGNED: bool; + + /// The bitwidth of the int type + const BITS: u32; + + const ZERO: Self; + const ONE: Self; + const MIN: Self; + const MAX: Self; +} + +/// Trait for some basic operations on integers +#[allow(dead_code)] +pub trait Int: + MinInt + + PartialEq + + PartialOrd + + ops::AddAssign + + ops::SubAssign + + ops::BitAndAssign + + ops::BitOrAssign + + ops::BitXorAssign + + ops::ShlAssign + + ops::ShrAssign + + ops::Add + + ops::Sub + + ops::Mul + + ops::Div + + ops::Shr + + ops::BitXor + + ops::BitAnd +{ + /// LUT used for maximizing the space covered and minimizing the computational cost of fuzzing + /// in `builtins-test`. For example, Self = u128 produces [0,1,2,7,8,15,16,31,32,63,64,95,96, + /// 111,112,119,120,125,126,127]. + const FUZZ_LENGTHS: [u8; 20] = make_fuzz_lengths(::BITS); + + /// The number of entries of `FUZZ_LENGTHS` actually used. The maximum is 20 for u128. + const FUZZ_NUM: usize = { + let log2 = (::BITS - 1).count_ones() as usize; + if log2 == 3 { + // case for u8 + 6 + } else { + // 3 entries on each extreme, 2 in the middle, and 4 for each scale of intermediate + // boundaries. + 8 + (4 * (log2 - 4)) + } + }; + + fn unsigned(self) -> Self::UnsignedInt; + fn from_unsigned(unsigned: Self::UnsignedInt) -> Self; + fn unsigned_abs(self) -> Self::UnsignedInt; + + fn from_bool(b: bool) -> Self; + + /// Prevents the need for excessive conversions between signed and unsigned + fn logical_shr(self, other: u32) -> Self; + + /// Absolute difference between two integers. + fn abs_diff(self, other: Self) -> Self::UnsignedInt; + + // copied from primitive integers, but put in a trait + fn is_zero(self) -> bool; + fn wrapping_neg(self) -> Self; + fn wrapping_add(self, other: Self) -> Self; + fn wrapping_mul(self, other: Self) -> Self; + fn wrapping_sub(self, other: Self) -> Self; + fn wrapping_shl(self, other: u32) -> Self; + fn wrapping_shr(self, other: u32) -> Self; + fn rotate_left(self, other: u32) -> Self; + fn overflowing_add(self, other: Self) -> (Self, bool); + fn leading_zeros(self) -> u32; + fn ilog2(self) -> u32; +} + +pub(crate) const fn make_fuzz_lengths(bits: u32) -> [u8; 20] { + let mut v = [0u8; 20]; + v[0] = 0; + v[1] = 1; + v[2] = 2; // important for parity and the iX::MIN case when reversed + let mut i = 3; + + // No need for any more until the byte boundary, because there should be no algorithms + // that are sensitive to anything not next to byte boundaries after 2. We also scale + // in powers of two, which is important to prevent u128 corner tests from getting too + // big. 
+ let mut l = 8; + loop { + if l >= ((bits / 2) as u8) { + break; + } + // get both sides of the byte boundary + v[i] = l - 1; + i += 1; + v[i] = l; + i += 1; + l *= 2; + } + + if bits != 8 { + // add the lower side of the middle boundary + v[i] = ((bits / 2) - 1) as u8; + i += 1; + } + + // We do not want to jump directly from the Self::BITS/2 boundary to the Self::BITS + // boundary because of algorithms that split the high part up. We reverse the scaling + // as we go to Self::BITS. + let mid = i; + let mut j = 1; + loop { + v[i] = (bits as u8) - (v[mid - j]) - 1; + if j == mid { + break; + } + i += 1; + j += 1; + } + v +} + +macro_rules! int_impl_common { + ($ty:ty) => { + fn from_bool(b: bool) -> Self { + b as $ty + } + + fn logical_shr(self, other: u32) -> Self { + Self::from_unsigned(self.unsigned().wrapping_shr(other)) + } + + fn is_zero(self) -> bool { + self == Self::ZERO + } + + fn wrapping_neg(self) -> Self { + ::wrapping_neg(self) + } + + fn wrapping_add(self, other: Self) -> Self { + ::wrapping_add(self, other) + } + + fn wrapping_mul(self, other: Self) -> Self { + ::wrapping_mul(self, other) + } + fn wrapping_sub(self, other: Self) -> Self { + ::wrapping_sub(self, other) + } + + fn wrapping_shl(self, other: u32) -> Self { + ::wrapping_shl(self, other) + } + + fn wrapping_shr(self, other: u32) -> Self { + ::wrapping_shr(self, other) + } + + fn rotate_left(self, other: u32) -> Self { + ::rotate_left(self, other) + } + + fn overflowing_add(self, other: Self) -> (Self, bool) { + ::overflowing_add(self, other) + } + + fn leading_zeros(self) -> u32 { + ::leading_zeros(self) + } + + fn ilog2(self) -> u32 { + ::ilog2(self) + } + }; +} + +macro_rules! int_impl { + ($ity:ty, $uty:ty) => { + impl MinInt for $uty { + type OtherSign = $ity; + type UnsignedInt = $uty; + + const BITS: u32 = ::ZERO.count_zeros(); + const SIGNED: bool = Self::MIN != Self::ZERO; + + const ZERO: Self = 0; + const ONE: Self = 1; + const MIN: Self = ::MIN; + const MAX: Self = ::MAX; + } + + impl Int for $uty { + fn unsigned(self) -> $uty { + self + } + + // It makes writing macros easier if this is implemented for both signed and unsigned + #[allow(clippy::wrong_self_convention)] + fn from_unsigned(me: $uty) -> Self { + me + } + + fn unsigned_abs(self) -> Self { + self + } + + fn abs_diff(self, other: Self) -> Self { + self.abs_diff(other) + } + + int_impl_common!($uty); + } + + impl MinInt for $ity { + type OtherSign = $uty; + type UnsignedInt = $uty; + + const BITS: u32 = ::ZERO.count_zeros(); + const SIGNED: bool = Self::MIN != Self::ZERO; + + const ZERO: Self = 0; + const ONE: Self = 1; + const MIN: Self = ::MIN; + const MAX: Self = ::MAX; + } + + impl Int for $ity { + fn unsigned(self) -> $uty { + self as $uty + } + + fn from_unsigned(me: $uty) -> Self { + me as $ity + } + + fn unsigned_abs(self) -> Self::UnsignedInt { + self.unsigned_abs() + } + + fn abs_diff(self, other: Self) -> $uty { + self.abs_diff(other) + } + + int_impl_common!($ity); + } + }; +} + +int_impl!(isize, usize); +int_impl!(i8, u8); +int_impl!(i16, u16); +int_impl!(i32, u32); +int_impl!(i64, u64); +int_impl!(i128, u128); + +/// Trait for integers twice the bit width of another integer. This is implemented for all +/// primitives except for `u8`, because there is not a smaller primitive. 
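For concreteness (an editorial aside): for a 32-bit integer the scheme above yields twelve used entries, both sides of the byte boundary and of the half-width middle, mirrored toward the top bit. A hypothetical unit test that could sit next to `make_fuzz_lengths` in this module:

```rust
#[test]
fn fuzz_lengths_u32() {
    let v = make_fuzz_lengths(32);
    // 0..=2, both sides of the 8-bit boundary, the lower side of the 16-bit
    // middle, then the same offsets mirrored from the top (31 = BITS - 1).
    assert_eq!(&v[..12], &[0, 1, 2, 7, 8, 15, 16, 23, 24, 29, 30, 31]);
}
```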
+pub trait DInt: MinInt {
+    /// Integer that is half the bit width of the integer this trait is implemented for
+    type H: HInt;
+
+    /// Returns the low half of `self`
+    fn lo(self) -> Self::H;
+    /// Returns the high half of `self`
+    fn hi(self) -> Self::H;
+    /// Returns the low and high halves of `self` as a tuple
+    fn lo_hi(self) -> (Self::H, Self::H) {
+        (self.lo(), self.hi())
+    }
+    /// Constructs an integer using lower and higher half parts
+    fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self {
+        lo.zero_widen() | hi.widen_hi()
+    }
+}
+
+/// Trait for integers half the bit width of another integer. This is implemented for all
+/// primitives except for `u128`, because there is not a larger primitive.
+pub trait HInt: Int {
+    /// Integer that is double the bit width of the integer this trait is implemented for
+    type D: DInt + MinInt;
+
+    // NB: some of the below methods could have default implementations (e.g. `widen_hi`), but for
+    // unknown reasons this can cause infinite recursion when optimizations are disabled. See
+    // for context.
+
+    /// Widens (using default extension) the integer to have double bit width
+    fn widen(self) -> Self::D;
+    /// Widens (zero extension only) the integer to have double bit width. This is needed to get
+    /// around problems with associated type bounds (such as `Int`) being unstable
+    fn zero_widen(self) -> Self::D;
+    /// Widens the integer to have double bit width and shifts the integer into the higher bits
+    fn widen_hi(self) -> Self::D;
+    /// Widening multiplication with zero widening. This cannot overflow.
+    fn zero_widen_mul(self, rhs: Self) -> Self::D;
+    /// Widening multiplication. This cannot overflow.
+    fn widen_mul(self, rhs: Self) -> Self::D;
+}
+
+macro_rules! impl_d_int {
+    ($($X:ident $D:ident),*) => {
+        $(
+            impl DInt for $D {
+                type H = $X;
+
+                fn lo(self) -> Self::H {
+                    self as $X
+                }
+                fn hi(self) -> Self::H {
+                    (self >> <$X as MinInt>::BITS) as $X
+                }
+            }
+        )*
+    };
+}
+
+macro_rules! impl_h_int {
+    ($($H:ident $uH:ident $X:ident),*) => {
+        $(
+            impl HInt for $H {
+                type D = $X;
+
+                fn widen(self) -> Self::D {
+                    self as $X
+                }
+                fn zero_widen(self) -> Self::D {
+                    (self as $uH) as $X
+                }
+                fn zero_widen_mul(self, rhs: Self) -> Self::D {
+                    self.zero_widen().wrapping_mul(rhs.zero_widen())
+                }
+                fn widen_mul(self, rhs: Self) -> Self::D {
+                    self.widen().wrapping_mul(rhs.widen())
+                }
+                fn widen_hi(self) -> Self::D {
+                    (self as $X) << <Self as MinInt>::BITS
+                }
+            }
+        )*
+    };
+}
+
+impl_d_int!(u8 u16, u16 u32, u32 u64, u64 u128, i8 i16, i16 i32, i32 i64, i64 i128);
+impl_h_int!(
+    u8 u8 u16,
+    u16 u16 u32,
+    u32 u32 u64,
+    u64 u64 u128,
+    i8 u8 i16,
+    i16 u16 i32,
+    i32 u32 i64,
+    i64 u64 i128
+);
+
+/// Trait to express (possibly lossy) casting of integers
+pub trait CastInto<T: Copy>: Copy {
+    fn cast(self) -> T;
+}
+
+pub trait CastFrom<T: Copy>: Copy {
+    fn cast_from(value: T) -> Self;
+}
+
+impl<T: Copy, U: CastInto<T> + Copy> CastFrom<U> for T {
+    fn cast_from(value: U) -> Self {
+        value.cast()
+    }
+}
+
+macro_rules!
cast_into { + ($ty:ty) => { + cast_into!($ty; usize, isize, u8, i8, u16, i16, u32, i32, u64, i64, u128, i128); + }; + ($ty:ty; $($into:ty),*) => {$( + impl CastInto<$into> for $ty { + fn cast(self) -> $into { + self as $into + } + } + )*}; +} + +cast_into!(usize); +cast_into!(isize); +cast_into!(u8); +cast_into!(i8); +cast_into!(u16); +cast_into!(i16); +cast_into!(u32); +cast_into!(i32); +cast_into!(u64); +cast_into!(i64); +cast_into!(u128); +cast_into!(i128); diff --git a/library/compiler-builtins/compiler-builtins/src/int/udiv.rs b/library/compiler-builtins/compiler-builtins/src/int/udiv.rs new file mode 100644 index 0000000000000..b9dee63c4cc7a --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/int/udiv.rs @@ -0,0 +1,199 @@ +#[cfg(not(feature = "unstable-public-internals"))] +pub(crate) use crate::int::specialized_div_rem::*; +#[cfg(feature = "unstable-public-internals")] +pub use crate::int::specialized_div_rem::*; + +intrinsics! { + #[maybe_use_optimized_c_shim] + #[arm_aeabi_alias = __aeabi_uidiv] + /// Returns `n / d` + pub extern "C" fn __udivsi3(n: u32, d: u32) -> u32 { + u32_div_rem(n, d).0 + } + + #[maybe_use_optimized_c_shim] + /// Returns `n % d` + pub extern "C" fn __umodsi3(n: u32, d: u32) -> u32 { + u32_div_rem(n, d).1 + } +} + +#[cfg(not(target_arch = "avr"))] +intrinsics! { + #[maybe_use_optimized_c_shim] + /// Returns `n / d` and sets `*rem = n % d` + pub extern "C" fn __udivmodsi4(n: u32, d: u32, rem: Option<&mut u32>) -> u32 { + let quo_rem = u32_div_rem(n, d); + if let Some(rem) = rem { + *rem = quo_rem.1; + } + quo_rem.0 + } +} + +#[cfg(target_arch = "avr")] +intrinsics! { + /// Returns `n / d` and `n % d` packed together. + /// + /// Ideally we'd use `-> (u32, u32)` or some kind of a packed struct, but + /// both force a stack allocation, while our result has to be in R18:R26. + pub extern "C" fn __udivmodsi4(n: u32, d: u32) -> u64 { + let (div, rem) = u32_div_rem(n, d); + + ((rem as u64) << 32) | (div as u64) + } + + #[unsafe(naked)] + pub unsafe extern "C" fn __udivmodqi4() { + // compute unsigned 8-bit `n / d` and `n % d`. + // + // Note: GCC implements a [non-standard calling convention](https://gcc.gnu.org/wiki/avr-gcc#Exceptions_to_the_Calling_Convention) for this function. + // Inputs: + // R24: dividend + // R22: divisor + // Outputs: + // R24: quotient (dividend / divisor) + // R25: remainder (dividend % divisor) + // Clobbers: + // R23: loop counter + core::arch::naked_asm!( + // This assembly routine implements the [long division](https://en.wikipedia.org/wiki/Division_algorithm#Long_division) algorithm. + // Bits shift out of the dividend and into the quotient, so R24 is used for both. + "clr R25", // remainder = 0 + + "ldi R23, 8", // for each bit + "1:", + "lsl R24", // shift the dividend MSb + "rol R25", // into the remainder LSb + + "cp R25, R22", // if remainder >= divisor + "brlo 2f", + "sub R25, R22", // remainder -= divisor + "sbr R24, 1", // quotient |= 1 + "2:", + + "dec R23", // end loop + "brne 1b", + "ret", + ); + } + + #[unsafe(naked)] + pub unsafe extern "C" fn __udivmodhi4() { + // compute unsigned 16-bit `n / d` and `n % d`. + // + // Note: GCC implements a [non-standard calling convention](https://gcc.gnu.org/wiki/avr-gcc#Exceptions_to_the_Calling_Convention) for this function. 
+ // Inputs: + // R24: dividend [low] + // R25: dividend [high] + // R22: divisor [low] + // R23: divisor [high] + // Outputs: + // R22: quotient [low] (dividend / divisor) + // R23: quotient [high] + // R24: remainder [low] (dividend % divisor) + // R25: remainder [high] + // Clobbers: + // R21: loop counter + // R26: divisor [low] + // R27: divisor [high] + core::arch::naked_asm!( + // This assembly routine implements the [long division](https://en.wikipedia.org/wiki/Division_algorithm#Long_division) algorithm. + // Bits shift out of the dividend and into the quotient, so R24+R25 are used for both. + "mov R26, R22", // move divisor to make room for quotient + "mov R27, R23", + "mov R22, R24", // move dividend to output location (becomes quotient) + "mov R23, R25", + "clr R24", // remainder = 0 + "clr R25", + + "ldi R21, 16", // for each bit + "1:", + "lsl R22", // shift the dividend MSb + "rol R23", + "rol R24", // into the remainder LSb + "rol R25", + + "cp R24, R26", // if remainder >= divisor + "cpc R25, R27", + "brlo 2f", + "sub R24, R26", // remainder -= divisor + "sbc R25, R27", + "sbr R22, 1", // quotient |= 1 + "2:", + + "dec R21", // end loop + "brne 1b", + "ret", + ); + } + +} + +intrinsics! { + #[maybe_use_optimized_c_shim] + /// Returns `n / d` + pub extern "C" fn __udivdi3(n: u64, d: u64) -> u64 { + u64_div_rem(n, d).0 + } + + #[maybe_use_optimized_c_shim] + /// Returns `n % d` + pub extern "C" fn __umoddi3(n: u64, d: u64) -> u64 { + u64_div_rem(n, d).1 + } + + #[maybe_use_optimized_c_shim] + /// Returns `n / d` and sets `*rem = n % d` + pub extern "C" fn __udivmoddi4(n: u64, d: u64, rem: Option<&mut u64>) -> u64 { + let quo_rem = u64_div_rem(n, d); + if let Some(rem) = rem { + *rem = quo_rem.1; + } + quo_rem.0 + } + + // Note: we use block configuration and not `if cfg!(...)`, because we need to entirely disable + // the existence of `u128_div_rem` to get 32-bit SPARC to compile, see `u128_divide_sparc` docs. + + /// Returns `n / d` + pub extern "C" fn __udivti3(n: u128, d: u128) -> u128 { + #[cfg(not(any(target_arch = "sparc", target_arch = "sparc64")))] { + u128_div_rem(n, d).0 + } + #[cfg(any(target_arch = "sparc", target_arch = "sparc64"))] { + u128_divide_sparc(n, d, &mut 0) + } + } + + /// Returns `n % d` + pub extern "C" fn __umodti3(n: u128, d: u128) -> u128 { + #[cfg(not(any(target_arch = "sparc", target_arch = "sparc64")))] { + u128_div_rem(n, d).1 + } + #[cfg(any(target_arch = "sparc", target_arch = "sparc64"))] { + let mut rem = 0; + u128_divide_sparc(n, d, &mut rem); + rem + } + } + + /// Returns `n / d` and sets `*rem = n % d` + pub extern "C" fn __udivmodti4(n: u128, d: u128, rem: Option<&mut u128>) -> u128 { + #[cfg(not(any(target_arch = "sparc", target_arch = "sparc64")))] { + let quo_rem = u128_div_rem(n, d); + if let Some(rem) = rem { + *rem = quo_rem.1; + } + quo_rem.0 + } + #[cfg(any(target_arch = "sparc", target_arch = "sparc64"))] { + let mut tmp = 0; + let quo = u128_divide_sparc(n, d, &mut tmp); + if let Some(rem) = rem { + *rem = tmp; + } + quo + } + } +} diff --git a/library/compiler-builtins/compiler-builtins/src/lib.miri.rs b/library/compiler-builtins/compiler-builtins/src/lib.miri.rs new file mode 100644 index 0000000000000..17288058e5e8d --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/lib.miri.rs @@ -0,0 +1,5 @@ +//! Grep bootstrap for `MIRI_REPLACE_LIBRS_IF_NOT_TEST` to learn what this is about. 
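An aside on the AVR routines above: `__udivmodqi4` and `__udivmodhi4` are the textbook restoring (shift-and-subtract) long division. The same loop in plain Rust for the 8-bit case, checked against native division (hypothetical helper name, divisor assumed nonzero; not part of the diff):

```rust
fn udivmod_u8(mut n: u8, d: u8) -> (u8, u8) {
    let mut rem: u8 = 0;
    for _ in 0..8 {
        // Shift the dividend MSb into the remainder LSb; the freed low bit
        // of `n` will receive the next quotient bit.
        let msb = n >> 7;
        n <<= 1;
        rem = (rem << 1) | msb;
        if rem >= d {
            rem -= d; // remainder -= divisor
            n |= 1;   // quotient |= 1
        }
    }
    (n, rem) // `n` now holds the quotient
}

fn main() {
    for (n, d) in [(200u8, 7u8), (255, 16), (5, 2)] {
        assert_eq!(udivmod_u8(n, d), (n / d, n % d));
    }
}
```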
+#![no_std] +#![feature(rustc_private)] +extern crate compiler_builtins as real; +pub use real::*; diff --git a/library/compiler-builtins/compiler-builtins/src/lib.rs b/library/compiler-builtins/compiler-builtins/src/lib.rs new file mode 100644 index 0000000000000..6a6b28067e8c8 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/lib.rs @@ -0,0 +1,84 @@ +#![cfg_attr(feature = "compiler-builtins", compiler_builtins)] +#![cfg_attr(all(target_family = "wasm"), feature(wasm_numeric_instr))] +#![feature(abi_unadjusted)] +#![feature(asm_experimental_arch)] +#![feature(cfg_target_has_atomic)] +#![feature(compiler_builtins)] +#![feature(core_intrinsics)] +#![feature(linkage)] +#![feature(naked_functions)] +#![feature(repr_simd)] +#![cfg_attr(f16_enabled, feature(f16))] +#![cfg_attr(f128_enabled, feature(f128))] +#![no_builtins] +#![no_std] +#![allow(unused_features)] +#![allow(internal_features)] +// We use `u128` in a whole bunch of places which we currently agree with the +// compiler on ABIs and such, so we should be "good enough" for now and changes +// to the `u128` ABI will be reflected here. +#![allow(improper_ctypes, improper_ctypes_definitions)] +// `mem::swap` cannot be used because it may generate references to memcpy in unoptimized code. +#![allow(clippy::manual_swap)] +// Support compiling on both stage0 and stage1 which may differ in supported stable features. +#![allow(stable_features)] +// By default, disallow this as it is forbidden in edition 2024. There is a lot of unsafe code to +// be migrated, however, so exceptions exist. +#![warn(unsafe_op_in_unsafe_fn)] + +// We disable #[no_mangle] for tests so that we can verify the test results +// against the native compiler-rt implementations of the builtins. + +// NOTE cfg(all(feature = "c", ..)) indicate that compiler-rt provides an arch optimized +// implementation of that intrinsic and we'll prefer to use that + +// NOTE(aapcs, aeabi, arm) ARM targets use intrinsics named __aeabi_* instead of the intrinsics +// that follow "x86 naming convention" (e.g. addsf3). Those aeabi intrinsics must adhere to the +// AAPCS calling convention (`extern "aapcs"`) because that's how LLVM will call them. + +#[cfg(test)] +extern crate core; + +#[macro_use] +mod macros; + +pub mod float; +pub mod int; +pub mod math; +pub mod mem; + +// `libm` expects its `support` module to be available in the crate root. +use math::libm_math::support; + +#[cfg(target_arch = "arm")] +pub mod arm; + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +pub mod aarch64; + +#[cfg(all(target_arch = "aarch64", target_os = "linux", not(feature = "no-asm"),))] +pub mod aarch64_linux; + +#[cfg(all( + kernel_user_helpers, + any(target_os = "linux", target_os = "android"), + target_arch = "arm" +))] +pub mod arm_linux; + +#[cfg(target_arch = "avr")] +pub mod avr; + +#[cfg(target_arch = "hexagon")] +pub mod hexagon; + +#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] +pub mod riscv; + +#[cfg(target_arch = "x86")] +pub mod x86; + +#[cfg(target_arch = "x86_64")] +pub mod x86_64; + +pub mod probestack; diff --git a/library/compiler-builtins/compiler-builtins/src/macros.rs b/library/compiler-builtins/compiler-builtins/src/macros.rs new file mode 100644 index 0000000000000..22e0dd27f2f5a --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/macros.rs @@ -0,0 +1,486 @@ +//! Macros shared throughout the compiler-builtins implementation + +/// The "main macro" used for defining intrinsics. 
+/// +/// The compiler-builtins library is super platform-specific with tons of crazy +/// little tweaks for various platforms. As a result it *could* involve a lot of +/// #[cfg] and macro soup, but the intention is that this macro alleviates a lot +/// of that complexity. Ideally this macro has all the weird ABI things +/// platforms need and elsewhere in this library it just looks like normal Rust +/// code. +/// +/// All intrinsics functions are marked with #[linkage = "weak"] when +/// `not(windows) and not(target_vendor = "apple")`. +/// `weak` linkage attribute is used so that these functions can be replaced +/// by another implementation at link time. This is particularly useful for mixed +/// Rust/C++ binaries that want to use the C++ intrinsics, otherwise linking against +/// the Rust stdlib will replace those from the compiler-rt library. +/// +/// This macro is structured to be invoked with a bunch of functions that looks +/// like: +/// ```ignore +/// intrinsics! { +/// pub extern "C" fn foo(a: i32) -> u32 { +/// // ... +/// } +/// +/// #[nonstandard_attribute] +/// pub extern "C" fn bar(a: i32) -> u32 { +/// // ... +/// } +/// } +/// ``` +/// +/// Each function is defined in a manner that looks like a normal Rust function. +/// The macro then accepts a few nonstandard attributes that can decorate +/// various functions. Each of the attributes is documented below with what it +/// can do, and each of them slightly tweaks how further expansion happens. +/// +/// A quick overview of attributes supported right now are: +/// +/// * `maybe_use_optimized_c_shim` - indicates that the Rust implementation is +/// ignored if an optimized C version was compiled. +/// * `aapcs_on_arm` - forces the ABI of the function to be `"aapcs"` on ARM and +/// the specified ABI everywhere else. +/// * `unadjusted_on_win64` - like `aapcs_on_arm` this switches to the +/// `"unadjusted"` abi on Win64 and the specified abi elsewhere. +/// * `arm_aeabi_alias` - handles the "aliasing" of various intrinsics on ARM +/// their otherwise typical names to other prefixed ones. +/// * `ppc_alias` - changes the name of the symbol on PowerPC platforms without +/// changing any other behavior. This is mostly for `f128`, which is `tf` on +/// most platforms but `kf` on PowerPC. +macro_rules! intrinsics { + () => (); + + // Support cfg_attr: + ( + #[cfg_attr($e:meta, $($attr:tt)*)] + $(#[$($attrs:tt)*])* + pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + $($rest:tt)* + ) => ( + #[cfg($e)] + intrinsics! { + #[$($attr)*] + $(#[$($attrs)*])* + pub extern $abi fn $name($($argname: $ty),*) $(-> $ret)? { + $($body)* + } + } + + #[cfg(not($e))] + intrinsics! { + $(#[$($attrs)*])* + pub extern $abi fn $name($($argname: $ty),*) $(-> $ret)? { + $($body)* + } + } + + intrinsics!($($rest)*); + ); + // Same as above but for unsafe. + ( + #[cfg_attr($e:meta, $($attr:tt)*)] + $(#[$($attrs:tt)*])* + pub unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + $($rest:tt)* + ) => ( + #[cfg($e)] + intrinsics! { + #[$($attr)*] + $(#[$($attrs)*])* + pub unsafe extern $abi fn $name($($argname: $ty),*) $(-> $ret)? { + $($body)* + } + } + + #[cfg(not($e))] + intrinsics! { + $(#[$($attrs)*])* + pub unsafe extern $abi fn $name($($argname: $ty),*) $(-> $ret)? { + $($body)* + } + } + + intrinsics!($($rest)*); + ); + + // Right now there's a bunch of architecture-optimized intrinsics in the + // stock compiler-rt implementation. 
Not all of these have been ported over + // to Rust yet so when the `c` feature of this crate is enabled we fall back + // to the architecture-specific versions which should be more optimized. The + // purpose of this macro is to easily allow specifying this. + // + // The `#[maybe_use_optimized_c_shim]` attribute indicates that this + // intrinsic may have an optimized C version. In these situations the build + // script, if the C code is enabled and compiled, will emit a cfg directive + // to get passed to rustc for our compilation. If that cfg is set we skip + // the Rust implementation, but if the attribute is not enabled then we + // compile in the Rust implementation. + ( + #[maybe_use_optimized_c_shim] + $(#[$($attr:tt)*])* + pub $(unsafe $(@ $empty:tt)? )? extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + #[cfg($name = "optimized-c")] + pub $(unsafe $($empty)? )? extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + extern $abi { + fn $name($($argname: $ty),*) $(-> $ret)?; + } + unsafe { + $name($($argname),*) + } + } + + #[cfg(not($name = "optimized-c"))] + intrinsics! { + $(#[$($attr)*])* + pub $(unsafe $($empty)? )? extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + intrinsics!($($rest)*); + ); + + // We recognize the `#[aapcs_on_arm]` attribute here and generate the + // same intrinsic but force it to have the `"aapcs"` calling convention on + // ARM and `"C"` elsewhere. + ( + #[aapcs_on_arm] + $(#[$($attr:tt)*])* + pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + #[cfg(target_arch = "arm")] + intrinsics! { + $(#[$($attr)*])* + pub extern "aapcs" fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + #[cfg(not(target_arch = "arm"))] + intrinsics! { + $(#[$($attr)*])* + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + intrinsics!($($rest)*); + ); + + // Like aapcs above we recognize an attribute for the "unadjusted" abi on + // win64 for some methods. + ( + #[unadjusted_on_win64] + $(#[$($attr:tt)*])* + pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + #[cfg(all(any(windows, target_os = "cygwin", all(target_os = "uefi", target_arch = "x86_64")), target_pointer_width = "64"))] + intrinsics! { + $(#[$($attr)*])* + pub extern "unadjusted" fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + #[cfg(not(all(any(windows, target_os = "cygwin", all(target_os = "uefi", target_arch = "x86_64")), target_pointer_width = "64")))] + intrinsics! { + $(#[$($attr)*])* + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + intrinsics!($($rest)*); + ); + + // `arm_aeabi_alias` would conflict with `f16_apple_{arg,ret}_abi` not handled here. Avoid macro ambiguity by combining in a + // single `#[]`. + ( + #[apple_f16_arg_abi] + #[arm_aeabi_alias = $alias:ident] + $($t:tt)* + ) => { + intrinsics! { + #[apple_f16_arg_abi, arm_aeabi_alias = $alias] + $($t)* + } + }; + ( + #[apple_f16_ret_abi] + #[arm_aeabi_alias = $alias:ident] + $($t:tt)* + ) => { + intrinsics! { + #[apple_f16_ret_abi, arm_aeabi_alias = $alias] + $($t)* + } + }; + + // On x86 (32-bit and 64-bit) Apple platforms, `f16` is passed and returned like a `u16` unless + // the builtin involves `f128`. + ( + // `arm_aeabi_alias` would conflict if not handled here. 
Avoid macro ambiguity by combining + // in a single `#[]`. + #[apple_f16_arg_abi $(, arm_aeabi_alias = $alias:ident)?] + $(#[$($attr:tt)*])* + pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + #[cfg(all(target_vendor = "apple", any(target_arch = "x86", target_arch = "x86_64")))] + $(#[$($attr)*])* + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + + #[cfg(all(target_vendor = "apple", any(target_arch = "x86", target_arch = "x86_64"), not(feature = "mangled-names")))] + mod $name { + #[unsafe(no_mangle)] + #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")] + $(#[$($attr)*])* + extern $abi fn $name( $($argname: u16),* ) $(-> $ret)? { + super::$name($(f16::from_bits($argname)),*) + } + } + + #[cfg(not(all(target_vendor = "apple", any(target_arch = "x86", target_arch = "x86_64"))))] + intrinsics! { + $(#[arm_aeabi_alias = $alias])? + $(#[$($attr)*])* + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + intrinsics!($($rest)*); + ); + ( + #[apple_f16_ret_abi $(, arm_aeabi_alias = $alias:ident)?] + $(#[$($attr:tt)*])* + pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + #[cfg(all(target_vendor = "apple", any(target_arch = "x86", target_arch = "x86_64")))] + $(#[$($attr)*])* + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + + #[cfg(all(target_vendor = "apple", any(target_arch = "x86", target_arch = "x86_64"), not(feature = "mangled-names")))] + mod $name { + #[unsafe(no_mangle)] + #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")] + $(#[$($attr)*])* + extern $abi fn $name( $($argname: $ty),* ) -> u16 { + super::$name($($argname),*).to_bits() + } + } + + #[cfg(not(all(target_vendor = "apple", any(target_arch = "x86", target_arch = "x86_64"))))] + intrinsics! { + $(#[arm_aeabi_alias = $alias])? + $(#[$($attr)*])* + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + intrinsics!($($rest)*); + ); + + // A bunch of intrinsics on ARM are aliased in the standard compiler-rt + // build under `__aeabi_*` aliases, and LLVM will call these instead of the + // original function. The aliasing here is used to generate these symbols in + // the object file. + ( + #[arm_aeabi_alias = $alias:ident] + $(#[$($attr:tt)*])* + pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + #[cfg(target_arch = "arm")] + $(#[$($attr)*])* + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + + #[cfg(all(target_arch = "arm", not(feature = "mangled-names")))] + mod $name { + #[unsafe(no_mangle)] + #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")] + $(#[$($attr)*])* + extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + super::$name($($argname),*) + } + } + + #[cfg(all(target_arch = "arm", not(feature = "mangled-names")))] + mod $alias { + #[unsafe(no_mangle)] + #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")] + $(#[$($attr)*])* + extern "aapcs" fn $alias( $($argname: $ty),* ) $(-> $ret)? { + super::$name($($argname),*) + } + } + + #[cfg(not(target_arch = "arm"))] + intrinsics! { + $(#[$($attr)*])* + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? 
{ + $($body)* + } + } + + intrinsics!($($rest)*); + ); + + // PowerPC usually uses `kf` rather than `tf` for `f128`. This is just an easy + // way to add an alias on those targets. + ( + #[ppc_alias = $alias:ident] + $(#[$($attr:tt)*])* + pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + intrinsics! { + $(#[$($attr)*])* + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + intrinsics! { + $(#[$($attr)*])* + pub extern $abi fn $alias( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + intrinsics!($($rest)*); + ); + + // C mem* functions are only generated when the "mem" feature is enabled. + ( + #[mem_builtin] + $(#[$($attr:tt)*])* + pub unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + $(#[$($attr)*])* + pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + + #[cfg(all(feature = "mem", not(feature = "mangled-names")))] + mod $name { + $(#[$($attr)*])* + #[unsafe(no_mangle)] + #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")] + unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + super::$name($($argname),*) + } + } + + intrinsics!($($rest)*); + ); + + // Naked functions are special: we can't generate wrappers for them since + // they use a custom calling convention. + ( + #[unsafe(naked)] + $(#[$($attr:tt)*])* + pub unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + // `#[naked]` definitions are referenced by other places, so we can't use `cfg` like the others + pub mod $name { + #[unsafe(naked)] + $(#[$($attr)*])* + #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")] + pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + intrinsics!($($rest)*); + ); + + // This is the final catch-all rule. At this point we generate an + // intrinsic with a conditional `#[no_mangle]` directive to avoid + // interfering with duplicate symbols and whatnot during testing. + // + // The implementation is placed in a separate module, to take advantage + // of the fact that rustc partitions functions into code generation + // units based on module they are defined in. As a result we will have + // a separate object file for each intrinsic. For further details see + // corresponding PR in rustc https://github.com/rust-lang/rust/pull/70846 + // + // After the intrinsic is defined we just continue with the rest of the + // input we were given. + ( + $(#[$($attr:tt)*])* + pub $(unsafe $(@ $empty:tt)?)? extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + $(#[$($attr)*])* + pub $(unsafe $($empty)?)? extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + + #[cfg(not(feature = "mangled-names"))] + mod $name { + $(#[$($attr)*])* + #[unsafe(no_mangle)] + #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")] + $(unsafe $($empty)?)? extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + // SAFETY: same preconditions. 
+ $(unsafe $($empty)?)? { super::$name($($argname),*) } + } + } + + intrinsics!($($rest)*); + ); +} diff --git a/library/compiler-builtins/compiler-builtins/src/math/mod.rs b/library/compiler-builtins/compiler-builtins/src/math/mod.rs new file mode 100644 index 0000000000000..62d7296741057 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/math/mod.rs @@ -0,0 +1,199 @@ +#[rustfmt::skip] +#[allow(dead_code)] +#[allow(unused_imports)] +#[allow(clippy::all)] +#[path = "../../../libm/src/math/mod.rs"] +pub(crate) mod libm_math; + +macro_rules! libm_intrinsics { + ($(fn $fun:ident($($iid:ident : $ity:ty),+) -> $oty:ty;)+) => { + intrinsics! { + $( + pub extern "C" fn $fun($($iid: $ity),+) -> $oty { + $crate::math::libm_math::$fun($($iid),+) + } + )+ + } + } +} + +/// This set of functions is well tested in `libm` and known to provide similar performance to +/// system `libm`, as well as the same or better accuracy. +pub mod full_availability { + #[cfg(f16_enabled)] + libm_intrinsics! { + fn ceilf16(x: f16) -> f16; + fn copysignf16(x: f16, y: f16) -> f16; + fn fabsf16(x: f16) -> f16; + fn fdimf16(x: f16, y: f16) -> f16; + fn floorf16(x: f16) -> f16; + fn fmaxf16(x: f16, y: f16) -> f16; + fn fmaximumf16(x: f16, y: f16) -> f16; + fn fminf16(x: f16, y: f16) -> f16; + fn fminimumf16(x: f16, y: f16) -> f16; + fn fmodf16(x: f16, y: f16) -> f16; + fn rintf16(x: f16) -> f16; + fn roundevenf16(x: f16) -> f16; + fn roundf16(x: f16) -> f16; + fn sqrtf16(x: f16) -> f16; + fn truncf16(x: f16) -> f16; + } + + /* Weak linkage is unreliable on Windows and Apple, so we don't expose symbols that we know + * the system libc provides in order to avoid conflicts. */ + + #[cfg(all(not(windows), not(target_vendor = "apple")))] + libm_intrinsics! { + /* f32 */ + fn cbrtf(n: f32) -> f32; + fn ceilf(x: f32) -> f32; + fn copysignf(x: f32, y: f32) -> f32; + fn fabsf(x: f32) -> f32; + fn fdimf(a: f32, b: f32) -> f32; + fn floorf(x: f32) -> f32; + fn fmaf(x: f32, y: f32, z: f32) -> f32; + fn fmaxf(x: f32, y: f32) -> f32; + fn fminf(x: f32, y: f32) -> f32; + fn fmodf(x: f32, y: f32) -> f32; + fn rintf(x: f32) -> f32; + fn roundf(x: f32) -> f32; + fn sqrtf(x: f32) -> f32; + fn truncf(x: f32) -> f32; + + /* f64 */ + fn cbrt(x: f64) -> f64; + fn ceil(x: f64) -> f64; + fn copysign(x: f64, y: f64) -> f64; + fn fabs(x: f64) -> f64; + fn fdim(a: f64, b: f64) -> f64; + fn floor(x: f64) -> f64; + fn fma(x: f64, y: f64, z: f64) -> f64; + fn fmax(x: f64, y: f64) -> f64; + fn fmin(x: f64, y: f64) -> f64; + fn fmod(x: f64, y: f64) -> f64; + fn rint(x: f64) -> f64; + fn round(x: f64) -> f64; + fn sqrt(x: f64) -> f64; + fn trunc(x: f64) -> f64; + } + + // Windows and MacOS do not yet expose roundeven and IEEE 754-2019 `maximum` / `minimum`, + // however, so we still provide a fallback. + libm_intrinsics! { + fn fmaximum(x: f64, y: f64) -> f64; + fn fmaximumf(x: f32, y: f32) -> f32; + fn fminimum(x: f64, y: f64) -> f64; + fn fminimumf(x: f32, y: f32) -> f32; + fn roundeven(x: f64) -> f64; + fn roundevenf(x: f32) -> f32; + } + + #[cfg(f128_enabled)] + libm_intrinsics! 
{ + fn ceilf128(x: f128) -> f128; + fn copysignf128(x: f128, y: f128) -> f128; + fn fabsf128(x: f128) -> f128; + fn fdimf128(x: f128, y: f128) -> f128; + fn floorf128(x: f128) -> f128; + fn fmaf128(x: f128, y: f128, z: f128) -> f128; + fn fmaxf128(x: f128, y: f128) -> f128; + fn fmaximumf128(x: f128, y: f128) -> f128; + fn fminf128(x: f128, y: f128) -> f128; + fn fminimumf128(x: f128, y: f128) -> f128; + fn fmodf128(x: f128, y: f128) -> f128; + fn rintf128(x: f128) -> f128; + fn roundevenf128(x: f128) -> f128; + fn roundf128(x: f128) -> f128; + fn sqrtf128(x: f128) -> f128; + fn truncf128(x: f128) -> f128; + } +} + +/// This group of functions has more performance or precision issues than system versions, or +/// are otherwise less well tested. Provide them only on platforms that have problems with the +/// system `libm`. +/// +/// As `libm` improves, more functions will be moved from this group to the first group. +/// +/// Do not supply for any of the following: +/// - x86 without sse2 due to ABI issues +/// - +/// - but exclude UEFI since it is a soft-float target +/// - +/// - All unix targets (linux, macos, freebsd, android, etc) +/// - wasm with known target_os +#[cfg(not(any( + all( + target_arch = "x86", + not(target_feature = "sse2"), + not(target_os = "uefi"), + ), + unix, + all(target_family = "wasm", not(target_os = "unknown")) +)))] +pub mod partial_availability { + #[cfg(not(windows))] + libm_intrinsics! { + fn acos(x: f64) -> f64; + fn acosf(n: f32) -> f32; + fn asin(x: f64) -> f64; + fn asinf(n: f32) -> f32; + fn atan(x: f64) -> f64; + fn atan2(x: f64, y: f64) -> f64; + fn atan2f(a: f32, b: f32) -> f32; + fn atanf(n: f32) -> f32; + fn cos(x: f64) -> f64; + fn cosf(x: f32) -> f32; + fn cosh(x: f64) -> f64; + fn coshf(n: f32) -> f32; + fn erf(x: f64) -> f64; + fn erfc(x: f64) -> f64; + fn erfcf(x: f32) -> f32; + fn erff(x: f32) -> f32; + fn exp(x: f64) -> f64; + fn exp2(x: f64) -> f64; + fn exp2f(x: f32) -> f32; + fn expf(x: f32) -> f32; + fn expm1(x: f64) -> f64; + fn expm1f(n: f32) -> f32; + fn hypot(x: f64, y: f64) -> f64; + fn hypotf(x: f32, y: f32) -> f32; + fn ldexp(f: f64, n: i32) -> f64; + fn ldexpf(f: f32, n: i32) -> f32; + fn log(x: f64) -> f64; + fn log10(x: f64) -> f64; + fn log10f(x: f32) -> f32; + fn log1p(x: f64) -> f64; + fn log1pf(n: f32) -> f32; + fn log2(x: f64) -> f64; + fn log2f(x: f32) -> f32; + fn logf(x: f32) -> f32; + fn pow(x: f64, y: f64) -> f64; + fn powf(x: f32, y: f32) -> f32; + fn sin(x: f64) -> f64; + fn sinf(x: f32) -> f32; + fn sinh(x: f64) -> f64; + fn sinhf(n: f32) -> f32; + fn tan(x: f64) -> f64; + fn tanf(n: f32) -> f32; + fn tanh(x: f64) -> f64; + fn tanhf(n: f32) -> f32; + fn tgamma(x: f64) -> f64; + fn tgammaf(x: f32) -> f32; + } + + // allow for windows (and other targets) + intrinsics! 
{
+        pub extern "C" fn lgamma_r(x: f64, s: &mut i32) -> f64 {
+            let r = super::libm_math::lgamma_r(x);
+            *s = r.1;
+            r.0
+        }
+
+        pub extern "C" fn lgammaf_r(x: f32, s: &mut i32) -> f32 {
+            let r = super::libm_math::lgammaf_r(x);
+            *s = r.1;
+            r.0
+        }
+    }
+}
diff --git a/library/compiler-builtins/compiler-builtins/src/mem/impls.rs b/library/compiler-builtins/compiler-builtins/src/mem/impls.rs
new file mode 100644
index 0000000000000..14a4787485dc8
--- /dev/null
+++ b/library/compiler-builtins/compiler-builtins/src/mem/impls.rs
@@ -0,0 +1,408 @@
+// In C and Rust it is UB to read or write to usize::MAX because if an allocation extends to the
+// last byte of address space (there must be an allocation to do the read or write), in C computing
+// its one-past-the-end pointer would be equal to NULL and in Rust computing the address of a
+// trailing ZST member with a safe place projection would wrap (place projection address computation
+// is non-wrapping).
+//
+// However, some embedded systems have special memory at usize::MAX, and need to access that
+// memory. If they do that with the intrinsics provided by compiler-builtins (such as memcpy!), the
+// ptr::add in these loops will wrap. And if compiler-builtins is compiled with cfg(ub_checks),
+// this will fail a UB check at runtime.
+//
+// Since this scenario is UB, we are within our rights to hit this check and halt execution...
+// But we are also within our rights to try to make it work.
+// We use wrapping_add/wrapping_sub for pointer arithmetic in this module in an attempt to support
+// this use. Of course this is not a guarantee that such use will work, it just means that this
+// crate doing wrapping pointer arithmetic with a method that must not wrap won't be the problem if
+// something does go wrong at runtime.
+use core::intrinsics::likely;
+
+const WORD_SIZE: usize = core::mem::size_of::<usize>();
+const WORD_MASK: usize = WORD_SIZE - 1;
+
+// If the number of bytes involved exceeds this threshold we will opt in to word-wise copy.
+// The value selected here is max(2 * WORD_SIZE, 16):
+// * We need at least 2 * WORD_SIZE bytes to guarantee that at least 1 word will be copied through
+//   word-wise copy.
+// * The word-wise copy logic needs to perform some checks so it has some small overhead. The
+//   threshold of 16 ensures that even on 32-bit platforms we have copied at least 8 bytes through
+//   word-wise copy so the saving of word-wise copy outweighs the fixed overhead.
+const WORD_COPY_THRESHOLD: usize = if 2 * WORD_SIZE > 16 {
+    2 * WORD_SIZE
+} else {
+    16
+};
+
+#[cfg(feature = "mem-unaligned")]
+unsafe fn read_usize_unaligned(x: *const usize) -> usize {
+    // Do not use `core::ptr::read_unaligned` here, since it calls `copy_nonoverlapping` which
+    // is translated to memcpy in LLVM.
+    let x_read = (x as *const [u8; core::mem::size_of::<usize>()]).read();
+    usize::from_ne_bytes(x_read)
+}
+
+/// Loads a `T`-sized chunk from `src` into `dst` at offset `offset`, if that does not exceed
+/// `load_sz`. The offset pointers must both be `T`-aligned. Returns the new offset, advanced by the
+/// chunk size if a load happened.
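+// Example of how the helper below composes: for `load_sz = 5` (0b101), the partial-load
+// functions further down perform one 4-byte (`u32`) load and one 1-byte (`u8`) load and skip
+// the 2-byte step, because `load_sz & chunk_sz` is non-zero exactly for the chunk sizes that
+// sum to `load_sz`.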
+#[cfg(not(feature = "mem-unaligned"))]
+#[inline(always)]
+unsafe fn load_chunk_aligned<T: Copy>(
+    src: *const usize,
+    dst: *mut usize,
+    load_sz: usize,
+    offset: usize,
+) -> usize {
+    let chunk_sz = core::mem::size_of::<T>();
+    if (load_sz & chunk_sz) != 0 {
+        *dst.wrapping_byte_add(offset).cast::<T>() = *src.wrapping_byte_add(offset).cast::<T>();
+        offset | chunk_sz
+    } else {
+        offset
+    }
+}
+
+/// Load `load_sz` many bytes from `src`, which must be usize-aligned. Acts as if we did a `usize`
+/// read with the out-of-bounds part filled with 0s.
+/// `load_sz` must be strictly less than `WORD_SIZE`.
+#[cfg(not(feature = "mem-unaligned"))]
+#[inline(always)]
+unsafe fn load_aligned_partial(src: *const usize, load_sz: usize) -> usize {
+    debug_assert!(load_sz < WORD_SIZE);
+    // We can read up to 7 bytes here, which is enough for WORD_SIZE of 8
+    // (since `load_sz < WORD_SIZE`).
+    const { assert!(WORD_SIZE <= 8) };
+
+    let mut i = 0;
+    let mut out = 0usize;
+    // We load in decreasing order, so the pointers remain sufficiently aligned for the next step.
+    i = load_chunk_aligned::<u32>(src, &raw mut out, load_sz, i);
+    i = load_chunk_aligned::<u16>(src, &raw mut out, load_sz, i);
+    i = load_chunk_aligned::<u8>(src, &raw mut out, load_sz, i);
+    debug_assert!(i == load_sz);
+    out
+}
+
+/// Load `load_sz` many bytes from `src.wrapping_byte_add(WORD_SIZE - load_sz)`. `src` must be
+/// `usize`-aligned. The bytes are returned as the *last* bytes of the return value, i.e., this acts
+/// as if we had done a `usize` read from `src`, with the out-of-bounds part filled with 0s.
+/// `load_sz` must be strictly less than `WORD_SIZE`.
+#[cfg(not(feature = "mem-unaligned"))]
+#[inline(always)]
+unsafe fn load_aligned_end_partial(src: *const usize, load_sz: usize) -> usize {
+    debug_assert!(load_sz < WORD_SIZE);
+    // We can read up to 7 bytes here, which is enough for WORD_SIZE of 8
+    // (since `load_sz < WORD_SIZE`).
+    const { assert!(WORD_SIZE <= 8) };
+
+    let mut i = 0;
+    let mut out = 0usize;
+    // Obtain pointers pointing to the beginning of the range we want to load.
+    let src_shifted = src.wrapping_byte_add(WORD_SIZE - load_sz);
+    let out_shifted = (&raw mut out).wrapping_byte_add(WORD_SIZE - load_sz);
+    // We load in increasing order, so by the time we reach `u16` things are 2-aligned etc.
+    i = load_chunk_aligned::<u8>(src_shifted, out_shifted, load_sz, i);
+    i = load_chunk_aligned::<u16>(src_shifted, out_shifted, load_sz, i);
+    i = load_chunk_aligned::<u32>(src_shifted, out_shifted, load_sz, i);
+    debug_assert!(i == load_sz);
+    out
+}
+
+#[inline(always)]
+pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) {
+    #[inline(always)]
+    unsafe fn copy_forward_bytes(mut dest: *mut u8, mut src: *const u8, n: usize) {
+        let dest_end = dest.wrapping_add(n);
+        while dest < dest_end {
+            *dest = *src;
+            dest = dest.wrapping_add(1);
+            src = src.wrapping_add(1);
+        }
+    }
+
+    #[inline(always)]
+    unsafe fn copy_forward_aligned_words(dest: *mut u8, src: *const u8, n: usize) {
+        let mut dest_usize = dest as *mut usize;
+        let mut src_usize = src as *mut usize;
+        let dest_end = dest.wrapping_add(n) as *mut usize;
+
+        while dest_usize < dest_end {
+            *dest_usize = *src_usize;
+            dest_usize = dest_usize.wrapping_add(1);
+            src_usize = src_usize.wrapping_add(1);
+        }
+    }
+
+    /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
+    /// `src` *must not* be `usize`-aligned.
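+    // Illustration of the reassembly below: with WORD_SIZE == 8 and `src` misaligned by
+    // `offset = 3` (so `shift = 24`), each output word on little-endian targets is
+    // `prev_word >> 24 | cur_word << 40`, i.e. the top 5 bytes of the previous aligned read
+    // joined with the low 3 bytes of the next one; big-endian mirrors the shifts.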
+ #[cfg(not(feature = "mem-unaligned"))] + #[inline(always)] + unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) { + debug_assert!(n > 0 && n % WORD_SIZE == 0); + debug_assert!(src.addr() % WORD_SIZE != 0); + + let mut dest_usize = dest as *mut usize; + let dest_end = dest.wrapping_add(n) as *mut usize; + + // Calculate the misalignment offset and shift needed to reassemble value. + // Since `src` is definitely not aligned, `offset` is in the range 1..WORD_SIZE. + let offset = src as usize & WORD_MASK; + let shift = offset * 8; + + // Realign src + let mut src_aligned = src.wrapping_byte_sub(offset) as *mut usize; + let mut prev_word = load_aligned_end_partial(src_aligned, WORD_SIZE - offset); + + while dest_usize.wrapping_add(1) < dest_end { + src_aligned = src_aligned.wrapping_add(1); + let cur_word = *src_aligned; + let reassembled = if cfg!(target_endian = "little") { + prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift) + } else { + prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift) + }; + prev_word = cur_word; + + *dest_usize = reassembled; + dest_usize = dest_usize.wrapping_add(1); + } + + // There's one more element left to go, and we can't use the loop for that as on the `src` side, + // it is partially out-of-bounds. + src_aligned = src_aligned.wrapping_add(1); + let cur_word = load_aligned_partial(src_aligned, offset); + let reassembled = if cfg!(target_endian = "little") { + prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift) + } else { + prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift) + }; + // prev_word does not matter any more + + *dest_usize = reassembled; + // dest_usize does not matter any more + } + + /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0. + /// `src` *must not* be `usize`-aligned. + #[cfg(feature = "mem-unaligned")] + #[inline(always)] + unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) { + let mut dest_usize = dest as *mut usize; + let mut src_usize = src as *mut usize; + let dest_end = dest.wrapping_add(n) as *mut usize; + + while dest_usize < dest_end { + *dest_usize = read_usize_unaligned(src_usize); + dest_usize = dest_usize.wrapping_add(1); + src_usize = src_usize.wrapping_add(1); + } + } + + if n >= WORD_COPY_THRESHOLD { + // Align dest + // Because of n >= 2 * WORD_SIZE, dst_misalignment < n + let dest_misalignment = (dest as usize).wrapping_neg() & WORD_MASK; + copy_forward_bytes(dest, src, dest_misalignment); + dest = dest.wrapping_add(dest_misalignment); + src = src.wrapping_add(dest_misalignment); + n -= dest_misalignment; + + let n_words = n & !WORD_MASK; + let src_misalignment = src as usize & WORD_MASK; + if likely(src_misalignment == 0) { + copy_forward_aligned_words(dest, src, n_words); + } else { + copy_forward_misaligned_words(dest, src, n_words); + } + dest = dest.wrapping_add(n_words); + src = src.wrapping_add(n_words); + n -= n_words; + } + copy_forward_bytes(dest, src, n); +} + +#[inline(always)] +pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) { + // The following backward copy helper functions uses the pointers past the end + // as their inputs instead of pointers to the start! 
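+    // E.g. `copy_backward_bytes(dest, src, 3)` expects `dest` and `src` to point one past the
+    // end of their buffers and writes `dest - 1`, `dest - 2`, `dest - 3`; the outer function
+    // advances both pointers by `n` before calling these helpers.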
+ #[inline(always)] + unsafe fn copy_backward_bytes(mut dest: *mut u8, mut src: *const u8, n: usize) { + let dest_start = dest.wrapping_sub(n); + while dest_start < dest { + dest = dest.wrapping_sub(1); + src = src.wrapping_sub(1); + *dest = *src; + } + } + + #[inline(always)] + unsafe fn copy_backward_aligned_words(dest: *mut u8, src: *const u8, n: usize) { + let mut dest_usize = dest as *mut usize; + let mut src_usize = src as *mut usize; + let dest_start = dest.wrapping_sub(n) as *mut usize; + + while dest_start < dest_usize { + dest_usize = dest_usize.wrapping_sub(1); + src_usize = src_usize.wrapping_sub(1); + *dest_usize = *src_usize; + } + } + + /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0. + /// `src` *must not* be `usize`-aligned. + #[cfg(not(feature = "mem-unaligned"))] + #[inline(always)] + unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) { + debug_assert!(n > 0 && n % WORD_SIZE == 0); + debug_assert!(src.addr() % WORD_SIZE != 0); + + let mut dest_usize = dest as *mut usize; + let dest_start = dest.wrapping_sub(n) as *mut usize; // we're moving towards the start + + // Calculate the misalignment offset and shift needed to reassemble value. + // Since `src` is definitely not aligned, `offset` is in the range 1..WORD_SIZE. + let offset = src as usize & WORD_MASK; + let shift = offset * 8; + + // Realign src + let mut src_aligned = src.wrapping_byte_sub(offset) as *mut usize; + let mut prev_word = load_aligned_partial(src_aligned, offset); + + while dest_start.wrapping_add(1) < dest_usize { + src_aligned = src_aligned.wrapping_sub(1); + let cur_word = *src_aligned; + let reassembled = if cfg!(target_endian = "little") { + prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift + } else { + prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift + }; + prev_word = cur_word; + + dest_usize = dest_usize.wrapping_sub(1); + *dest_usize = reassembled; + } + + // There's one more element left to go, and we can't use the loop for that as on the `src` side, + // it is partially out-of-bounds. + src_aligned = src_aligned.wrapping_sub(1); + let cur_word = load_aligned_end_partial(src_aligned, WORD_SIZE - offset); + let reassembled = if cfg!(target_endian = "little") { + prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift + } else { + prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift + }; + // prev_word does not matter any more + + dest_usize = dest_usize.wrapping_sub(1); + *dest_usize = reassembled; + } + + /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0. + /// `src` *must not* be `usize`-aligned. 
+ #[cfg(feature = "mem-unaligned")] + #[inline(always)] + unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) { + let mut dest_usize = dest as *mut usize; + let mut src_usize = src as *mut usize; + let dest_start = dest.wrapping_sub(n) as *mut usize; + + while dest_start < dest_usize { + dest_usize = dest_usize.wrapping_sub(1); + src_usize = src_usize.wrapping_sub(1); + *dest_usize = read_usize_unaligned(src_usize); + } + } + + let mut dest = dest.wrapping_add(n); + let mut src = src.wrapping_add(n); + + if n >= WORD_COPY_THRESHOLD { + // Align dest + // Because of n >= 2 * WORD_SIZE, dst_misalignment < n + let dest_misalignment = dest as usize & WORD_MASK; + copy_backward_bytes(dest, src, dest_misalignment); + dest = dest.wrapping_sub(dest_misalignment); + src = src.wrapping_sub(dest_misalignment); + n -= dest_misalignment; + + let n_words = n & !WORD_MASK; + let src_misalignment = src as usize & WORD_MASK; + if likely(src_misalignment == 0) { + copy_backward_aligned_words(dest, src, n_words); + } else { + copy_backward_misaligned_words(dest, src, n_words); + } + dest = dest.wrapping_sub(n_words); + src = src.wrapping_sub(n_words); + n -= n_words; + } + copy_backward_bytes(dest, src, n); +} + +#[inline(always)] +pub unsafe fn set_bytes(mut s: *mut u8, c: u8, mut n: usize) { + #[inline(always)] + pub unsafe fn set_bytes_bytes(mut s: *mut u8, c: u8, n: usize) { + let end = s.wrapping_add(n); + while s < end { + *s = c; + s = s.wrapping_add(1); + } + } + + #[inline(always)] + pub unsafe fn set_bytes_words(s: *mut u8, c: u8, n: usize) { + let mut broadcast = c as usize; + let mut bits = 8; + while bits < WORD_SIZE * 8 { + broadcast |= broadcast << bits; + bits *= 2; + } + + let mut s_usize = s as *mut usize; + let end = s.wrapping_add(n) as *mut usize; + + while s_usize < end { + *s_usize = broadcast; + s_usize = s_usize.wrapping_add(1); + } + } + + if likely(n >= WORD_COPY_THRESHOLD) { + // Align s + // Because of n >= 2 * WORD_SIZE, dst_misalignment < n + let misalignment = (s as usize).wrapping_neg() & WORD_MASK; + set_bytes_bytes(s, c, misalignment); + s = s.wrapping_add(misalignment); + n -= misalignment; + + let n_words = n & !WORD_MASK; + set_bytes_words(s, c, n_words); + s = s.wrapping_add(n_words); + n -= n_words; + } + set_bytes_bytes(s, c, n); +} + +#[inline(always)] +pub unsafe fn compare_bytes(s1: *const u8, s2: *const u8, n: usize) -> i32 { + let mut i = 0; + while i < n { + let a = *s1.wrapping_add(i); + let b = *s2.wrapping_add(i); + if a != b { + return a as i32 - b as i32; + } + i += 1; + } + 0 +} + +#[inline(always)] +pub unsafe fn c_string_length(mut s: *const core::ffi::c_char) -> usize { + let mut n = 0; + while *s != 0 { + n += 1; + s = s.wrapping_add(1); + } + n +} diff --git a/library/compiler-builtins/compiler-builtins/src/mem/mod.rs b/library/compiler-builtins/compiler-builtins/src/mem/mod.rs new file mode 100644 index 0000000000000..6828f3804e0fe --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/mem/mod.rs @@ -0,0 +1,60 @@ +// Trying to satisfy clippy here is hopeless +#![allow(clippy::style)] +// FIXME(e2024): this eventually needs to be removed. 
+#![allow(unsafe_op_in_unsafe_fn)] + +#[allow(warnings)] +#[cfg(target_pointer_width = "16")] +type c_int = i16; +#[allow(warnings)] +#[cfg(not(target_pointer_width = "16"))] +type c_int = i32; + +// memcpy/memmove/memset have optimized implementations on some architectures +#[cfg_attr( + all(not(feature = "no-asm"), target_arch = "x86_64"), + path = "x86_64.rs" +)] +mod impls; + +intrinsics! { + #[mem_builtin] + pub unsafe extern "C" fn memcpy(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 { + impls::copy_forward(dest, src, n); + dest + } + + #[mem_builtin] + pub unsafe extern "C" fn memmove(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 { + let delta = (dest as usize).wrapping_sub(src as usize); + if delta >= n { + // We can copy forwards because either dest is far enough ahead of src, + // or src is ahead of dest (and delta overflowed). + impls::copy_forward(dest, src, n); + } else { + impls::copy_backward(dest, src, n); + } + dest + } + + #[mem_builtin] + pub unsafe extern "C" fn memset(s: *mut u8, c: crate::mem::c_int, n: usize) -> *mut u8 { + impls::set_bytes(s, c as u8, n); + s + } + + #[mem_builtin] + pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { + impls::compare_bytes(s1, s2, n) + } + + #[mem_builtin] + pub unsafe extern "C" fn bcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { + memcmp(s1, s2, n) + } + + #[mem_builtin] + pub unsafe extern "C" fn strlen(s: *const core::ffi::c_char) -> usize { + impls::c_string_length(s) + } +} diff --git a/library/compiler-builtins/compiler-builtins/src/mem/x86_64.rs b/library/compiler-builtins/compiler-builtins/src/mem/x86_64.rs new file mode 100644 index 0000000000000..5cbe83ab1e21d --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/mem/x86_64.rs @@ -0,0 +1,313 @@ +// On most modern Intel and AMD processors, "rep movsq" and "rep stosq" have +// been enhanced to perform better than an simple qword loop, making them ideal +// for implementing memcpy/memset. Note that "rep cmps" has received no such +// enhancement, so it is not used to implement memcmp. +// +// On certain recent Intel processors, "rep movsb" and "rep stosb" have been +// further enhanced to automatically select the best microarchitectural +// implementation based on length and alignment. See the following features from +// the "Intel® 64 and IA-32 Architectures Optimization Reference Manual": +// - ERMSB - Enhanced REP MOVSB and STOSB (Ivy Bridge and later) +// - FSRM - Fast Short REP MOV (Ice Lake and later) +// - Fast Zero-Length MOVSB (On no current hardware) +// - Fast Short STOSB (On no current hardware) +// +// To simplify things, we switch to using the byte-based variants if the "ermsb" +// feature is present at compile-time. We don't bother detecting other features. +// Note that ERMSB does not enhance the backwards (DF=1) "rep movsb". + +use core::arch::asm; +use core::{intrinsics, mem}; + +#[inline(always)] +#[cfg(target_feature = "ermsb")] +pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, count: usize) { + // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. 
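+    // A single `rep`-prefixed `movsb` copies `rcx` bytes from `[rsi]` to `[rdi]`, advancing
+    // both pointers as it goes (direction flag clear), so no explicit loop is needed here.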
+ core::arch::asm!( + "repe movsb (%rsi), (%rdi)", + inout("rcx") count => _, + inout("rdi") dest => _, + inout("rsi") src => _, + options(att_syntax, nostack, preserves_flags) + ); +} + +#[inline(always)] +#[cfg(not(target_feature = "ermsb"))] +pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, count: usize) { + let (pre_byte_count, qword_count, byte_count) = rep_param(dest, count); + // Separating the blocks gives the compiler more freedom to reorder instructions. + asm!( + "rep movsb", + inout("ecx") pre_byte_count => _, + inout("rdi") dest => dest, + inout("rsi") src => src, + options(att_syntax, nostack, preserves_flags) + ); + asm!( + "rep movsq", + inout("rcx") qword_count => _, + inout("rdi") dest => dest, + inout("rsi") src => src, + options(att_syntax, nostack, preserves_flags) + ); + asm!( + "rep movsb", + inout("ecx") byte_count => _, + inout("rdi") dest => _, + inout("rsi") src => _, + options(att_syntax, nostack, preserves_flags) + ); +} + +#[inline(always)] +pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, count: usize) { + let (pre_byte_count, qword_count, byte_count) = rep_param(dest, count); + // We can't separate this block due to std/cld + asm!( + "std", + "rep movsb", + "sub $7, %rsi", + "sub $7, %rdi", + "mov {qword_count}, %rcx", + "rep movsq", + "test {pre_byte_count:e}, {pre_byte_count:e}", + "add $7, %rsi", + "add $7, %rdi", + "mov {pre_byte_count:e}, %ecx", + "rep movsb", + "cld", + pre_byte_count = in(reg) pre_byte_count, + qword_count = in(reg) qword_count, + inout("ecx") byte_count => _, + inout("rdi") dest.add(count - 1) => _, + inout("rsi") src.add(count - 1) => _, + // We modify flags, but we restore it afterwards + options(att_syntax, nostack, preserves_flags) + ); +} + +#[inline(always)] +#[cfg(target_feature = "ermsb")] +pub unsafe fn set_bytes(dest: *mut u8, c: u8, count: usize) { + // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. + core::arch::asm!( + "repe stosb %al, (%rdi)", + inout("rcx") count => _, + inout("rdi") dest => _, + inout("al") c => _, + options(att_syntax, nostack, preserves_flags) + ) +} + +#[inline(always)] +#[cfg(not(target_feature = "ermsb"))] +pub unsafe fn set_bytes(mut dest: *mut u8, c: u8, count: usize) { + let c = c as u64 * 0x0101_0101_0101_0101; + let (pre_byte_count, qword_count, byte_count) = rep_param(dest, count); + // Separating the blocks gives the compiler more freedom to reorder instructions. + asm!( + "rep stosb", + inout("ecx") pre_byte_count => _, + inout("rdi") dest => dest, + in("rax") c, + options(att_syntax, nostack, preserves_flags) + ); + asm!( + "rep stosq", + inout("rcx") qword_count => _, + inout("rdi") dest => dest, + in("rax") c, + options(att_syntax, nostack, preserves_flags) + ); + asm!( + "rep stosb", + inout("ecx") byte_count => _, + inout("rdi") dest => _, + in("rax") c, + options(att_syntax, nostack, preserves_flags) + ); +} + +#[inline(always)] +pub unsafe fn compare_bytes(a: *const u8, b: *const u8, n: usize) -> i32 { + #[inline(always)] + unsafe fn cmp(mut a: *const T, mut b: *const T, n: usize, f: F) -> i32 + where + T: Clone + Copy + Eq, + U: Clone + Copy + Eq, + F: FnOnce(*const U, *const U, usize) -> i32, + { + // Ensure T is not a ZST. 
+        const { assert!(mem::size_of::<T>() != 0) };
+
+        let end = a.add(intrinsics::unchecked_div(n, mem::size_of::<T>()));
+        while a != end {
+            if a.read_unaligned() != b.read_unaligned() {
+                return f(a.cast(), b.cast(), mem::size_of::<T>());
+            }
+            a = a.add(1);
+            b = b.add(1);
+        }
+        f(
+            a.cast(),
+            b.cast(),
+            intrinsics::unchecked_rem(n, mem::size_of::<T>()),
+        )
+    }
+    let c1 = |mut a: *const u8, mut b: *const u8, n| {
+        for _ in 0..n {
+            if a.read() != b.read() {
+                return i32::from(a.read()) - i32::from(b.read());
+            }
+            a = a.add(1);
+            b = b.add(1);
+        }
+        0
+    };
+    let c2 = |a: *const u16, b, n| cmp(a, b, n, c1);
+    let c4 = |a: *const u32, b, n| cmp(a, b, n, c2);
+    let c8 = |a: *const u64, b, n| cmp(a, b, n, c4);
+    let c16 = |a: *const u128, b, n| cmp(a, b, n, c8);
+    c16(a.cast(), b.cast(), n)
+}
+
+// In order to process more than one byte simultaneously when executing strlen,
+// two things must be considered:
+// * An n byte read with an n-byte aligned address will never cross
+//   a page boundary and will always succeed. Any smaller alignment
+//   may result in a read that will cross a page boundary, which may
+//   trigger an access violation.
+// * Surface Rust considers any kind of out-of-bounds read as undefined
+//   behaviour. To dodge this, memory access operations are written
+//   using inline assembly.
+
+#[cfg(target_feature = "sse2")]
+#[inline(always)]
+pub unsafe fn c_string_length(mut s: *const core::ffi::c_char) -> usize {
+    use core::arch::x86_64::{__m128i, _mm_cmpeq_epi8, _mm_movemask_epi8, _mm_set1_epi8};
+
+    let mut n = 0;
+
+    // The use of _mm_movemask_epi8 and company allows for speedups,
+    // but they aren't cheap by themselves. Thus, possibly small strings
+    // are handled in simple loops.
+
+    for _ in 0..4 {
+        if *s == 0 {
+            return n;
+        }
+
+        n += 1;
+        s = s.add(1);
+    }
+
+    // Shave off the least significant bits to align the address to a 16
+    // byte boundary. The shaved-off bits are used to correct the first iteration.
+
+    let align = s as usize & 15;
+    let mut s = ((s as usize) - align) as *const __m128i;
+    let zero = _mm_set1_epi8(0);
+
+    let x = {
+        let r;
+        asm!(
+            "movdqa ({addr}), {dest}",
+            addr = in(reg) s,
+            dest = out(xmm_reg) r,
+            options(att_syntax, nostack),
+        );
+        r
+    };
+    let cmp = _mm_movemask_epi8(_mm_cmpeq_epi8(x, zero)) >> align;
+
+    if cmp != 0 {
+        return n + cmp.trailing_zeros() as usize;
+    }
+
+    n += 16 - align;
+    s = s.add(1);
+
+    loop {
+        let x = {
+            let r;
+            asm!(
+                "movdqa ({addr}), {dest}",
+                addr = in(reg) s,
+                dest = out(xmm_reg) r,
+                options(att_syntax, nostack),
+            );
+            r
+        };
+        let cmp = _mm_movemask_epi8(_mm_cmpeq_epi8(x, zero)) as u32;
+        if cmp == 0 {
+            n += 16;
+            s = s.add(1);
+        } else {
+            return n + cmp.trailing_zeros() as usize;
+        }
+    }
+}
+
+// Provided for scenarios like kernel development, where SSE might not
+// be available.
+#[cfg(not(target_feature = "sse2"))]
+#[inline(always)]
+pub unsafe fn c_string_length(mut s: *const core::ffi::c_char) -> usize {
+    let mut n = 0;
+
+    // Check bytes in steps of one until
+    // either a zero byte is discovered or
+    // pointer is aligned to an eight byte boundary.
+
+    while s as usize & 7 != 0 {
+        if *s == 0 {
+            return n;
+        }
+        n += 1;
+        s = s.add(1);
+    }
+
+    // Check bytes in steps of eight until a zero
+    // byte is discovered.
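+    // The test below uses the classic bit trick `(w - 0x0101..01) & !w & 0x8080..80`, which is
+    // non-zero exactly when `w` contains a 0x00 byte: subtracting 1 from a zero byte borrows
+    // into its high bit, `!w` keeps that bit, and bytes whose high bit was already set are
+    // filtered out by the `!w` mask.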
+ + let mut s = s as *const u64; + + loop { + let mut cs = { + let r: u64; + asm!( + "mov ({addr}), {dest}", + addr = in(reg) s, + dest = out(reg) r, + options(att_syntax, nostack), + ); + r + }; + // Detect if a word has a zero byte, taken from + // https://graphics.stanford.edu/~seander/bithacks.html + if (cs.wrapping_sub(0x0101010101010101) & !cs & 0x8080808080808080) != 0 { + loop { + if cs & 255 == 0 { + return n; + } else { + cs >>= 8; + n += 1; + } + } + } else { + n += 8; + s = s.add(1); + } + } +} + +/// Determine optimal parameters for a `rep` instruction. +fn rep_param(dest: *mut u8, mut count: usize) -> (usize, usize, usize) { + // Unaligned writes are still slow on modern processors, so align the destination address. + let pre_byte_count = ((8 - (dest as usize & 0b111)) & 0b111).min(count); + count -= pre_byte_count; + let qword_count = count >> 3; + let byte_count = count & 0b111; + (pre_byte_count, qword_count, byte_count) +} diff --git a/library/compiler-builtins/compiler-builtins/src/probestack.rs b/library/compiler-builtins/compiler-builtins/src/probestack.rs new file mode 100644 index 0000000000000..5b6abd21a1db3 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/probestack.rs @@ -0,0 +1,350 @@ +// Copyright 2017 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! This module defines the `__rust_probestack` intrinsic which is used in the +//! implementation of "stack probes" on certain platforms. +//! +//! The purpose of a stack probe is to provide a static guarantee that if a +//! thread has a guard page then a stack overflow is guaranteed to hit that +//! guard page. If a function did not have a stack probe then there's a risk of +//! having a stack frame *larger* than the guard page, so a function call could +//! skip over the guard page entirely and then later hit maybe the heap or +//! another thread, possibly leading to security vulnerabilities such as [The +//! Stack Clash], for example. +//! +//! [The Stack Clash]: https://blog.qualys.com/securitylabs/2017/06/19/the-stack-clash +//! +//! The `__rust_probestack` is called in the prologue of functions whose stack +//! size is larger than the guard page, for example larger than 4096 bytes on +//! x86. This function is then responsible for "touching" all pages relevant to +//! the stack to ensure that that if any of them are the guard page we'll hit +//! them guaranteed. +//! +//! The precise ABI for how this function operates is defined by LLVM. There's +//! no real documentation as to what this is, so you'd basically need to read +//! the LLVM source code for reference. Often though the test cases can be +//! illuminating as to the ABI that's generated, or just looking at the output +//! of `llc`. +//! +//! Note that `#[naked]` is typically used here for the stack probe because the +//! ABI corresponds to no actual ABI. +//! +//! Finally it's worth noting that at the time of this writing LLVM only has +//! support for stack probes on x86 and x86_64. There's no support for stack +//! probes on any other architecture like ARM or PowerPC64. LLVM I'm sure would +//! be more than welcome to accept such a change! + +#![cfg(not(feature = "mangled-names"))] +// Windows and Cygwin already has builtins to do this. 
+#![cfg(not(any(windows, target_os = "cygwin")))] +// All these builtins require assembly +#![cfg(not(feature = "no-asm"))] +// We only define stack probing for these architectures today. +#![cfg(any(target_arch = "x86_64", target_arch = "x86"))] + +extern "C" { + pub fn __rust_probestack(); +} + +// A wrapper for our implementation of __rust_probestack, which allows us to +// keep the assembly inline while controlling all CFI directives in the assembly +// emitted for the function. +// +// This is the ELF version. +#[cfg(not(any(target_vendor = "apple", target_os = "uefi")))] +macro_rules! define_rust_probestack { + ($body: expr) => { + concat!( + " + .pushsection .text.__rust_probestack + .globl __rust_probestack + .type __rust_probestack, @function + .hidden __rust_probestack + __rust_probestack: + ", + $body, + " + .size __rust_probestack, . - __rust_probestack + .popsection + " + ) + }; +} + +#[cfg(all(target_os = "uefi", target_arch = "x86_64"))] +macro_rules! define_rust_probestack { + ($body: expr) => { + concat!( + " + .globl __rust_probestack + __rust_probestack: + ", + $body + ) + }; +} + +// Same as above, but for Mach-O. Note that the triple underscore +// is deliberate +#[cfg(target_vendor = "apple")] +macro_rules! define_rust_probestack { + ($body: expr) => { + concat!( + " + .globl ___rust_probestack + ___rust_probestack: + ", + $body + ) + }; +} + +// In UEFI x86 arch, triple underscore is deliberate. +#[cfg(all(target_os = "uefi", target_arch = "x86"))] +macro_rules! define_rust_probestack { + ($body: expr) => { + concat!( + " + .globl ___rust_probestack + ___rust_probestack: + ", + $body + ) + }; +} + +// Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax, +// ensuring that if any pages are unmapped we'll make a page fault. +// +// The ABI here is that the stack frame size is located in `%rax`. Upon +// return we're not supposed to modify `%rsp` or `%rax`. +// +// Any changes to this function should be replicated to the SGX version below. +#[cfg(all( + target_arch = "x86_64", + not(all(target_env = "sgx", target_vendor = "fortanix")) +))] +core::arch::global_asm!( + define_rust_probestack!( + " + .cfi_startproc + pushq %rbp + .cfi_adjust_cfa_offset 8 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + + mov %rax,%r11 // duplicate %rax as we're clobbering %r11 + + // Main loop, taken in one page increments. We're decrementing rsp by + // a page each time until there's less than a page remaining. We're + // guaranteed that this function isn't called unless there's more than a + // page needed. + // + // Note that we're also testing against `8(%rsp)` to account for the 8 + // bytes pushed on the stack orginally with our return address. Using + // `8(%rsp)` simulates us testing the stack pointer in the caller's + // context. + + // It's usually called when %rax >= 0x1000, but that's not always true. + // Dynamic stack allocation, which is needed to implement unsized + // rvalues, triggers stackprobe even if %rax < 0x1000. + // Thus we have to check %r11 first to avoid segfault. + cmp $0x1000,%r11 + jna 3f +2: + sub $0x1000,%rsp + test %rsp,8(%rsp) + sub $0x1000,%r11 + cmp $0x1000,%r11 + ja 2b + +3: + // Finish up the last remaining stack space requested, getting the last + // bits out of r11 + sub %r11,%rsp + test %rsp,8(%rsp) + + // Restore the stack pointer to what it previously was when entering + // this function. The caller will readjust the stack pointer after we + // return. 
+ add %rax,%rsp + + leave + .cfi_def_cfa_register %rsp + .cfi_adjust_cfa_offset -8 + ret + .cfi_endproc + " + ), + options(att_syntax) +); + +// This function is the same as above, except that some instructions are +// [manually patched for LVI]. +// +// [manually patched for LVI]: https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions +#[cfg(all( + target_arch = "x86_64", + all(target_env = "sgx", target_vendor = "fortanix") +))] +core::arch::global_asm!( + define_rust_probestack!( + " + .cfi_startproc + pushq %rbp + .cfi_adjust_cfa_offset 8 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + + mov %rax,%r11 // duplicate %rax as we're clobbering %r11 + + // Main loop, taken in one page increments. We're decrementing rsp by + // a page each time until there's less than a page remaining. We're + // guaranteed that this function isn't called unless there's more than a + // page needed. + // + // Note that we're also testing against `8(%rsp)` to account for the 8 + // bytes pushed on the stack orginally with our return address. Using + // `8(%rsp)` simulates us testing the stack pointer in the caller's + // context. + + // It's usually called when %rax >= 0x1000, but that's not always true. + // Dynamic stack allocation, which is needed to implement unsized + // rvalues, triggers stackprobe even if %rax < 0x1000. + // Thus we have to check %r11 first to avoid segfault. + cmp $0x1000,%r11 + jna 3f +2: + sub $0x1000,%rsp + test %rsp,8(%rsp) + sub $0x1000,%r11 + cmp $0x1000,%r11 + ja 2b + +3: + // Finish up the last remaining stack space requested, getting the last + // bits out of r11 + sub %r11,%rsp + test %rsp,8(%rsp) + + // Restore the stack pointer to what it previously was when entering + // this function. The caller will readjust the stack pointer after we + // return. + add %rax,%rsp + + leave + .cfi_def_cfa_register %rsp + .cfi_adjust_cfa_offset -8 + pop %r11 + lfence + jmp *%r11 + .cfi_endproc + " + ), + options(att_syntax) +); + +#[cfg(all(target_arch = "x86", not(target_os = "uefi")))] +// This is the same as x86_64 above, only translated for 32-bit sizes. Note +// that on Unix we're expected to restore everything as it was, this +// function basically can't tamper with anything. +// +// The ABI here is the same as x86_64, except everything is 32-bits large. +core::arch::global_asm!( + define_rust_probestack!( + " + .cfi_startproc + push %ebp + .cfi_adjust_cfa_offset 4 + .cfi_offset %ebp, -8 + mov %esp, %ebp + .cfi_def_cfa_register %ebp + push %ecx + mov %eax,%ecx + + cmp $0x1000,%ecx + jna 3f +2: + sub $0x1000,%esp + test %esp,8(%esp) + sub $0x1000,%ecx + cmp $0x1000,%ecx + ja 2b + +3: + sub %ecx,%esp + test %esp,8(%esp) + + add %eax,%esp + pop %ecx + leave + .cfi_def_cfa_register %esp + .cfi_adjust_cfa_offset -4 + ret + .cfi_endproc + " + ), + options(att_syntax) +); + +#[cfg(all(target_arch = "x86", target_os = "uefi"))] +// UEFI target is windows like target. LLVM will do _chkstk things like windows. +// probestack function will also do things like _chkstk in MSVC. +// So we need to sub %ax %sp in probestack when arch is x86. +// +// REF: Rust commit(74e80468347) +// rust\src\llvm-project\llvm\lib\Target\X86\X86FrameLowering.cpp: 805 +// Comments in LLVM: +// MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves. +// MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp +// themselves. 
+core::arch::global_asm!( + define_rust_probestack!( + " + .cfi_startproc + push %ebp + .cfi_adjust_cfa_offset 4 + .cfi_offset %ebp, -8 + mov %esp, %ebp + .cfi_def_cfa_register %ebp + push %ecx + push %edx + mov %eax,%ecx + + cmp $0x1000,%ecx + jna 3f +2: + sub $0x1000,%esp + test %esp,8(%esp) + sub $0x1000,%ecx + cmp $0x1000,%ecx + ja 2b + +3: + sub %ecx,%esp + test %esp,8(%esp) + mov 4(%ebp),%edx + mov %edx, 12(%esp) + add %eax,%esp + pop %edx + pop %ecx + leave + + sub %eax, %esp + .cfi_def_cfa_register %esp + .cfi_adjust_cfa_offset -4 + ret + .cfi_endproc + " + ), + options(att_syntax) +); diff --git a/library/compiler-builtins/compiler-builtins/src/riscv.rs b/library/compiler-builtins/compiler-builtins/src/riscv.rs new file mode 100644 index 0000000000000..bf31255334193 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/riscv.rs @@ -0,0 +1,50 @@ +intrinsics! { + // Ancient Egyptian/Ethiopian/Russian multiplication method + // see https://en.wikipedia.org/wiki/Ancient_Egyptian_multiplication + // + // This is a long-available stock algorithm; e.g. it is documented in + // Knuth's "The Art of Computer Programming" volume 2 (under the section + // "Evaluation of Powers") since at least the 2nd edition (1981). + // + // The main attraction of this method is that it implements (software) + // multiplication atop four simple operations: doubling, halving, checking + // if a value is even/odd, and addition. This is *not* considered to be the + // fastest multiplication method, but it may be amongst the simplest (and + // smallest with respect to code size). + // + // for reference, see also implementation from gcc + // https://raw.githubusercontent.com/gcc-mirror/gcc/master/libgcc/config/epiphany/mulsi3.c + // + // and from LLVM (in relatively readable RISC-V assembly): + // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/riscv/int_mul_impl.inc + pub extern "C" fn __mulsi3(a: u32, b: u32) -> u32 { + let (mut a, mut b) = (a, b); + let mut r: u32 = 0; + + while a > 0 { + if a & 1 > 0 { + r = r.wrapping_add(b); + } + a >>= 1; + b <<= 1; + } + + r + } + + #[cfg(not(target_feature = "m"))] + pub extern "C" fn __muldi3(a: u64, b: u64) -> u64 { + let (mut a, mut b) = (a, b); + let mut r: u64 = 0; + + while a > 0 { + if a & 1 > 0 { + r = r.wrapping_add(b); + } + a >>= 1; + b <<= 1; + } + + r + } +} diff --git a/library/compiler-builtins/compiler-builtins/src/x86.rs b/library/compiler-builtins/compiler-builtins/src/x86.rs new file mode 100644 index 0000000000000..01152d9c79869 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/x86.rs @@ -0,0 +1,53 @@ +#![allow(unused_imports)] + +use core::intrinsics; + +// NOTE These functions are implemented using assembly because they using a custom +// calling convention which can't be implemented using a normal Rust function + +// NOTE These functions are never mangled as they are not tested against compiler-rt + +intrinsics! 
{ + #[unsafe(naked)] + #[cfg(all( + any(all(windows, target_env = "gnu"), target_os = "uefi"), + not(feature = "no-asm") + ))] + pub unsafe extern "C" fn __chkstk() { + core::arch::naked_asm!( + "jmp __alloca", // Jump to __alloca since fallthrough may be unreliable" + options(att_syntax) + ); + } + + #[unsafe(naked)] + #[cfg(all( + any(all(windows, target_env = "gnu"), target_os = "uefi"), + not(feature = "no-asm") + ))] + pub unsafe extern "C" fn _alloca() { + // __chkstk and _alloca are the same function + core::arch::naked_asm!( + "push %ecx", + "cmp $0x1000,%eax", + "lea 8(%esp),%ecx", // esp before calling this routine -> ecx + "jb 1f", + "2:", + "sub $0x1000,%ecx", + "test %ecx,(%ecx)", + "sub $0x1000,%eax", + "cmp $0x1000,%eax", + "ja 2b", + "1:", + "sub %eax,%ecx", + "test %ecx,(%ecx)", + "lea 4(%esp),%eax", // load pointer to the return address into eax + "mov %ecx,%esp", // install the new top of stack pointer into esp + "mov -4(%eax),%ecx", // restore ecx + "push (%eax)", // push return address onto the stack + "sub %esp,%eax", // restore the original value in eax + "ret", + options(att_syntax) + ); + } +} diff --git a/library/compiler-builtins/compiler-builtins/src/x86_64.rs b/library/compiler-builtins/compiler-builtins/src/x86_64.rs new file mode 100644 index 0000000000000..fc1190f79b237 --- /dev/null +++ b/library/compiler-builtins/compiler-builtins/src/x86_64.rs @@ -0,0 +1,51 @@ +#![allow(unused_imports)] + +use core::intrinsics; + +// NOTE These functions are implemented using assembly because they using a custom +// calling convention which can't be implemented using a normal Rust function + +// NOTE These functions are never mangled as they are not tested against compiler-rt + +intrinsics! { + #[unsafe(naked)] + #[cfg(all( + any( + all(windows, target_env = "gnu"), + target_os = "cygwin", + target_os = "uefi" + ), + not(feature = "no-asm") + ))] + pub unsafe extern "C" fn ___chkstk_ms() { + core::arch::naked_asm!( + "push %rcx", + "push %rax", + "cmp $0x1000,%rax", + "lea 24(%rsp),%rcx", + "jb 1f", + "2:", + "sub $0x1000,%rcx", + "test %rcx,(%rcx)", + "sub $0x1000,%rax", + "cmp $0x1000,%rax", + "ja 2b", + "1:", + "sub %rax,%rcx", + "test %rcx,(%rcx)", + "pop %rax", + "pop %rcx", + "ret", + options(att_syntax) + ); + } +} + +// HACK(https://github.com/rust-lang/rust/issues/62785): x86_64-unknown-uefi needs special LLVM +// support unless we emit the _fltused +mod _fltused { + #[unsafe(no_mangle)] + #[used] + #[cfg(target_os = "uefi")] + static _fltused: i32 = 0; +} diff --git a/library/compiler-builtins/crates/libm-macros/Cargo.toml b/library/compiler-builtins/crates/libm-macros/Cargo.toml new file mode 100644 index 0000000000000..3929854f08e65 --- /dev/null +++ b/library/compiler-builtins/crates/libm-macros/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "libm-macros" +version = "0.1.0" +edition = "2024" +publish = false +license = "MIT OR Apache-2.0" + +[lib] +proc-macro = true + +[dependencies] +heck = "0.5.0" +proc-macro2 = "1.0.94" +quote = "1.0.40" +syn = { version = "2.0.100", features = ["full", "extra-traits", "visit-mut"] } + +[lints.rust] +# Values used during testing +unexpected_cfgs = { level = "warn", check-cfg = [ + 'cfg(f16_enabled)', + 'cfg(f128_enabled)', +] } diff --git a/library/compiler-builtins/crates/libm-macros/src/enums.rs b/library/compiler-builtins/crates/libm-macros/src/enums.rs new file mode 100644 index 0000000000000..b4646f984d471 --- /dev/null +++ b/library/compiler-builtins/crates/libm-macros/src/enums.rs @@ -0,0 +1,171 
@@ +use heck::ToUpperCamelCase; +use proc_macro2 as pm2; +use proc_macro2::{Ident, Span}; +use quote::quote; +use syn::spanned::Spanned; +use syn::{Fields, ItemEnum, Variant}; + +use crate::{ALL_OPERATIONS, base_name}; + +/// Implement `#[function_enum]`, see documentation in `lib.rs`. +pub fn function_enum( + mut item: ItemEnum, + attributes: pm2::TokenStream, +) -> syn::Result { + expect_empty_enum(&item)?; + let attr_span = attributes.span(); + let mut attr = attributes.into_iter(); + + // Attribute should be the identifier of the `BaseName` enum. + let Some(tt) = attr.next() else { + return Err(syn::Error::new(attr_span, "expected one attribute")); + }; + + let pm2::TokenTree::Ident(base_enum) = tt else { + return Err(syn::Error::new(tt.span(), "expected an identifier")); + }; + + if let Some(tt) = attr.next() { + return Err(syn::Error::new( + tt.span(), + "unexpected token after identifier", + )); + } + + let enum_name = &item.ident; + let mut as_str_arms = Vec::new(); + let mut from_str_arms = Vec::new(); + let mut base_arms = Vec::new(); + + for func in ALL_OPERATIONS.iter() { + let fn_name = func.name; + let ident = Ident::new(&fn_name.to_upper_camel_case(), Span::call_site()); + let bname_ident = Ident::new(&base_name(fn_name).to_upper_camel_case(), Span::call_site()); + + // Match arm for `fn as_str(self)` matcher + as_str_arms.push(quote! { Self::#ident => #fn_name }); + from_str_arms.push(quote! { #fn_name => Self::#ident }); + + // Match arm for `fn base_name(self)` matcher + base_arms.push(quote! { Self::#ident => #base_enum::#bname_ident }); + + let variant = Variant { + attrs: Vec::new(), + ident, + fields: Fields::Unit, + discriminant: None, + }; + + item.variants.push(variant); + } + + let variants = item.variants.iter(); + + let res = quote! { + // Instantiate the enum + #item + + impl #enum_name { + /// All variants of this enum. + pub const ALL: &[Self] = &[ + #( Self::#variants, )* + ]; + + /// The stringified version of this function name. + pub const fn as_str(self) -> &'static str { + match self { + #( #as_str_arms , )* + } + } + + /// If `s` is the name of a function, return it. + pub fn from_str(s: &str) -> Option { + let ret = match s { + #( #from_str_arms , )* + _ => return None, + }; + Some(ret) + } + + /// The base name enum for this function. + pub const fn base_name(self) -> #base_enum { + match self { + #( #base_arms, )* + } + } + + /// Return information about this operation. + pub fn math_op(self) -> &'static crate::op::MathOpInfo { + crate::op::ALL_OPERATIONS.iter().find(|op| op.name == self.as_str()).unwrap() + } + } + }; + + Ok(res) +} + +/// Implement `#[base_name_enum]`, see documentation in `lib.rs`. +pub fn base_name_enum( + mut item: ItemEnum, + attributes: pm2::TokenStream, +) -> syn::Result { + expect_empty_enum(&item)?; + if !attributes.is_empty() { + let sp = attributes.span(); + return Err(syn::Error::new(sp.span(), "no attributes expected")); + } + + let mut base_names: Vec<_> = ALL_OPERATIONS + .iter() + .map(|func| base_name(func.name)) + .collect(); + base_names.sort_unstable(); + base_names.dedup(); + + let item_name = &item.ident; + let mut as_str_arms = Vec::new(); + + for base_name in base_names { + let ident = Ident::new(&base_name.to_upper_camel_case(), Span::call_site()); + + // Match arm for `fn as_str(self)` matcher + as_str_arms.push(quote! 
{ Self::#ident => #base_name }); + + let variant = Variant { + attrs: Vec::new(), + ident, + fields: Fields::Unit, + discriminant: None, + }; + + item.variants.push(variant); + } + + let res = quote! { + // Instantiate the enum + #item + + impl #item_name { + /// The stringified version of this base name. + pub const fn as_str(self) -> &'static str { + match self { + #( #as_str_arms ),* + } + } + } + }; + + Ok(res) +} + +/// Verify that an enum is empty, otherwise return an error +fn expect_empty_enum(item: &ItemEnum) -> syn::Result<()> { + if !item.variants.is_empty() { + Err(syn::Error::new( + item.variants.span(), + "expected an empty enum", + )) + } else { + Ok(()) + } +} diff --git a/library/compiler-builtins/crates/libm-macros/src/lib.rs b/library/compiler-builtins/crates/libm-macros/src/lib.rs new file mode 100644 index 0000000000000..482da974ca89f --- /dev/null +++ b/library/compiler-builtins/crates/libm-macros/src/lib.rs @@ -0,0 +1,504 @@ +#![feature(let_chains)] + +mod enums; +mod parse; +mod shared; + +use parse::{Invocation, StructuredInput}; +use proc_macro as pm; +use proc_macro2::{self as pm2, Span}; +use quote::{ToTokens, quote}; +pub(crate) use shared::{ALL_OPERATIONS, FloatTy, MathOpInfo, Ty}; +use syn::spanned::Spanned; +use syn::visit_mut::VisitMut; +use syn::{Ident, ItemEnum}; + +const KNOWN_TYPES: &[&str] = &[ + "FTy", "CFn", "CArgs", "CRet", "RustFn", "RustArgs", "RustRet", "public", +]; + +/// Populate an enum with a variant representing function. Names are in upper camel case. +/// +/// Applied to an empty enum. Expects one attribute `#[function_enum(BaseName)]` that provides +/// the name of the `BaseName` enum. +#[proc_macro_attribute] +pub fn function_enum(attributes: pm::TokenStream, tokens: pm::TokenStream) -> pm::TokenStream { + let item = syn::parse_macro_input!(tokens as ItemEnum); + let res = enums::function_enum(item, attributes.into()); + + match res { + Ok(ts) => ts, + Err(e) => e.into_compile_error(), + } + .into() +} + +/// Create an enum representing all possible base names, with names in upper camel case. +/// +/// Applied to an empty enum. +#[proc_macro_attribute] +pub fn base_name_enum(attributes: pm::TokenStream, tokens: pm::TokenStream) -> pm::TokenStream { + let item = syn::parse_macro_input!(tokens as ItemEnum); + let res = enums::base_name_enum(item, attributes.into()); + + match res { + Ok(ts) => ts, + Err(e) => e.into_compile_error(), + } + .into() +} + +/// Do something for each function present in this crate. +/// +/// Takes a callback macro and invokes it multiple times, once for each function that +/// this crate exports. This makes it easy to create generic tests, benchmarks, or other checks +/// and apply it to each symbol. +/// +/// Additionally, the `extra` and `fn_extra` patterns can make use of magic identifiers: +/// +/// - `MACRO_FN_NAME`: gets replaced with the name of the function on that invocation. +/// - `MACRO_FN_NAME_NORMALIZED`: similar to the above, but removes sufixes so e.g. `sinf` becomes +/// `sin`, `cosf128` becomes `cos`, etc. +/// +/// Invoke as: +/// +/// ``` +/// // Macro that is invoked once per function +/// macro_rules! callback_macro { +/// ( +/// // Name of that function +/// fn_name: $fn_name:ident, +/// // The basic float type for this function (e.g. `f32`, `f64`) +/// FTy: $FTy:ty, +/// // Function signature of the C version (e.g. `fn(f32, &mut f32) -> f32`) +/// CFn: $CFn:ty, +/// // A tuple representing the C version's arguments (e.g. 
`(f32, &mut f32)`) +/// CArgs: $CArgs:ty, +/// // The C version's return type (e.g. `f32`) +/// CRet: $CRet:ty, +/// // Function signature of the Rust version (e.g. `fn(f32) -> (f32, f32)`) +/// RustFn: $RustFn:ty, +/// // A tuple representing the Rust version's arguments (e.g. `(f32,)`) +/// RustArgs: $RustArgs:ty, +/// // The Rust version's return type (e.g. `(f32, f32)`) +/// RustRet: $RustRet:ty, +/// // True if this is part of `libm`'s public API +/// public: $public:expr, +/// // Attributes for the current function, if any +/// attrs: [$($attr:meta),*], +/// // Extra tokens passed directly (if any) +/// extra: [$extra:ident], +/// // Extra function-tokens passed directly (if any) +/// fn_extra: $fn_extra:expr, +/// ) => { }; +/// } +/// +/// // All fields except for `callback` are optional. +/// libm_macros::for_each_function! { +/// // The macro to invoke as a callback +/// callback: callback_macro, +/// // Which types to include either as a list (`[CFn, RustFn, RustArgs]`) or "all" +/// emit_types: all, +/// // Functions to skip, i.e. `callback` shouldn't be called at all for these. +/// skip: [sin, cos], +/// // Attributes passed as `attrs` for specific functions. For example, here the invocation +/// // with `sinf` and that with `cosf` will both get `meta1` and `meta2`, but no others will. +/// // +/// // Note that `f16_enabled` and `f128_enabled` will always get emitted regardless of whether +/// // or not this is specified. +/// attributes: [ +/// #[meta1] +/// #[meta2] +/// [sinf, cosf], +/// ], +/// // Any tokens that should be passed directly to all invocations of the callback. This can +/// // be used to pass local variables or other things the macro needs access to. +/// extra: [foo], +/// // Similar to `extra`, but allow providing a pattern for only specific functions. Uses +/// // a simplified match-like syntax. +/// fn_extra: match MACRO_FN_NAME { +/// hypot | hypotf => |x| x.hypot(), +/// // `ALL_*` magic matchers also work to extract specific types +/// ALL_F64 => |x| x, +/// // The default pattern gets applied to everything that did not match +/// _ => |x| x, +/// }, +/// } +/// ``` +#[proc_macro] +pub fn for_each_function(tokens: pm::TokenStream) -> pm::TokenStream { + let input = syn::parse_macro_input!(tokens as Invocation); + + let res = StructuredInput::from_fields(input) + .and_then(|mut s_in| validate(&mut s_in).map(|fn_list| (s_in, fn_list))) + .and_then(|(s_in, fn_list)| expand(s_in, &fn_list)); + + match res { + Ok(ts) => ts.into(), + Err(e) => e.into_compile_error().into(), + } +} + +/// Check for any input that is structurally correct but has other problems. +/// +/// Returns the list of function names that we should expand for. +fn validate(input: &mut StructuredInput) -> syn::Result> { + // Replace magic mappers with a list of relevant functions. + if let Some(map) = &mut input.fn_extra { + for (name, ty) in [ + ("ALL_F16", FloatTy::F16), + ("ALL_F32", FloatTy::F32), + ("ALL_F64", FloatTy::F64), + ("ALL_F128", FloatTy::F128), + ] { + let Some(k) = map.keys().find(|key| *key == name) else { + continue; + }; + + let key = k.clone(); + let val = map.remove(&key).unwrap(); + + for op in ALL_OPERATIONS.iter().filter(|op| op.float_ty == ty) { + map.insert(Ident::new(op.name, key.span()), val.clone()); + } + } + } + + // Collect lists of all functions that are provied as macro inputs in various fields (only, + // skip, attributes). 
+ let attr_mentions = input + .attributes + .iter() + .flat_map(|map_list| map_list.iter()) + .flat_map(|attr_map| attr_map.names.iter()); + let only_mentions = input.only.iter().flat_map(|only_list| only_list.iter()); + let fn_extra_mentions = input + .fn_extra + .iter() + .flat_map(|v| v.keys()) + .filter(|name| *name != "_"); + let all_mentioned_fns = input + .skip + .iter() + .chain(only_mentions) + .chain(attr_mentions) + .chain(fn_extra_mentions); + + // Make sure that every function mentioned is a real function + for mentioned in all_mentioned_fns { + if !ALL_OPERATIONS.iter().any(|func| mentioned == func.name) { + let e = syn::Error::new( + mentioned.span(), + format!("unrecognized function name `{mentioned}`"), + ); + return Err(e); + } + } + + if !input.skip.is_empty() && input.only.is_some() { + let e = syn::Error::new( + input.only_span.unwrap(), + "only one of `skip` or `only` may be specified", + ); + return Err(e); + } + + // Construct a list of what we intend to expand + let mut fn_list = Vec::new(); + for func in ALL_OPERATIONS.iter() { + let fn_name = func.name; + // If we have an `only` list and it does _not_ contain this function name, skip it + if input + .only + .as_ref() + .is_some_and(|only| !only.iter().any(|o| o == fn_name)) + { + continue; + } + + // If there is a `skip` list that contains this function name, skip it + if input.skip.iter().any(|s| s == fn_name) { + continue; + } + + // Omit f16 and f128 functions if requested + if input.skip_f16_f128 && (func.float_ty == FloatTy::F16 || func.float_ty == FloatTy::F128) + { + continue; + } + + // Run everything else + fn_list.push(func); + } + + // Types that the user would like us to provide in the macro + let mut add_all_types = false; + for ty in &input.emit_types { + let ty_name = ty.to_string(); + if ty_name == "all" { + add_all_types = true; + continue; + } + + // Check that all requested types are valid + if !KNOWN_TYPES.contains(&ty_name.as_str()) { + let e = syn::Error::new( + ty_name.span(), + format!("unrecognized type identifier `{ty_name}`"), + ); + return Err(e); + } + } + + if add_all_types { + // Ensure that if `all` was specified that nothing else was + if input.emit_types.len() > 1 { + let e = syn::Error::new( + input.emit_types_span.unwrap(), + "if `all` is specified, no other type identifiers may be given", + ); + return Err(e); + } + + // ...and then add all types + input.emit_types.clear(); + for ty in KNOWN_TYPES { + let ident = Ident::new(ty, Span::call_site()); + input.emit_types.push(ident); + } + } + + if let Some(map) = &input.fn_extra + && !map.keys().any(|key| key == "_") + { + // No default provided; make sure every expected function is covered + let mut fns_not_covered = Vec::new(); + for func in &fn_list { + if !map.keys().any(|key| key == func.name) { + // `name` was not mentioned in the `match` statement + fns_not_covered.push(func); + } + } + + if !fns_not_covered.is_empty() { + let e = syn::Error::new( + input.fn_extra_span.unwrap(), + format!( + "`fn_extra`: no default `_` pattern specified and the following \ + patterns are not covered: {fns_not_covered:#?}" + ), + ); + return Err(e); + } + }; + + Ok(fn_list) +} + +/// Expand our structured macro input into invocations of the callback macro. 
+fn expand(input: StructuredInput, fn_list: &[&MathOpInfo]) -> syn::Result { + let mut out = pm2::TokenStream::new(); + let default_ident = Ident::new("_", Span::call_site()); + let callback = input.callback; + + for func in fn_list { + let fn_name = Ident::new(func.name, Span::call_site()); + + // Prepare attributes in an `attrs: ...` field + let mut meta_fields = Vec::new(); + if let Some(attrs) = &input.attributes { + let meta_iter = attrs + .iter() + .filter(|map| map.names.contains(&fn_name)) + .flat_map(|map| &map.meta) + .map(|v| v.into_token_stream()); + + meta_fields.extend(meta_iter); + } + + // Always emit f16 and f128 meta so this doesn't need to be repeated everywhere + if func.rust_sig.args.contains(&Ty::F16) || func.rust_sig.returns.contains(&Ty::F16) { + let ts = quote! { cfg(f16_enabled) }; + meta_fields.push(ts); + } + if func.rust_sig.args.contains(&Ty::F128) || func.rust_sig.returns.contains(&Ty::F128) { + let ts = quote! { cfg(f128_enabled) }; + meta_fields.push(ts); + } + + let meta_field = quote! { attrs: [ #( #meta_fields ),* ], }; + + // Prepare extra in an `extra: ...` field, running the replacer + let extra_field = match input.extra.clone() { + Some(mut extra) => { + let mut v = MacroReplace::new(func.name); + v.visit_expr_mut(&mut extra); + v.finish()?; + + quote! { extra: #extra, } + } + None => pm2::TokenStream::new(), + }; + + // Prepare function-specific extra in a `fn_extra: ...` field, running the replacer + let fn_extra_field = match input.fn_extra { + Some(ref map) => { + let mut fn_extra = map + .get(&fn_name) + .or_else(|| map.get(&default_ident)) + .unwrap() + .clone(); + + let mut v = MacroReplace::new(func.name); + v.visit_expr_mut(&mut fn_extra); + v.finish()?; + + quote! { fn_extra: #fn_extra, } + } + None => pm2::TokenStream::new(), + }; + + let base_fty = func.float_ty; + let c_args = &func.c_sig.args; + let c_ret = &func.c_sig.returns; + let rust_args = &func.rust_sig.args; + let rust_ret = &func.rust_sig.returns; + let public = func.public; + + let mut ty_fields = Vec::new(); + for ty in &input.emit_types { + let field = match ty.to_string().as_str() { + "FTy" => quote! { FTy: #base_fty, }, + "CFn" => quote! { CFn: fn( #(#c_args),* ,) -> ( #(#c_ret),* ), }, + "CArgs" => quote! { CArgs: ( #(#c_args),* ,), }, + "CRet" => quote! { CRet: ( #(#c_ret),* ), }, + "RustFn" => quote! { RustFn: fn( #(#rust_args),* ,) -> ( #(#rust_ret),* ), }, + "RustArgs" => quote! { RustArgs: ( #(#rust_args),* ,), }, + "RustRet" => quote! { RustRet: ( #(#rust_ret),* ), }, + "public" => quote! { public: #public, }, + _ => unreachable!("checked in validation"), + }; + ty_fields.push(field); + } + + let new = quote! { + #callback! { + fn_name: #fn_name, + #( #ty_fields )* + #meta_field + #extra_field + #fn_extra_field + } + }; + + out.extend(new); + } + + Ok(out) +} + +/// Visitor to replace "magic" identifiers that we allow: `MACRO_FN_NAME` and +/// `MACRO_FN_NAME_NORMALIZED`. 
+struct MacroReplace {
+    fn_name: &'static str,
+    /// Remove the trailing `f` or `f128` to make the normalized name.
+    norm_name: String,
+    error: Option<syn::Error>,
+}
+
+impl MacroReplace {
+    fn new(name: &'static str) -> Self {
+        let norm_name = base_name(name);
+        Self {
+            fn_name: name,
+            norm_name: norm_name.to_owned(),
+            error: None,
+        }
+    }
+
+    fn finish(self) -> syn::Result<()> {
+        match self.error {
+            Some(e) => Err(e),
+            None => Ok(()),
+        }
+    }
+
+    fn visit_ident_inner(&mut self, i: &mut Ident) {
+        let s = i.to_string();
+        if !s.starts_with("MACRO") || self.error.is_some() {
+            return;
+        }
+
+        match s.as_str() {
+            "MACRO_FN_NAME" => *i = Ident::new(self.fn_name, i.span()),
+            "MACRO_FN_NAME_NORMALIZED" => *i = Ident::new(&self.norm_name, i.span()),
+            _ => {
+                self.error = Some(syn::Error::new(
+                    i.span(),
+                    format!("unrecognized meta expression `{s}`"),
+                ));
+            }
+        }
+    }
+}
+
+impl VisitMut for MacroReplace {
+    fn visit_ident_mut(&mut self, i: &mut Ident) {
+        self.visit_ident_inner(i);
+        syn::visit_mut::visit_ident_mut(self, i);
+    }
+}
+
+/// Return the unsuffixed version of a function name; e.g. `abs` and `absf` both return `abs`,
+/// `lgamma_r` and `lgammaf_r` both return `lgamma_r`.
+fn base_name(name: &str) -> &str {
+    let known_mappings = &[
+        ("erff", "erf"),
+        ("erf", "erf"),
+        ("lgammaf_r", "lgamma_r"),
+        ("modff", "modf"),
+        ("modf", "modf"),
+    ];
+
+    match known_mappings.iter().find(|known| known.0 == name) {
+        Some(found) => found.1,
+        None => name
+            .strip_suffix("f")
+            .or_else(|| name.strip_suffix("f16"))
+            .or_else(|| name.strip_suffix("f128"))
+            .unwrap_or(name),
+    }
+}
+
+impl ToTokens for Ty {
+    fn to_tokens(&self, tokens: &mut pm2::TokenStream) {
+        let ts = match self {
+            Ty::F16 => quote! { f16 },
+            Ty::F32 => quote! { f32 },
+            Ty::F64 => quote! { f64 },
+            Ty::F128 => quote! { f128 },
+            Ty::I32 => quote! { i32 },
+            Ty::CInt => quote! { ::core::ffi::c_int },
+            Ty::MutF16 => quote! { &'a mut f16 },
+            Ty::MutF32 => quote! { &'a mut f32 },
+            Ty::MutF64 => quote! { &'a mut f64 },
+            Ty::MutF128 => quote! { &'a mut f128 },
+            Ty::MutI32 => quote! { &'a mut i32 },
+            Ty::MutCInt => quote! { &'a mut core::ffi::c_int },
+        };
+
+        tokens.extend(ts);
+    }
+}
+impl ToTokens for FloatTy {
+    fn to_tokens(&self, tokens: &mut pm2::TokenStream) {
+        let ts = match self {
+            FloatTy::F16 => quote! { f16 },
+            FloatTy::F32 => quote! { f32 },
+            FloatTy::F64 => quote! { f64 },
+            FloatTy::F128 => quote! { f128 },
+        };
+
+        tokens.extend(ts);
+    }
+}
diff --git a/library/compiler-builtins/crates/libm-macros/src/parse.rs b/library/compiler-builtins/crates/libm-macros/src/parse.rs
new file mode 100644
index 0000000000000..4876f3ef7263a
--- /dev/null
+++ b/library/compiler-builtins/crates/libm-macros/src/parse.rs
@@ -0,0 +1,296 @@
+use std::collections::BTreeMap;
+
+use proc_macro2::Span;
+use quote::ToTokens;
+use syn::parse::{Parse, ParseStream, Parser};
+use syn::punctuated::Punctuated;
+use syn::spanned::Spanned;
+use syn::token::{self, Comma};
+use syn::{Arm, Attribute, Expr, ExprMatch, Ident, LitBool, Meta, Token, bracketed};
+
+/// The input to our macro; just a list of `field: value` items.
+#[derive(Debug)]
+pub struct Invocation {
+    fields: Punctuated<Mapping, Comma>,
+}
+
+impl Parse for Invocation {
+    fn parse(input: ParseStream) -> syn::Result<Self> {
+        Ok(Self {
+            fields: input.parse_terminated(Mapping::parse, Token![,])?,
+        })
+    }
+}
+
+/// A `key: expression` mapping with nothing else. Basically a simplified `syn::Field`.
+#[derive(Debug)]
+struct Mapping {
+    name: Ident,
+    _sep: Token![:],
+    expr: Expr,
+}
+
+impl Parse for Mapping {
+    fn parse(input: ParseStream) -> syn::Result<Self> {
+        Ok(Self {
+            name: input.parse()?,
+            _sep: input.parse()?,
+            expr: input.parse()?,
+        })
+    }
+}
+
+/// The input provided to our proc macro, after parsing into the form we expect.
+#[derive(Debug)]
+pub struct StructuredInput {
+    /// Macro to invoke once per function
+    pub callback: Ident,
+    /// Whether or not to provide `CFn`, `CArgs`, `RustFn`, etc. This is really only needed
+    /// once per crate to set up the main trait.
+    pub emit_types: Vec<Ident>,
+    /// Skip these functions
+    pub skip: Vec<Ident>,
+    /// If true, omit f16 and f128 functions that aren't present in other libraries.
+    pub skip_f16_f128: bool,
+    /// Invoke only for these functions
+    pub only: Option<Vec<Ident>>,
+    /// Attributes that get applied to specific functions
+    pub attributes: Option<Vec<AttributeMap>>,
+    /// Extra expressions to pass to all invocations of the macro
+    pub extra: Option<Expr>,
+    /// Per-function extra expressions to pass to the macro
+    pub fn_extra: Option<BTreeMap<Ident, Expr>>,
+    // For diagnostics
+    pub emit_types_span: Option<Span>,
+    pub only_span: Option<Span>,
+    pub fn_extra_span: Option<Span>,
+}
+
+impl StructuredInput {
+    pub fn from_fields(input: Invocation) -> syn::Result<Self> {
+        let mut map: Vec<_> = input.fields.into_iter().collect();
+        let cb_expr = expect_field(&mut map, "callback")?;
+        let emit_types_expr = expect_field(&mut map, "emit_types").ok();
+        let skip_expr = expect_field(&mut map, "skip").ok();
+        let skip_f16_f128 = expect_field(&mut map, "skip_f16_f128").ok();
+        let only_expr = expect_field(&mut map, "only").ok();
+        let attr_expr = expect_field(&mut map, "attributes").ok();
+        let extra = expect_field(&mut map, "extra").ok();
+        let fn_extra = expect_field(&mut map, "fn_extra").ok();
+
+        if !map.is_empty() {
+            Err(syn::Error::new(
+                map.first().unwrap().name.span(),
+                format!("unexpected fields {map:?}"),
+            ))?;
+        }
+
+        let emit_types_span = emit_types_expr.as_ref().map(|expr| expr.span());
+        let emit_types = match emit_types_expr {
+            Some(expr) => Parser::parse2(parse_ident_or_array, expr.into_token_stream())?,
+            None => Vec::new(),
+        };
+
+        let skip = match skip_expr {
+            Some(expr) => Parser::parse2(parse_ident_array, expr.into_token_stream())?,
+            None => Vec::new(),
+        };
+
+        let skip_f16_f128 = match skip_f16_f128 {
+            Some(expr) => expect_litbool(expr)?.value,
+            None => false,
+        };
+
+        let only_span = only_expr.as_ref().map(|expr| expr.span());
+        let only = match only_expr {
+            Some(expr) => Some(Parser::parse2(parse_ident_array, expr.into_token_stream())?),
+            None => None,
+        };
+
+        let attributes = match attr_expr {
+            Some(expr) => {
+                let mut attributes = Vec::new();
+                let attr_exprs = Parser::parse2(parse_expr_array, expr.into_token_stream())?;
+
+                for attr in attr_exprs {
+                    attributes.push(syn::parse2(attr.into_token_stream())?);
+                }
+                Some(attributes)
+            }
+            None => None,
+        };
+
+        let fn_extra_span = fn_extra.as_ref().map(|expr| expr.span());
+        let fn_extra = match fn_extra {
+            Some(expr) => Some(extract_fn_extra_field(expr)?),
+            None => None,
+        };
+
+        Ok(Self {
+            callback: expect_ident(cb_expr)?,
+            emit_types,
+            skip,
+            skip_f16_f128,
+            only,
+            only_span,
+            attributes,
+            extra,
+            fn_extra,
+            fn_extra_span,
+            emit_types_span,
+        })
+    }
+}
+
+fn extract_fn_extra_field(expr: Expr) -> syn::Result<BTreeMap<Ident, Expr>> {
+    let Expr::Match(mexpr) = expr else {
+        let e = syn::Error::new(expr.span(), "`fn_extra` expects a match expression");
+        return Err(e);
+    };
+
+    let ExprMatch {
+        attrs,
+        match_token: _,
+        expr,
+        brace_token: _,
+        arms,
+    } = mexpr;
+
+    expect_empty_attrs(&attrs)?;
+
+    let match_on = expect_ident(*expr)?;
+    if match_on != "MACRO_FN_NAME" {
+        let e = syn::Error::new(match_on.span(), "only allowed to match on `MACRO_FN_NAME`");
+        return Err(e);
+    }
+
+    let mut res = BTreeMap::new();
+
+    for arm in arms {
+        let Arm {
+            attrs,
+            pat,
+            guard,
+            fat_arrow_token: _,
+            body,
+            comma: _,
+        } = arm;
+
+        expect_empty_attrs(&attrs)?;
+
+        let keys = match pat {
+            syn::Pat::Wild(w) => vec![Ident::new("_", w.span())],
+            _ => Parser::parse2(parse_ident_pat, pat.into_token_stream())?,
+        };
+
+        if let Some(guard) = guard {
+            let e = syn::Error::new(guard.0.span(), "no guards allowed in this position");
+            return Err(e);
+        }
+
+        for key in keys {
+            let inserted = res.insert(key.clone(), *body.clone());
+            if inserted.is_some() {
+                let e = syn::Error::new(key.span(), format!("key `{key}` specified twice"));
+                return Err(e);
+            }
+        }
+    }
+
+    Ok(res)
+}
+
+fn expect_empty_attrs(attrs: &[Attribute]) -> syn::Result<()> {
+    if attrs.is_empty() {
+        return Ok(());
+    }
+
+    let e = syn::Error::new(
+        attrs.first().unwrap().span(),
+        "no attributes allowed in this position",
+    );
+    Err(e)
+}
+
+/// Extract a named field from a map, raising an error if it doesn't exist.
+fn expect_field(v: &mut Vec<Mapping>, name: &str) -> syn::Result<Expr> {
+    let pos = v.iter().position(|v| v.name == name).ok_or_else(|| {
+        syn::Error::new(
+            Span::call_site(),
+            format!("missing expected field `{name}`"),
+        )
+    })?;
+
+    Ok(v.remove(pos).expr)
+}
+
+/// Coerce an expression into a simple identifier.
+fn expect_ident(expr: Expr) -> syn::Result<Ident> {
+    syn::parse2(expr.into_token_stream())
+}
+
+/// Coerce an expression into a simple keyword.
+fn expect_litbool(expr: Expr) -> syn::Result<LitBool> {
+    syn::parse2(expr.into_token_stream())
+}
+
+/// Parse either a single identifier (`foo`) or an array of identifiers (`[foo, bar, baz]`).
+fn parse_ident_or_array(input: ParseStream) -> syn::Result<Vec<Ident>> {
+    if !input.peek(token::Bracket) {
+        return Ok(vec![input.parse()?]);
+    }
+
+    parse_ident_array(input)
+}
+
+/// Parse an array of expressions.
+fn parse_expr_array(input: ParseStream) -> syn::Result<Vec<Expr>> {
+    let content;
+    let _ = bracketed!(content in input);
+    let fields = content.parse_terminated(Expr::parse, Token![,])?;
+    Ok(fields.into_iter().collect())
+}
+
+/// Parse an array of idents, e.g. `[foo, bar, baz]`.
+fn parse_ident_array(input: ParseStream) -> syn::Result<Vec<Ident>> {
+    let content;
+    let _ = bracketed!(content in input);
+    let fields = content.parse_terminated(Ident::parse, Token![,])?;
+    Ok(fields.into_iter().collect())
+}
+
+/// Parse a pattern of idents, specifically `(foo | bar | baz)`.
+fn parse_ident_pat(input: ParseStream) -> syn::Result<Vec<Ident>> {
+    if !input.peek2(Token![|]) {
+        return Ok(vec![input.parse()?]);
+    }
+
+    let fields = Punctuated::<Ident, Token![|]>::parse_separated_nonempty(input)?;
+    Ok(fields.into_iter().collect())
+}
+
+/// A mapping of attributes to identifiers (just a simplified `Expr`).
+/// +/// Expressed as: +/// +/// ```ignore +/// #[meta1] +/// #[meta2] +/// [foo, bar, baz] +/// ``` +#[derive(Debug)] +pub struct AttributeMap { + pub meta: Vec, + pub names: Vec, +} + +impl Parse for AttributeMap { + fn parse(input: ParseStream) -> syn::Result { + let attrs = input.call(Attribute::parse_outer)?; + + Ok(Self { + meta: attrs.into_iter().map(|a| a.meta).collect(), + names: parse_ident_array(input)?, + }) + } +} diff --git a/library/compiler-builtins/crates/libm-macros/src/shared.rs b/library/compiler-builtins/crates/libm-macros/src/shared.rs new file mode 100644 index 0000000000000..1cefe4e8c7ed4 --- /dev/null +++ b/library/compiler-builtins/crates/libm-macros/src/shared.rs @@ -0,0 +1,590 @@ +/* List of all functions that is shared between `libm-macros` and `libm-test`. */ + +use std::fmt; +use std::sync::LazyLock; + +struct NestedOp { + float_ty: FloatTy, + rust_sig: Signature, + c_sig: Option, + fn_list: &'static [&'static str], + public: bool, +} + +/// We need a flat list to work with most of the time, but define things as a more convenient +/// nested list. +const ALL_OPERATIONS_NESTED: &[NestedOp] = &[ + NestedOp { + // `fn(f16) -> f16` + float_ty: FloatTy::F16, + rust_sig: Signature { + args: &[Ty::F16], + returns: &[Ty::F16], + }, + c_sig: None, + fn_list: &[ + "ceilf16", + "fabsf16", + "floorf16", + "rintf16", + "roundevenf16", + "roundf16", + "sqrtf16", + "truncf16", + ], + public: true, + }, + NestedOp { + // `fn(f32) -> f32` + float_ty: FloatTy::F32, + rust_sig: Signature { + args: &[Ty::F32], + returns: &[Ty::F32], + }, + c_sig: None, + fn_list: &[ + "acosf", + "acoshf", + "asinf", + "asinhf", + "atanf", + "atanhf", + "cbrtf", + "ceilf", + "cosf", + "coshf", + "erfcf", + "erff", + "exp10f", + "exp2f", + "expf", + "expm1f", + "fabsf", + "floorf", + "j0f", + "j1f", + "lgammaf", + "log10f", + "log1pf", + "log2f", + "logf", + "rintf", + "roundevenf", + "roundf", + "sinf", + "sinhf", + "sqrtf", + "tanf", + "tanhf", + "tgammaf", + "truncf", + "y0f", + "y1f", + ], + public: true, + }, + NestedOp { + // `(f64) -> f64` + float_ty: FloatTy::F64, + rust_sig: Signature { + args: &[Ty::F64], + returns: &[Ty::F64], + }, + c_sig: None, + fn_list: &[ + "acos", + "acosh", + "asin", + "asinh", + "atan", + "atanh", + "cbrt", + "ceil", + "cos", + "cosh", + "erf", + "erfc", + "exp", + "exp10", + "exp2", + "expm1", + "fabs", + "floor", + "j0", + "j1", + "lgamma", + "log", + "log10", + "log1p", + "log2", + "rint", + "round", + "roundeven", + "sin", + "sinh", + "sqrt", + "tan", + "tanh", + "tgamma", + "trunc", + "y0", + "y1", + ], + public: true, + }, + NestedOp { + // `fn(f128) -> f128` + float_ty: FloatTy::F128, + rust_sig: Signature { + args: &[Ty::F128], + returns: &[Ty::F128], + }, + c_sig: None, + fn_list: &[ + "ceilf128", + "fabsf128", + "floorf128", + "rintf128", + "roundevenf128", + "roundf128", + "sqrtf128", + "truncf128", + ], + public: true, + }, + NestedOp { + // `(f16, f16) -> f16` + float_ty: FloatTy::F16, + rust_sig: Signature { + args: &[Ty::F16, Ty::F16], + returns: &[Ty::F16], + }, + c_sig: None, + fn_list: &[ + "copysignf16", + "fdimf16", + "fmaxf16", + "fmaximum_numf16", + "fmaximumf16", + "fminf16", + "fminimum_numf16", + "fminimumf16", + "fmodf16", + ], + public: true, + }, + NestedOp { + // `(f32, f32) -> f32` + float_ty: FloatTy::F32, + rust_sig: Signature { + args: &[Ty::F32, Ty::F32], + returns: &[Ty::F32], + }, + c_sig: None, + fn_list: &[ + "atan2f", + "copysignf", + "fdimf", + "fmaxf", + "fmaximum_numf", + "fmaximumf", + "fminf", + "fminimum_numf", + 
"fminimumf", + "fmodf", + "hypotf", + "nextafterf", + "powf", + "remainderf", + ], + public: true, + }, + NestedOp { + // `(f64, f64) -> f64` + float_ty: FloatTy::F64, + rust_sig: Signature { + args: &[Ty::F64, Ty::F64], + returns: &[Ty::F64], + }, + c_sig: None, + fn_list: &[ + "atan2", + "copysign", + "fdim", + "fmax", + "fmaximum", + "fmaximum_num", + "fmin", + "fminimum", + "fminimum_num", + "fmod", + "hypot", + "nextafter", + "pow", + "remainder", + ], + public: true, + }, + NestedOp { + // `(f128, f128) -> f128` + float_ty: FloatTy::F128, + rust_sig: Signature { + args: &[Ty::F128, Ty::F128], + returns: &[Ty::F128], + }, + c_sig: None, + fn_list: &[ + "copysignf128", + "fdimf128", + "fmaxf128", + "fmaximum_numf128", + "fmaximumf128", + "fminf128", + "fminimum_numf128", + "fminimumf128", + "fmodf128", + ], + public: true, + }, + NestedOp { + // `(f32, f32, f32) -> f32` + float_ty: FloatTy::F32, + rust_sig: Signature { + args: &[Ty::F32, Ty::F32, Ty::F32], + returns: &[Ty::F32], + }, + c_sig: None, + fn_list: &["fmaf"], + public: true, + }, + NestedOp { + // `(f64, f64, f64) -> f64` + float_ty: FloatTy::F64, + rust_sig: Signature { + args: &[Ty::F64, Ty::F64, Ty::F64], + returns: &[Ty::F64], + }, + c_sig: None, + fn_list: &["fma"], + public: true, + }, + NestedOp { + // `(f128, f128, f128) -> f128` + float_ty: FloatTy::F128, + rust_sig: Signature { + args: &[Ty::F128, Ty::F128, Ty::F128], + returns: &[Ty::F128], + }, + c_sig: None, + fn_list: &["fmaf128"], + public: true, + }, + NestedOp { + // `(f32) -> i32` + float_ty: FloatTy::F32, + rust_sig: Signature { + args: &[Ty::F32], + returns: &[Ty::I32], + }, + c_sig: None, + fn_list: &["ilogbf"], + public: true, + }, + NestedOp { + // `(f64) -> i32` + float_ty: FloatTy::F64, + rust_sig: Signature { + args: &[Ty::F64], + returns: &[Ty::I32], + }, + c_sig: None, + fn_list: &["ilogb"], + public: true, + }, + NestedOp { + // `(i32, f32) -> f32` + float_ty: FloatTy::F32, + rust_sig: Signature { + args: &[Ty::I32, Ty::F32], + returns: &[Ty::F32], + }, + c_sig: None, + fn_list: &["jnf", "ynf"], + public: true, + }, + NestedOp { + // `(i32, f64) -> f64` + float_ty: FloatTy::F64, + rust_sig: Signature { + args: &[Ty::I32, Ty::F64], + returns: &[Ty::F64], + }, + c_sig: None, + fn_list: &["jn", "yn"], + public: true, + }, + NestedOp { + // `(f16, i32) -> f16` + float_ty: FloatTy::F16, + rust_sig: Signature { + args: &[Ty::F16, Ty::I32], + returns: &[Ty::F16], + }, + c_sig: None, + fn_list: &["ldexpf16", "scalbnf16"], + public: true, + }, + NestedOp { + // `(f32, i32) -> f32` + float_ty: FloatTy::F32, + rust_sig: Signature { + args: &[Ty::F32, Ty::I32], + returns: &[Ty::F32], + }, + c_sig: None, + fn_list: &["ldexpf", "scalbnf"], + public: true, + }, + NestedOp { + // `(f64, i64) -> f64` + float_ty: FloatTy::F64, + rust_sig: Signature { + args: &[Ty::F64, Ty::I32], + returns: &[Ty::F64], + }, + c_sig: None, + fn_list: &["ldexp", "scalbn"], + public: true, + }, + NestedOp { + // `(f128, i32) -> f128` + float_ty: FloatTy::F128, + rust_sig: Signature { + args: &[Ty::F128, Ty::I32], + returns: &[Ty::F128], + }, + c_sig: None, + fn_list: &["ldexpf128", "scalbnf128"], + public: true, + }, + NestedOp { + // `(f32, &mut f32) -> f32` as `(f32) -> (f32, f32)` + float_ty: FloatTy::F32, + rust_sig: Signature { + args: &[Ty::F32], + returns: &[Ty::F32, Ty::F32], + }, + c_sig: Some(Signature { + args: &[Ty::F32, Ty::MutF32], + returns: &[Ty::F32], + }), + fn_list: &["modff"], + public: true, + }, + NestedOp { + // `(f64, &mut f64) -> f64` as `(f64) -> (f64, f64)` 
+ float_ty: FloatTy::F64, + rust_sig: Signature { + args: &[Ty::F64], + returns: &[Ty::F64, Ty::F64], + }, + c_sig: Some(Signature { + args: &[Ty::F64, Ty::MutF64], + returns: &[Ty::F64], + }), + fn_list: &["modf"], + public: true, + }, + NestedOp { + // `(f32, &mut c_int) -> f32` as `(f32) -> (f32, i32)` + float_ty: FloatTy::F32, + rust_sig: Signature { + args: &[Ty::F32], + returns: &[Ty::F32, Ty::I32], + }, + c_sig: Some(Signature { + args: &[Ty::F32, Ty::MutCInt], + returns: &[Ty::F32], + }), + fn_list: &["frexpf", "lgammaf_r"], + public: true, + }, + NestedOp { + // `(f64, &mut c_int) -> f64` as `(f64) -> (f64, i32)` + float_ty: FloatTy::F64, + rust_sig: Signature { + args: &[Ty::F64], + returns: &[Ty::F64, Ty::I32], + }, + c_sig: Some(Signature { + args: &[Ty::F64, Ty::MutCInt], + returns: &[Ty::F64], + }), + fn_list: &["frexp", "lgamma_r"], + public: true, + }, + NestedOp { + // `(f32, f32, &mut c_int) -> f32` as `(f32, f32) -> (f32, i32)` + float_ty: FloatTy::F32, + rust_sig: Signature { + args: &[Ty::F32, Ty::F32], + returns: &[Ty::F32, Ty::I32], + }, + c_sig: Some(Signature { + args: &[Ty::F32, Ty::F32, Ty::MutCInt], + returns: &[Ty::F32], + }), + fn_list: &["remquof"], + public: true, + }, + NestedOp { + // `(f64, f64, &mut c_int) -> f64` as `(f64, f64) -> (f64, i32)` + float_ty: FloatTy::F64, + rust_sig: Signature { + args: &[Ty::F64, Ty::F64], + returns: &[Ty::F64, Ty::I32], + }, + c_sig: Some(Signature { + args: &[Ty::F64, Ty::F64, Ty::MutCInt], + returns: &[Ty::F64], + }), + fn_list: &["remquo"], + public: true, + }, + NestedOp { + // `(f32, &mut f32, &mut f32)` as `(f32) -> (f32, f32)` + float_ty: FloatTy::F32, + rust_sig: Signature { + args: &[Ty::F32], + returns: &[Ty::F32, Ty::F32], + }, + c_sig: Some(Signature { + args: &[Ty::F32, Ty::MutF32, Ty::MutF32], + returns: &[], + }), + fn_list: &["sincosf"], + public: true, + }, + NestedOp { + // `(f64, &mut f64, &mut f64)` as `(f64) -> (f64, f64)` + float_ty: FloatTy::F64, + rust_sig: Signature { + args: &[Ty::F64], + returns: &[Ty::F64, Ty::F64], + }, + c_sig: Some(Signature { + args: &[Ty::F64, Ty::MutF64, Ty::MutF64], + returns: &[], + }), + fn_list: &["sincos"], + public: true, + }, +]; + +/// A type used in a function signature. +#[allow(dead_code)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum Ty { + F16, + F32, + F64, + F128, + I32, + CInt, + MutF16, + MutF32, + MutF64, + MutF128, + MutI32, + MutCInt, +} + +/// A subset of [`Ty`] representing only floats. +#[allow(dead_code)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum FloatTy { + F16, + F32, + F64, + F128, +} + +impl fmt::Display for Ty { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let s = match self { + Ty::F16 => "f16", + Ty::F32 => "f32", + Ty::F64 => "f64", + Ty::F128 => "f128", + Ty::I32 => "i32", + Ty::CInt => "::core::ffi::c_int", + Ty::MutF16 => "&mut f16", + Ty::MutF32 => "&mut f32", + Ty::MutF64 => "&mut f64", + Ty::MutF128 => "&mut f128", + Ty::MutI32 => "&mut i32", + Ty::MutCInt => "&mut ::core::ffi::c_int", + }; + f.write_str(s) + } +} + +impl fmt::Display for FloatTy { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let s = match self { + FloatTy::F16 => "f16", + FloatTy::F32 => "f32", + FloatTy::F64 => "f64", + FloatTy::F128 => "f128", + }; + f.write_str(s) + } +} + +/// Representation of e.g. `(f32, f32) -> f32` +#[derive(Debug, Clone)] +pub struct Signature { + pub args: &'static [Ty], + pub returns: &'static [Ty], +} + +/// Combined information about a function implementation. 
+#[derive(Debug, Clone)] +pub struct MathOpInfo { + pub name: &'static str, + pub float_ty: FloatTy, + /// Function signature for C implementations + pub c_sig: Signature, + /// Function signature for Rust implementations + pub rust_sig: Signature, + /// True if part of libm's public API + pub public: bool, +} + +/// A flat representation of `ALL_FUNCTIONS`. +pub static ALL_OPERATIONS: LazyLock> = LazyLock::new(|| { + let mut ret = Vec::new(); + + for op in ALL_OPERATIONS_NESTED { + let fn_names = op.fn_list; + for name in fn_names { + let api = MathOpInfo { + name, + float_ty: op.float_ty, + rust_sig: op.rust_sig.clone(), + c_sig: op.c_sig.clone().unwrap_or_else(|| op.rust_sig.clone()), + public: op.public, + }; + ret.push(api); + } + + if !fn_names.is_sorted() { + let mut sorted = (*fn_names).to_owned(); + sorted.sort_unstable(); + panic!("names list is not sorted: {fn_names:?}\nExpected: {sorted:?}"); + } + } + + ret.sort_by_key(|item| item.name); + ret +}); diff --git a/library/compiler-builtins/crates/libm-macros/tests/basic.rs b/library/compiler-builtins/crates/libm-macros/tests/basic.rs new file mode 100644 index 0000000000000..b4276262229fe --- /dev/null +++ b/library/compiler-builtins/crates/libm-macros/tests/basic.rs @@ -0,0 +1,177 @@ +#![feature(f16)] +#![feature(f128)] +// `STATUS_DLL_NOT_FOUND` on i686 MinGW, not worth looking into. +#![cfg(not(all(target_arch = "x86", target_os = "windows", target_env = "gnu")))] + +macro_rules! basic { + ( + fn_name: $fn_name:ident, + FTy: $FTy:ty, + CFn: $CFn:ty, + CArgs: $CArgs:ty, + CRet: $CRet:ty, + RustFn: $RustFn:ty, + RustArgs: $RustArgs:ty, + RustRet: $RustRet:ty, + public: $public:expr, + attrs: [$($attr:meta),*], + extra: [$($extra_tt:tt)*], + fn_extra: $fn_extra:expr, + ) => { + $(#[$attr])* + #[allow(dead_code)] + pub mod $fn_name { + type FTy= $FTy; + type CFnTy<'a> = $CFn; + type RustFnTy = $RustFn; + type RustArgsTy = $RustArgs; + type RustRetTy = $RustRet; + const PUBLIC: bool = $public; + const A: &[&str] = &[$($extra_tt)*]; + fn foo(a: f32) -> f32 { + $fn_extra(a) + } + } + }; +} + +mod test_basic { + libm_macros::for_each_function! { + callback: basic, + emit_types: all, + skip: [sin, cos], + attributes: [ + // just some random attributes + #[allow(clippy::pedantic)] + #[allow(dead_code)] + [sinf, cosf] + ], + extra: ["foo", "bar"], + fn_extra: match MACRO_FN_NAME { + sin => |x| x + 2.0, + cos | cosf => |x: f32| x.MACRO_FN_NAME_NORMALIZED(), + _ => |_x| 100.0 + } + } +} + +macro_rules! basic_no_extra { + ( + fn_name: $fn_name:ident, + attrs: [$($attr:meta),*], + ) => { + $(#[$attr])* + mod $fn_name {} + }; +} + +mod test_basic_no_extra { + // Test with no extra, no skip, and no attributes + libm_macros::for_each_function! { + callback: basic_no_extra, + } +} + +mod test_only { + // Test that only works + libm_macros::for_each_function! { + callback: basic_no_extra, + only: [sin, sinf], + } +} + +macro_rules! specified_types { + ( + fn_name: $fn_name:ident, + RustFn: $RustFn:ty, + RustArgs: $RustArgs:ty, + attrs: [$($attr:meta),*], + ) => { + $(#[$attr])* + #[allow(dead_code)] + mod $fn_name { + type RustFnTy = $RustFn; + type RustArgsTy = $RustArgs; + } + }; +} + +mod test_emit_types { + // Test that we can specify a couple types to emit + libm_macros::for_each_function! { + callback: specified_types, + emit_types: [RustFn, RustArgs], + } +} + +#[test] +fn test_skip_f16_f128() { + macro_rules! 
skip_f16_f128 { + ( + fn_name: $fn_name:ident, + attrs: [$($attr:meta),*], + extra: $vec:ident, + ) => { + $vec.push(stringify!($fn_name)); + }; + } + + let mut v = Vec::new(); + // Test with no extra, no skip, and no attributes + libm_macros::for_each_function! { + callback: skip_f16_f128, + skip_f16_f128: true, + extra: v, + } + + for name in v { + assert!(!name.contains("f16"), "{name}"); + assert!(!name.contains("f128"), "{name}"); + } +} + +#[test] +fn test_fn_extra_expansion() { + macro_rules! fn_extra_expansion { + ( + fn_name: $fn_name:ident, + attrs: [$($attr:meta),*], + fn_extra: $vec:expr, + ) => { + $vec.push(stringify!($fn_name)); + }; + } + + let mut vf16 = Vec::new(); + let mut vf32 = Vec::new(); + let mut vf64 = Vec::new(); + let mut vf128 = Vec::new(); + + // Test with no extra, no skip, and no attributes + libm_macros::for_each_function! { + callback: fn_extra_expansion, + fn_extra: match MACRO_FN_NAME { + ALL_F16 => vf16, + ALL_F32 => vf32, + ALL_F64 => vf64, + ALL_F128 => vf128, + } + } + + // Skip functions with a suffix after the type spec + vf16.retain(|name| !name.ends_with("_r")); + vf32.retain(|name| !name.ends_with("_r")); + vf64.retain(|name| !name.ends_with("_r")); + vf128.retain(|name| !name.ends_with("_r")); + + for name in vf16 { + assert!(name.ends_with("f16"), "{name}"); + } + for name in vf32 { + assert!(name.ends_with("f"), "{name}"); + } + let _ = vf64; + for name in vf128 { + assert!(name.ends_with("f128"), "{name}"); + } +} diff --git a/library/compiler-builtins/crates/libm-macros/tests/enum.rs b/library/compiler-builtins/crates/libm-macros/tests/enum.rs new file mode 100644 index 0000000000000..93e209a0dcc90 --- /dev/null +++ b/library/compiler-builtins/crates/libm-macros/tests/enum.rs @@ -0,0 +1,38 @@ +#[libm_macros::function_enum(BaseName)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum Identifier {} + +#[libm_macros::base_name_enum] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum BaseName {} + +#[test] +fn as_str() { + assert_eq!(Identifier::Sin.as_str(), "sin"); + assert_eq!(Identifier::Sinf.as_str(), "sinf"); +} + +#[test] +fn from_str() { + assert_eq!(Identifier::from_str("sin").unwrap(), Identifier::Sin); + assert_eq!(Identifier::from_str("sinf").unwrap(), Identifier::Sinf); +} + +#[test] +fn basename() { + assert_eq!(Identifier::Sin.base_name(), BaseName::Sin); + assert_eq!(Identifier::Sinf.base_name(), BaseName::Sin); +} + +#[test] +fn math_op() { + assert_eq!(Identifier::Sin.math_op().float_ty, FloatTy::F64); + assert_eq!(Identifier::Sinf.math_op().float_ty, FloatTy::F32); +} + +// Replicate the structure that we have in `libm-test` +mod op { + include!("../../libm-macros/src/shared.rs"); +} + +use op::FloatTy; diff --git a/library/compiler-builtins/crates/musl-math-sys/Cargo.toml b/library/compiler-builtins/crates/musl-math-sys/Cargo.toml new file mode 100644 index 0000000000000..d3fb147e526aa --- /dev/null +++ b/library/compiler-builtins/crates/musl-math-sys/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "musl-math-sys" +version = "0.1.0" +edition = "2024" +publish = false +license = "MIT OR Apache-2.0" + +[dependencies] + +[dev-dependencies] +libm = { path = "../../libm" } + +[build-dependencies] +cc = "1.2.16" diff --git a/library/compiler-builtins/crates/musl-math-sys/build.rs b/library/compiler-builtins/crates/musl-math-sys/build.rs new file mode 100644 index 0000000000000..b00dbc73e2800 --- /dev/null +++ b/library/compiler-builtins/crates/musl-math-sys/build.rs @@ -0,0 +1,350 @@ +use 
std::collections::BTreeMap; +use std::path::{Path, PathBuf}; +use std::process::{Command, Stdio}; +use std::{env, fs, str}; + +/// Static library that will be built +const LIB_NAME: &str = "musl_math_prefixed"; + +/// Files that have more than one symbol. Map of file names to the symbols defined in that file. +const MULTIPLE_SYMBOLS: &[(&str, &[&str])] = &[ + ( + "__invtrigl", + &["__invtrigl", "__invtrigl_R", "__pio2_hi", "__pio2_lo"], + ), + ("__polevll", &["__polevll", "__p1evll"]), + ("erf", &["erf", "erfc"]), + ("erff", &["erff", "erfcf"]), + ("erfl", &["erfl", "erfcl"]), + ("exp10", &["exp10", "pow10"]), + ("exp10f", &["exp10f", "pow10f"]), + ("exp10l", &["exp10l", "pow10l"]), + ("exp2f_data", &["exp2f_data", "__exp2f_data"]), + ("exp_data", &["exp_data", "__exp_data"]), + ("j0", &["j0", "y0"]), + ("j0f", &["j0f", "y0f"]), + ("j1", &["j1", "y1"]), + ("j1f", &["j1f", "y1f"]), + ("jn", &["jn", "yn"]), + ("jnf", &["jnf", "ynf"]), + ("lgamma", &["lgamma", "__lgamma_r"]), + ("remainder", &["remainder", "drem"]), + ("remainderf", &["remainderf", "dremf"]), + ("lgammaf", &["lgammaf", "lgammaf_r", "__lgammaf_r"]), + ("lgammal", &["lgammal", "lgammal_r", "__lgammal_r"]), + ("log2_data", &["log2_data", "__log2_data"]), + ("log2f_data", &["log2f_data", "__log2f_data"]), + ("log_data", &["log_data", "__log_data"]), + ("logf_data", &["logf_data", "__logf_data"]), + ("pow_data", &["pow_data", "__pow_log_data"]), + ("powf_data", &["powf_data", "__powf_log2_data"]), + ("signgam", &["signgam", "__signgam"]), + ("sqrt_data", &["sqrt_data", "__rsqrt_tab"]), +]; + +fn main() { + let cfg = Config::from_env(); + + if cfg.target_env == "msvc" + || cfg.target_family == "wasm" + || cfg.target_features.iter().any(|f| f == "thumb-mode") + { + println!( + "cargo::warning=Musl doesn't compile with the current \ + target {}; skipping build", + &cfg.target_string + ); + return; + } + + build_musl_math(&cfg); +} + +#[allow(dead_code)] +#[derive(Debug)] +struct Config { + manifest_dir: PathBuf, + out_dir: PathBuf, + musl_dir: PathBuf, + musl_arch: String, + target_arch: String, + target_env: String, + target_family: String, + target_os: String, + target_string: String, + target_vendor: String, + target_features: Vec, +} + +impl Config { + fn from_env() -> Self { + let manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + let target_features = env::var("CARGO_CFG_TARGET_FEATURE") + .map(|feats| feats.split(',').map(ToOwned::to_owned).collect()) + .unwrap_or_default(); + let musl_dir = manifest_dir.join("musl"); + + let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap(); + let musl_arch = if target_arch == "x86" { + "i386".to_owned() + } else { + target_arch.clone() + }; + + println!( + "cargo::rerun-if-changed={}/c_patches", + manifest_dir.display() + ); + println!("cargo::rerun-if-changed={}", musl_dir.display()); + + Self { + manifest_dir, + out_dir: PathBuf::from(env::var("OUT_DIR").unwrap()), + musl_dir, + musl_arch, + target_arch, + target_env: env::var("CARGO_CFG_TARGET_ENV").unwrap(), + target_family: env::var("CARGO_CFG_TARGET_FAMILY").unwrap(), + target_os: env::var("CARGO_CFG_TARGET_OS").unwrap(), + target_string: env::var("TARGET").unwrap(), + target_vendor: env::var("CARGO_CFG_TARGET_VENDOR").unwrap(), + target_features, + } + } +} + +/// Build musl math symbols to a static library +fn build_musl_math(cfg: &Config) { + let musl_dir = &cfg.musl_dir; + let math = musl_dir.join("src/math"); + let arch_dir = musl_dir.join("arch").join(&cfg.musl_arch); + assert!( + math.exists(), + 
"musl source not found. Is the submodule up to date?" + ); + + let source_map = find_math_source(&math, cfg); + let out_path = cfg.out_dir.join(format!("lib{LIB_NAME}.a")); + + // Run configuration steps. Usually done as part of the musl `Makefile`. + let obj_include = cfg.out_dir.join("musl_obj/include"); + fs::create_dir_all(&obj_include).unwrap(); + fs::create_dir_all(obj_include.join("bits")).unwrap(); + let sed_stat = Command::new("sed") + .arg("-f") + .arg(musl_dir.join("tools/mkalltypes.sed")) + .arg(arch_dir.join("bits/alltypes.h.in")) + .arg(musl_dir.join("include/alltypes.h.in")) + .stderr(Stdio::inherit()) + .output() + .unwrap(); + assert!( + sed_stat.status.success(), + "sed command failed: {:?}", + sed_stat.status + ); + + fs::write(obj_include.join("bits/alltypes.h"), sed_stat.stdout).unwrap(); + + let mut cbuild = cc::Build::new(); + cbuild + .extra_warnings(false) + .warnings(false) + .flag_if_supported("-Wno-bitwise-op-parentheses") + .flag_if_supported("-Wno-literal-range") + .flag_if_supported("-Wno-parentheses") + .flag_if_supported("-Wno-shift-count-overflow") + .flag_if_supported("-Wno-shift-op-parentheses") + .flag_if_supported("-Wno-unused-but-set-variable") + .flag_if_supported("-std=c99") + .flag_if_supported("-ffreestanding") + .flag_if_supported("-nostdinc") + .define("_ALL_SOURCE", "1") + .define( + "ROOT_INCLUDE_FEATURES", + Some(musl_dir.join("include/features.h").to_str().unwrap()), + ) + // Our overrides are in this directory + .include(cfg.manifest_dir.join("c_patches")) + .include(musl_dir.join("arch").join(&cfg.musl_arch)) + .include(musl_dir.join("arch/generic")) + .include(musl_dir.join("src/include")) + .include(musl_dir.join("src/internal")) + .include(obj_include) + .include(musl_dir.join("include")) + .file(cfg.manifest_dir.join("c_patches/alias.c")); + + for (sym_name, src_file) in source_map { + // Build the source file + cbuild.file(src_file); + + // Trickery! Redefine the symbol names to have the prefix `musl_`, which allows us to + // differentiate these symbols from whatever we provide. + if let Some((_names, syms)) = MULTIPLE_SYMBOLS + .iter() + .find(|(name, _syms)| *name == sym_name) + { + // Handle the occasional file that defines multiple symbols + for sym in *syms { + cbuild.define(sym, Some(format!("musl_{sym}").as_str())); + } + } else { + // If the file doesn't define multiple symbols, the file name will be the symbol + cbuild.define(&sym_name, Some(format!("musl_{sym_name}").as_str())); + } + } + + if cfg!(windows) { + // On Windows we don't have a good way to check symbols, so skip that step. + cbuild.compile(LIB_NAME); + return; + } + + let objfiles = cbuild.compile_intermediates(); + + // We create the archive ourselves with relocations rather than letting `cc` do it so we can + // encourage it to resolve symbols now. This should help avoid accidentally linking the wrong + // thing. + let stat = cbuild + .get_compiler() + .to_command() + .arg("-r") + .arg("-o") + .arg(&out_path) + .args(objfiles) + .status() + .unwrap(); + assert!(stat.success()); + + println!("cargo::rustc-link-lib={LIB_NAME}"); + println!("cargo::rustc-link-search=native={}", cfg.out_dir.display()); + + validate_archive_symbols(&out_path); +} + +/// Build a map of `name -> path`. `name` is typically the symbol name, but this doesn't account +/// for files that provide multiple symbols. 
+fn find_math_source(math_root: &Path, cfg: &Config) -> BTreeMap { + let mut map = BTreeMap::new(); + let mut arch_dir = None; + + // Locate all files and directories + for item in fs::read_dir(math_root).unwrap() { + let path = item.unwrap().path(); + let meta = fs::metadata(&path).unwrap(); + + if meta.is_dir() { + // Make note of the arch-specific directory if it exists + if path.file_name().unwrap() == cfg.target_arch.as_str() { + arch_dir = Some(path); + } + continue; + } + + // Skip non-source files + if path.extension().is_some_and(|ext| ext == "h") { + continue; + } + + let sym_name = path.file_stem().unwrap(); + map.insert(sym_name.to_str().unwrap().to_owned(), path.to_owned()); + } + + // If arch-specific versions are available, build those instead. + if let Some(arch_dir) = arch_dir { + for item in fs::read_dir(arch_dir).unwrap() { + let path = item.unwrap().path(); + let sym_name = path.file_stem().unwrap(); + + if path.extension().unwrap() == "s" { + // FIXME: we never build assembly versions since we have no good way to + // rename the symbol (our options are probably preprocessor or objcopy). + continue; + } + map.insert(sym_name.to_str().unwrap().to_owned(), path); + } + } + + map +} + +/// Make sure we don't have something like a loose unprefixed `_cos` called somewhere, which could +/// wind up linking to system libraries rather than the built musl library. +fn validate_archive_symbols(out_path: &Path) { + const ALLOWED_UNDEF_PFX: &[&str] = &[ + // PIC and arch-specific + ".TOC", + "_GLOBAL_OFFSET_TABLE_", + "__x86.get_pc_thunk", + // gcc/compiler-rt/compiler-builtins symbols + "__add", + "__aeabi_", + "__div", + "__eq", + "__extend", + "__fix", + "__float", + "__gcc_", + "__ge", + "__gt", + "__le", + "__lshr", + "__lt", + "__mul", + "__ne", + "__stack_chk_fail", + "__stack_chk_guard", + "__sub", + "__trunc", + "__undef", + // string routines + "__bzero", + "bzero", + // FPENV interfaces + "feclearexcept", + "fegetround", + "feraiseexcept", + "fesetround", + "fetestexcept", + ]; + + // List global undefined symbols + let out = Command::new("nm") + .arg("-guj") + .arg(out_path) + .stderr(Stdio::inherit()) + .output() + .unwrap(); + + let undef = str::from_utf8(&out.stdout).unwrap(); + let mut undef = undef.lines().collect::>(); + undef.retain(|sym| { + // Account for file formats that add a leading `_` + !ALLOWED_UNDEF_PFX + .iter() + .any(|pfx| sym.starts_with(pfx) || sym[1..].starts_with(pfx)) + }); + + assert!( + undef.is_empty(), + "found disallowed undefined symbols: {undef:#?}" + ); + + // Find any symbols that are missing the `_musl_` prefix` + let out = Command::new("nm") + .arg("-gUj") + .arg(out_path) + .stderr(Stdio::inherit()) + .output() + .unwrap(); + + let defined = str::from_utf8(&out.stdout).unwrap(); + let mut defined = defined.lines().collect::>(); + defined.retain(|sym| { + !(sym.starts_with("_musl_") + || sym.starts_with("musl_") + || sym.starts_with("__x86.get_pc_thunk")) + }); + + assert!(defined.is_empty(), "found unprefixed symbols: {defined:#?}"); +} diff --git a/library/compiler-builtins/crates/musl-math-sys/c_patches/alias.c b/library/compiler-builtins/crates/musl-math-sys/c_patches/alias.c new file mode 100644 index 0000000000000..63e0f08d5eb69 --- /dev/null +++ b/library/compiler-builtins/crates/musl-math-sys/c_patches/alias.c @@ -0,0 +1,40 @@ +/* On platforms that don't support weak symbols, define required aliases + * as wrappers. See comments in `features.h` for more. 
+ */ +#if defined(__APPLE__) || defined(__MINGW32__) + +double __lgamma_r(double a, int *b); +float __lgammaf_r(float a, int *b); +long __lgammal_r(long double a, int *b); +double exp10(double a); +float exp10f(float a); +long exp10l(long double a); +double remainder(double a, double b); +float remainderf(float a, float b); + +double lgamma_r(double a, int *b) { + return __lgamma_r(a, b); +} +float lgammaf_r(float a, int *b) { + return __lgammaf_r(a, b); +} +long double lgammal_r(long double a, int *b) { + return __lgammal_r(a, b); +} +double pow10(double a) { + return exp10(a); +} +float pow10f(float a) { + return exp10f(a); +} +long double pow10l(long double a) { + return exp10l(a); +} +double drem(double a, double b) { + return remainder(a, b); +} +float dremf(float a, float b) { + return remainderf(a, b); +} + +#endif diff --git a/library/compiler-builtins/crates/musl-math-sys/c_patches/features.h b/library/compiler-builtins/crates/musl-math-sys/c_patches/features.h new file mode 100644 index 0000000000000..97af935979a2e --- /dev/null +++ b/library/compiler-builtins/crates/musl-math-sys/c_patches/features.h @@ -0,0 +1,39 @@ +/* This is meant to override Musl's src/include/features.h + * + * We use a separate file here to redefine some attributes that don't work on + * all platforms that we would like to build on. + */ + +#ifndef FEATURES_H +#define FEATURES_H + +/* Get the required `#include "../../include/features.h"` since we can't use + * the relative path. The C macros need double indirection to get a usable + * string. */ +#define _stringify_inner(s) #s +#define _stringify(s) _stringify_inner(s) +#include _stringify(ROOT_INCLUDE_FEATURES) + +#if defined(__APPLE__) +#define weak __attribute__((__weak__)) +#define hidden __attribute__((__visibility__("hidden"))) + +/* We _should_ be able to define this as: + * _Pragma(_stringify(weak musl_ ## new = musl_ ## old)) + * However, weak symbols aren't handled correctly [1]. So we manually write + * wrappers, which are in `alias.c`. + * + * [1]: https://github.com/llvm/llvm-project/issues/111321 + */ +#define weak_alias(old, new) /* nothing */ + +#else +#define weak __attribute__((__weak__)) +#define hidden __attribute__((__visibility__("hidden"))) +#define weak_alias(old, new) \ + extern __typeof(old) musl_ ## new \ + __attribute__((__weak__, __alias__(_stringify(musl_ ## old)))) + +#endif /* defined(__APPLE__) */ + +#endif diff --git a/library/compiler-builtins/crates/musl-math-sys/musl b/library/compiler-builtins/crates/musl-math-sys/musl new file mode 160000 index 0000000000000..c47ad25ea3b48 --- /dev/null +++ b/library/compiler-builtins/crates/musl-math-sys/musl @@ -0,0 +1 @@ +Subproject commit c47ad25ea3b484e10326f933e927c0bc8cded3da diff --git a/library/compiler-builtins/crates/musl-math-sys/src/lib.rs b/library/compiler-builtins/crates/musl-math-sys/src/lib.rs new file mode 100644 index 0000000000000..6a4bf4859d931 --- /dev/null +++ b/library/compiler-builtins/crates/musl-math-sys/src/lib.rs @@ -0,0 +1,287 @@ +//! Bindings to Musl math functions (these are built in `build.rs`). + +use std::ffi::{c_char, c_int, c_long}; + +/// Macro for creating bindings and exposing a safe function (since the implementations have no +/// preconditions). Included functions must have correct signatures, otherwise this will be +/// unsound. +macro_rules! 
functions { + ( $( + $( #[$meta:meta] )* + $pfx_name:ident: $name:ident( $($arg:ident: $aty:ty),+ ) -> $rty:ty; + )* ) => { + unsafe extern "C" { + $( fn $pfx_name( $($arg: $aty),+ ) -> $rty; )* + } + + $( + // Expose a safe version + $( #[$meta] )* + pub fn $name( $($arg: $aty),+ ) -> $rty { + // SAFETY: FFI calls with no preconditions + unsafe { $pfx_name( $($arg),+ ) } + } + )* + + #[cfg(test)] + mod tests { + use super::*; + use test_support::CallTest; + + $( functions!( + @single_test + $name($($arg: $aty),+) -> $rty + ); )* + } + }; + + (@single_test + $name:ident( $($arg:ident: $aty:ty),+ ) -> $rty:ty + ) => { + // Run a simple check to ensure we can link and call the function without crashing. + #[test] + // FIXME(#309): LE PPC crashes calling some musl functions + #[cfg_attr(all(target_arch = "powerpc64", target_endian = "little"), ignore)] + fn $name() { + $rty>::check(super::$name); + } + }; +} + +#[cfg(test)] +mod test_support { + use core::ffi::c_char; + + /// Just verify that we are able to call the function. + pub trait CallTest { + fn check(f: Self); + } + + macro_rules! impl_calltest { + ($( ($($arg:ty),*) -> $ret:ty; )*) => { + $( + impl CallTest for fn($($arg),*) -> $ret { + fn check(f: Self) { + f($(1 as $arg),*); + } + } + )* + }; + } + + impl_calltest! { + (f32) -> f32; + (f64) -> f64; + (f32, f32) -> f32; + (f64, f64) -> f64; + (i32, f32) -> f32; + (i32, f64) -> f64; + (f32, f32, f32) -> f32; + (f64, f64, f64) -> f64; + (f32, i32) -> f32; + (f32, i64) -> f32; + (f32) -> i32; + (f64) -> i32; + (f64, i32) -> f64; + (f64, i64) -> f64; + } + + impl CallTest for fn(f32, &mut f32) -> f32 { + fn check(f: Self) { + let mut tmp = 0.0; + f(0.0, &mut tmp); + } + } + impl CallTest for fn(f64, &mut f64) -> f64 { + fn check(f: Self) { + let mut tmp = 0.0; + f(0.0, &mut tmp); + } + } + impl CallTest for fn(f32, &mut i32) -> f32 { + fn check(f: Self) { + let mut tmp = 1; + f(0.0, &mut tmp); + } + } + impl CallTest for fn(f64, &mut i32) -> f64 { + fn check(f: Self) { + let mut tmp = 1; + f(0.0, &mut tmp); + } + } + impl CallTest for fn(f32, f32, &mut i32) -> f32 { + fn check(f: Self) { + let mut tmp = 1; + f(0.0, 0.0, &mut tmp); + } + } + impl CallTest for fn(f64, f64, &mut i32) -> f64 { + fn check(f: Self) { + let mut tmp = 1; + f(0.0, 0.0, &mut tmp); + } + } + impl CallTest for fn(f32, &mut f32, &mut f32) { + fn check(f: Self) { + let mut tmp1 = 1.0; + let mut tmp2 = 1.0; + f(0.0, &mut tmp1, &mut tmp2); + } + } + impl CallTest for fn(f64, &mut f64, &mut f64) { + fn check(f: Self) { + let mut tmp1 = 1.0; + let mut tmp2 = 1.0; + f(0.0, &mut tmp1, &mut tmp2); + } + } + impl CallTest for fn(*const c_char) -> f32 { + fn check(f: Self) { + f(c"1".as_ptr()); + } + } + impl CallTest for fn(*const c_char) -> f64 { + fn check(f: Self) { + f(c"1".as_ptr()); + } + } +} + +functions! 
{ + musl_acos: acos(a: f64) -> f64; + musl_acosf: acosf(a: f32) -> f32; + musl_acosh: acosh(a: f64) -> f64; + musl_acoshf: acoshf(a: f32) -> f32; + musl_asin: asin(a: f64) -> f64; + musl_asinf: asinf(a: f32) -> f32; + musl_asinh: asinh(a: f64) -> f64; + musl_asinhf: asinhf(a: f32) -> f32; + musl_atan2: atan2(a: f64, b: f64) -> f64; + musl_atan2f: atan2f(a: f32, b: f32) -> f32; + musl_atan: atan(a: f64) -> f64; + musl_atanf: atanf(a: f32) -> f32; + musl_atanh: atanh(a: f64) -> f64; + musl_atanhf: atanhf(a: f32) -> f32; + musl_cbrt: cbrt(a: f64) -> f64; + musl_cbrtf: cbrtf(a: f32) -> f32; + musl_ceil: ceil(a: f64) -> f64; + musl_ceilf: ceilf(a: f32) -> f32; + musl_copysign: copysign(a: f64, b: f64) -> f64; + musl_copysignf: copysignf(a: f32, b: f32) -> f32; + musl_cos: cos(a: f64) -> f64; + musl_cosf: cosf(a: f32) -> f32; + musl_cosh: cosh(a: f64) -> f64; + musl_coshf: coshf(a: f32) -> f32; + musl_drem: drem(a: f64, b: f64) -> f64; + musl_dremf: dremf(a: f32, b: f32) -> f32; + musl_erf: erf(a: f64) -> f64; + musl_erfc: erfc(a: f64) -> f64; + musl_erfcf: erfcf(a: f32) -> f32; + musl_erff: erff(a: f32) -> f32; + musl_exp10: exp10(a: f64) -> f64; + musl_exp10f: exp10f(a: f32) -> f32; + musl_exp2: exp2(a: f64) -> f64; + musl_exp2f: exp2f(a: f32) -> f32; + musl_exp: exp(a: f64) -> f64; + musl_expf: expf(a: f32) -> f32; + musl_expm1: expm1(a: f64) -> f64; + musl_expm1f: expm1f(a: f32) -> f32; + musl_fabs: fabs(a: f64) -> f64; + musl_fabsf: fabsf(a: f32) -> f32; + musl_fdim: fdim(a: f64, b: f64) -> f64; + musl_fdimf: fdimf(a: f32, b: f32) -> f32; + musl_finite: finite(a: f64) -> c_int; + musl_finitef: finitef(a: f32) -> c_int; + musl_floor: floor(a: f64) -> f64; + musl_floorf: floorf(a: f32) -> f32; + musl_fma: fma(a: f64, b: f64, c: f64) -> f64; + musl_fmaf: fmaf(a: f32, b: f32, c: f32) -> f32; + musl_fmax: fmax(a: f64, b: f64) -> f64; + musl_fmaxf: fmaxf(a: f32, b: f32) -> f32; + musl_fmin: fmin(a: f64, b: f64) -> f64; + musl_fminf: fminf(a: f32, b: f32) -> f32; + musl_fmod: fmod(a: f64, b: f64) -> f64; + musl_fmodf: fmodf(a: f32, b: f32) -> f32; + musl_frexp: frexp(a: f64, b: &mut c_int) -> f64; + musl_frexpf: frexpf(a: f32, b: &mut c_int) -> f32; + musl_hypot: hypot(a: f64, b: f64) -> f64; + musl_hypotf: hypotf(a: f32, b: f32) -> f32; + musl_ilogb: ilogb(a: f64) -> c_int; + musl_ilogbf: ilogbf(a: f32) -> c_int; + musl_j0: j0(a: f64) -> f64; + musl_j0f: j0f(a: f32) -> f32; + musl_j1: j1(a: f64) -> f64; + musl_j1f: j1f(a: f32) -> f32; + musl_jn: jn(a: c_int, b: f64) -> f64; + musl_jnf: jnf(a: c_int, b: f32) -> f32; + musl_ldexp: ldexp(a: f64, b: c_int) -> f64; + musl_ldexpf: ldexpf(a: f32, b: c_int) -> f32; + musl_lgamma: lgamma(a: f64) -> f64; + musl_lgamma_r: lgamma_r(a: f64, b: &mut c_int) -> f64; + musl_lgammaf: lgammaf(a: f32) -> f32; + musl_lgammaf_r: lgammaf_r(a: f32, b: &mut c_int) -> f32; + musl_log10: log10(a: f64) -> f64; + musl_log10f: log10f(a: f32) -> f32; + musl_log1p: log1p(a: f64) -> f64; + musl_log1pf: log1pf(a: f32) -> f32; + musl_log2: log2(a: f64) -> f64; + musl_log2f: log2f(a: f32) -> f32; + musl_log: log(a: f64) -> f64; + musl_logb: logb(a: f64) -> f64; + musl_logbf: logbf(a: f32) -> f32; + musl_logf: logf(a: f32) -> f32; + musl_modf: modf(a: f64, b: &mut f64) -> f64; + musl_modff: modff(a: f32, b: &mut f32) -> f32; + + // FIXME: these need to be unsafe + #[allow(clippy::not_unsafe_ptr_arg_deref)] + musl_nan: nan(a: *const c_char) -> f64; + #[allow(clippy::not_unsafe_ptr_arg_deref)] + musl_nanf: nanf(a: *const c_char) -> f32; + + musl_nearbyint: nearbyint(a: f64) -> f64; + 
musl_nearbyintf: nearbyintf(a: f32) -> f32; + musl_nextafter: nextafter(a: f64, b: f64) -> f64; + musl_nextafterf: nextafterf(a: f32, b: f32) -> f32; + musl_pow10: pow10(a: f64) -> f64; + musl_pow10f: pow10f(a: f32) -> f32; + musl_pow: pow(a: f64, b: f64) -> f64; + musl_powf: powf(a: f32, b: f32) -> f32; + musl_remainder: remainder(a: f64, b: f64) -> f64; + musl_remainderf: remainderf(a: f32, b: f32) -> f32; + musl_remquo: remquo(a: f64, b: f64, c: &mut c_int) -> f64; + musl_remquof: remquof(a: f32, b: f32, c: &mut c_int) -> f32; + musl_rint: rint(a: f64) -> f64; + musl_rintf: rintf(a: f32) -> f32; + musl_round: round(a: f64) -> f64; + musl_roundf: roundf(a: f32) -> f32; + musl_scalbln: scalbln(a: f64, b: c_long) -> f64; + musl_scalblnf: scalblnf(a: f32, b: c_long) -> f32; + musl_scalbn: scalbn(a: f64, b: c_int) -> f64; + musl_scalbnf: scalbnf(a: f32, b: c_int) -> f32; + musl_significand: significand(a: f64) -> f64; + musl_significandf: significandf(a: f32) -> f32; + musl_sin: sin(a: f64) -> f64; + musl_sincos: sincos(a: f64, b: &mut f64, c: &mut f64) -> (); + musl_sincosf: sincosf(a: f32, b: &mut f32, c: &mut f32) -> (); + musl_sinf: sinf(a: f32) -> f32; + musl_sinh: sinh(a: f64) -> f64; + musl_sinhf: sinhf(a: f32) -> f32; + musl_sqrt: sqrt(a: f64) -> f64; + musl_sqrtf: sqrtf(a: f32) -> f32; + musl_tan: tan(a: f64) -> f64; + musl_tanf: tanf(a: f32) -> f32; + musl_tanh: tanh(a: f64) -> f64; + musl_tanhf: tanhf(a: f32) -> f32; + musl_tgamma: tgamma(a: f64) -> f64; + musl_tgammaf: tgammaf(a: f32) -> f32; + musl_trunc: trunc(a: f64) -> f64; + musl_truncf: truncf(a: f32) -> f32; + musl_y0: y0(a: f64) -> f64; + musl_y0f: y0f(a: f32) -> f32; + musl_y1: y1(a: f64) -> f64; + musl_y1f: y1f(a: f32) -> f32; + musl_yn: yn(a: c_int, b: f64) -> f64; + musl_ynf: ynf(a: c_int, b: f32) -> f32; +} diff --git a/library/compiler-builtins/crates/panic-handler/Cargo.toml b/library/compiler-builtins/crates/panic-handler/Cargo.toml new file mode 100644 index 0000000000000..a6764fc481b64 --- /dev/null +++ b/library/compiler-builtins/crates/panic-handler/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "panic-handler" +version = "0.1.0" +authors = ["Alex Crichton "] +edition = "2024" +publish = false + +[lib] +test = false +bench = false + +[dependencies] diff --git a/library/compiler-builtins/crates/panic-handler/src/lib.rs b/library/compiler-builtins/crates/panic-handler/src/lib.rs new file mode 100644 index 0000000000000..673e005224bd0 --- /dev/null +++ b/library/compiler-builtins/crates/panic-handler/src/lib.rs @@ -0,0 +1,11 @@ +//! This is needed for tests on targets that require a `#[panic_handler]` function + +#![feature(no_core)] +#![no_core] + +extern crate core; + +#[panic_handler] +fn panic(_: &core::panic::PanicInfo) -> ! 
{ + loop {} +} diff --git a/library/compiler-builtins/crates/util/Cargo.toml b/library/compiler-builtins/crates/util/Cargo.toml new file mode 100644 index 0000000000000..614c54bd83557 --- /dev/null +++ b/library/compiler-builtins/crates/util/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "util" +version = "0.1.0" +edition = "2024" +publish = false +license = "MIT OR Apache-2.0" + +[features] +default = ["build-musl", "build-mpfr", "unstable-float"] +build-musl = ["libm-test/build-musl", "dep:musl-math-sys"] +build-mpfr = ["libm-test/build-mpfr", "dep:rug"] +unstable-float = ["libm/unstable-float", "libm-test/unstable-float", "rug?/nightly-float"] + +[dependencies] +libm = { path = "../../libm", default-features = false } +libm-macros = { path = "../libm-macros" } +libm-test = { path = "../../libm-test", default-features = false } +musl-math-sys = { path = "../musl-math-sys", optional = true } +rug = { version = "1.27.0", optional = true, default-features = false, features = ["float", "std"] } diff --git a/library/compiler-builtins/crates/util/build.rs b/library/compiler-builtins/crates/util/build.rs new file mode 100644 index 0000000000000..a1be4127527ae --- /dev/null +++ b/library/compiler-builtins/crates/util/build.rs @@ -0,0 +1,10 @@ +#![allow(unexpected_cfgs)] + +#[path = "../../libm/configure.rs"] +mod configure; + +fn main() { + println!("cargo:rerun-if-changed=../../libm/configure.rs"); + let cfg = configure::Config::from_env(); + configure::emit_libm_config(&cfg); +} diff --git a/library/compiler-builtins/crates/util/src/main.rs b/library/compiler-builtins/crates/util/src/main.rs new file mode 100644 index 0000000000000..5972181531b26 --- /dev/null +++ b/library/compiler-builtins/crates/util/src/main.rs @@ -0,0 +1,350 @@ +//! Helper CLI utility for common tasks. + +#![cfg_attr(f16_enabled, feature(f16))] +#![cfg_attr(f128_enabled, feature(f128))] + +use std::any::type_name; +use std::env; +use std::num::ParseIntError; +use std::str::FromStr; + +use libm::support::{Hexf, hf32, hf64}; +#[cfg(feature = "build-mpfr")] +use libm_test::mpfloat::MpOp; +use libm_test::{MathOp, TupleCall}; +#[cfg(feature = "build-mpfr")] +use rug::az::{self, Az}; + +const USAGE: &str = "\ +usage: + +cargo run -p util -- + +SUBCOMMAND: + eval inputs... + Evaulate the expression with a given basis. This can be useful for + running routines with a debugger, or quickly checking input. Examples: + * eval musl sinf 1.234 # print the results of musl sinf(1.234f32) + * eval mpfr pow 1.234 2.432 # print the results of mpfr pow(1.234, 2.432) +"; + +fn main() { + let args = env::args().collect::>(); + let str_args = args.iter().map(|s| s.as_str()).collect::>(); + + match &str_args.as_slice()[1..] { + ["eval", basis, op, inputs @ ..] => do_eval(basis, op, inputs), + _ => { + println!("{USAGE}\nunrecognized input `{str_args:?}`"); + std::process::exit(1); + } + } +} + +macro_rules! 
handle_call { + ( + fn_name: $fn_name:ident, + CFn: $CFn:ty, + RustFn: $RustFn:ty, + RustArgs: $RustArgs:ty, + attrs: [$($attr:meta),*], + extra: ($basis:ident, $op:ident, $inputs:ident), + fn_extra: $musl_fn:expr, + ) => { + $(#[$attr])* + if $op == stringify!($fn_name) { + type Op = libm_test::op::$fn_name::Routine; + + let input = <$RustArgs>::parse($inputs); + let libm_fn: ::RustFn = libm::$fn_name; + + let output = match $basis { + "libm" => input.call_intercept_panics(libm_fn), + #[cfg(feature = "build-musl")] + "musl" => { + let musl_fn: ::CFn = + $musl_fn.unwrap_or_else(|| panic!("no musl function for {}", $op)); + input.call(musl_fn) + } + #[cfg(feature = "build-mpfr")] + "mpfr" => { + let mut mp = ::new_mp(); + Op::run(&mut mp, input) + } + _ => panic!("unrecognized or disabled basis '{}'", $basis), + }; + println!("{output:?} {:x}", Hexf(output)); + return; + } + }; +} + +/// Evaluate the specified operation with a given basis. +fn do_eval(basis: &str, op: &str, inputs: &[&str]) { + libm_macros::for_each_function! { + callback: handle_call, + emit_types: [CFn, RustFn, RustArgs], + extra: (basis, op, inputs), + fn_extra: match MACRO_FN_NAME { + // Not provided by musl + fmaximum + | fmaximum_num + | fmaximum_numf + | fmaximumf + | fminimum + | fminimum_num + | fminimum_numf + | fminimumf + | roundeven + | roundevenf + | ALL_F16 + | ALL_F128 => None, + _ => Some(musl_math_sys::MACRO_FN_NAME) + } + } + + panic!("no operation matching {op}"); +} + +/// Parse a tuple from a space-delimited string. +trait ParseTuple { + fn parse(input: &[&str]) -> Self; +} + +macro_rules! impl_parse_tuple { + ($ty:ty) => { + impl ParseTuple for ($ty,) { + fn parse(input: &[&str]) -> Self { + assert_eq!(input.len(), 1, "expected a single argument, got {input:?}"); + (parse(input, 0),) + } + } + + impl ParseTuple for ($ty, $ty) { + fn parse(input: &[&str]) -> Self { + assert_eq!(input.len(), 2, "expected two arguments, got {input:?}"); + (parse(input, 0), parse(input, 1)) + } + } + + impl ParseTuple for ($ty, i32) { + fn parse(input: &[&str]) -> Self { + assert_eq!(input.len(), 2, "expected two arguments, got {input:?}"); + (parse(input, 0), parse(input, 1)) + } + } + + impl ParseTuple for (i32, $ty) { + fn parse(input: &[&str]) -> Self { + assert_eq!(input.len(), 2, "expected two arguments, got {input:?}"); + (parse(input, 0), parse(input, 1)) + } + } + + impl ParseTuple for ($ty, $ty, $ty) { + fn parse(input: &[&str]) -> Self { + assert_eq!(input.len(), 3, "expected three arguments, got {input:?}"); + (parse(input, 0), parse(input, 1), parse(input, 2)) + } + } + }; +} + +#[allow(unused_macros)] +#[cfg(feature = "build-mpfr")] +macro_rules! 
impl_parse_tuple_via_rug { + ($ty:ty) => { + impl ParseTuple for ($ty,) { + fn parse(input: &[&str]) -> Self { + assert_eq!(input.len(), 1, "expected a single argument, got {input:?}"); + (parse_rug(input, 0),) + } + } + + impl ParseTuple for ($ty, $ty) { + fn parse(input: &[&str]) -> Self { + assert_eq!(input.len(), 2, "expected two arguments, got {input:?}"); + (parse_rug(input, 0), parse_rug(input, 1)) + } + } + + impl ParseTuple for ($ty, i32) { + fn parse(input: &[&str]) -> Self { + assert_eq!(input.len(), 2, "expected two arguments, got {input:?}"); + (parse_rug(input, 0), parse(input, 1)) + } + } + + impl ParseTuple for (i32, $ty) { + fn parse(input: &[&str]) -> Self { + assert_eq!(input.len(), 2, "expected two arguments, got {input:?}"); + (parse(input, 0), parse_rug(input, 1)) + } + } + + impl ParseTuple for ($ty, $ty, $ty) { + fn parse(input: &[&str]) -> Self { + assert_eq!(input.len(), 3, "expected three arguments, got {input:?}"); + ( + parse_rug(input, 0), + parse_rug(input, 1), + parse_rug(input, 2), + ) + } + } + }; +} + +// Fallback for when Rug is not built. +#[allow(unused_macros)] +#[cfg(not(feature = "build-mpfr"))] +macro_rules! impl_parse_tuple_via_rug { + ($ty:ty) => { + impl ParseTuple for ($ty,) { + fn parse(_input: &[&str]) -> Self { + panic!("parsing this type requires the `build-mpfr` feature") + } + } + + impl ParseTuple for ($ty, $ty) { + fn parse(_input: &[&str]) -> Self { + panic!("parsing this type requires the `build-mpfr` feature") + } + } + + impl ParseTuple for ($ty, i32) { + fn parse(_input: &[&str]) -> Self { + panic!("parsing this type requires the `build-mpfr` feature") + } + } + + impl ParseTuple for (i32, $ty) { + fn parse(_input: &[&str]) -> Self { + panic!("parsing this type requires the `build-mpfr` feature") + } + } + + impl ParseTuple for ($ty, $ty, $ty) { + fn parse(_input: &[&str]) -> Self { + panic!("parsing this type requires the `build-mpfr` feature") + } + } + }; +} + +impl_parse_tuple!(f32); +impl_parse_tuple!(f64); + +#[cfg(f16_enabled)] +impl_parse_tuple_via_rug!(f16); +#[cfg(f128_enabled)] +impl_parse_tuple_via_rug!(f128); + +/// Try to parse the number, printing a nice message on failure. +fn parse(input: &[&str], idx: usize) -> T { + let s = input[idx]; + + let msg = || format!("invalid {} input '{s}'", type_name::()); + + if s.starts_with("0x") || s.starts_with("-0x") { + return T::from_str_radix(s, 16).unwrap_or_else(|_| panic!("{}", msg())); + } + + if s.starts_with("0b") { + return T::from_str_radix(s, 2).unwrap_or_else(|_| panic!("{}", msg())); + } + + s.parse().unwrap_or_else(|_| panic!("{}", msg())) +} + +/// Try to parse the float type going via `rug`, for `f16` and `f128` which don't yet implement +/// `FromStr`. 
+#[cfg(feature = "build-mpfr")] +fn parse_rug(input: &[&str], idx: usize) -> F +where + F: libm_test::Float + FromStrRadix, + rug::Float: az::Cast, +{ + let s = input[idx]; + + let msg = || format!("invalid {} input '{s}'", type_name::()); + + if s.starts_with("0x") { + return F::from_str_radix(s, 16).unwrap_or_else(|_| panic!("{}", msg())); + } + + if s.starts_with("0b") { + return F::from_str_radix(s, 2).unwrap_or_else(|_| panic!("{}", msg())); + } + + let x = rug::Float::parse(s).unwrap_or_else(|_| panic!("{}", msg())); + let x = rug::Float::with_val(F::BITS, x); + x.az() +} + +trait FromStrRadix: Sized { + fn from_str_radix(s: &str, radix: u32) -> Result; +} + +impl FromStrRadix for i32 { + fn from_str_radix(s: &str, radix: u32) -> Result { + let s = strip_radix_prefix(s, radix); + i32::from_str_radix(s, radix) + } +} + +#[cfg(f16_enabled)] +impl FromStrRadix for f16 { + fn from_str_radix(s: &str, radix: u32) -> Result { + if radix == 16 && s.contains("p") { + return Ok(libm::support::hf16(s)); + } + + let s = strip_radix_prefix(s, radix); + u16::from_str_radix(s, radix).map(Self::from_bits) + } +} + +impl FromStrRadix for f32 { + fn from_str_radix(s: &str, radix: u32) -> Result { + if radix == 16 && s.contains("p") { + // Parse as hex float + return Ok(hf32(s)); + } + + let s = strip_radix_prefix(s, radix); + u32::from_str_radix(s, radix).map(Self::from_bits) + } +} + +impl FromStrRadix for f64 { + fn from_str_radix(s: &str, radix: u32) -> Result { + if s.contains("p") { + return Ok(hf64(s)); + } + + let s = strip_radix_prefix(s, radix); + u64::from_str_radix(s, radix).map(Self::from_bits) + } +} + +#[cfg(f128_enabled)] +impl FromStrRadix for f128 { + fn from_str_radix(s: &str, radix: u32) -> Result { + if radix == 16 && s.contains("p") { + return Ok(libm::support::hf128(s)); + } + let s = strip_radix_prefix(s, radix); + u128::from_str_radix(s, radix).map(Self::from_bits) + } +} + +fn strip_radix_prefix(s: &str, radix: u32) -> &str { + if radix == 16 { + s.strip_prefix("0x").unwrap() + } else if radix == 2 { + s.strip_prefix("0b").unwrap() + } else { + s + } +} diff --git a/library/compiler-builtins/etc/function-definitions.json b/library/compiler-builtins/etc/function-definitions.json new file mode 100644 index 0000000000000..4f796905b7543 --- /dev/null +++ b/library/compiler-builtins/etc/function-definitions.json @@ -0,0 +1,1071 @@ +{ + "__comment": "Autogenerated by update-api-list.py. List of files that define a function with a given name. 
This file is checked in to make it obvious if refactoring breaks things", + "acos": { + "sources": [ + "libm/src/math/acos.rs" + ], + "type": "f64" + }, + "acosf": { + "sources": [ + "libm/src/math/acosf.rs" + ], + "type": "f32" + }, + "acosh": { + "sources": [ + "libm/src/math/acosh.rs" + ], + "type": "f64" + }, + "acoshf": { + "sources": [ + "libm/src/math/acoshf.rs" + ], + "type": "f32" + }, + "asin": { + "sources": [ + "libm/src/math/asin.rs" + ], + "type": "f64" + }, + "asinf": { + "sources": [ + "libm/src/math/asinf.rs" + ], + "type": "f32" + }, + "asinh": { + "sources": [ + "libm/src/math/asinh.rs" + ], + "type": "f64" + }, + "asinhf": { + "sources": [ + "libm/src/math/asinhf.rs" + ], + "type": "f32" + }, + "atan": { + "sources": [ + "libm/src/math/atan.rs" + ], + "type": "f64" + }, + "atan2": { + "sources": [ + "libm/src/math/atan2.rs" + ], + "type": "f64" + }, + "atan2f": { + "sources": [ + "libm/src/math/atan2f.rs" + ], + "type": "f32" + }, + "atanf": { + "sources": [ + "libm/src/math/atanf.rs" + ], + "type": "f32" + }, + "atanh": { + "sources": [ + "libm/src/math/atanh.rs" + ], + "type": "f64" + }, + "atanhf": { + "sources": [ + "libm/src/math/atanhf.rs" + ], + "type": "f32" + }, + "cbrt": { + "sources": [ + "libm/src/math/cbrt.rs" + ], + "type": "f64" + }, + "cbrtf": { + "sources": [ + "libm/src/math/cbrtf.rs" + ], + "type": "f32" + }, + "ceil": { + "sources": [ + "libm/src/math/arch/i586.rs", + "libm/src/math/arch/wasm32.rs", + "libm/src/math/ceil.rs", + "libm/src/math/generic/ceil.rs" + ], + "type": "f64" + }, + "ceilf": { + "sources": [ + "libm/src/math/arch/wasm32.rs", + "libm/src/math/ceil.rs", + "libm/src/math/generic/ceil.rs" + ], + "type": "f32" + }, + "ceilf128": { + "sources": [ + "libm/src/math/ceil.rs", + "libm/src/math/generic/ceil.rs" + ], + "type": "f128" + }, + "ceilf16": { + "sources": [ + "libm/src/math/ceil.rs", + "libm/src/math/generic/ceil.rs" + ], + "type": "f16" + }, + "copysign": { + "sources": [ + "libm/src/math/copysign.rs", + "libm/src/math/generic/copysign.rs" + ], + "type": "f64" + }, + "copysignf": { + "sources": [ + "libm/src/math/copysign.rs", + "libm/src/math/generic/copysign.rs" + ], + "type": "f32" + }, + "copysignf128": { + "sources": [ + "libm/src/math/copysign.rs", + "libm/src/math/generic/copysign.rs" + ], + "type": "f128" + }, + "copysignf16": { + "sources": [ + "libm/src/math/copysign.rs", + "libm/src/math/generic/copysign.rs" + ], + "type": "f16" + }, + "cos": { + "sources": [ + "libm/src/math/cos.rs" + ], + "type": "f64" + }, + "cosf": { + "sources": [ + "libm/src/math/cosf.rs" + ], + "type": "f32" + }, + "cosh": { + "sources": [ + "libm/src/math/cosh.rs" + ], + "type": "f64" + }, + "coshf": { + "sources": [ + "libm/src/math/coshf.rs" + ], + "type": "f32" + }, + "erf": { + "sources": [ + "libm/src/math/erf.rs" + ], + "type": "f64" + }, + "erfc": { + "sources": [ + "libm/src/math/erf.rs" + ], + "type": "f64" + }, + "erfcf": { + "sources": [ + "libm/src/math/erff.rs" + ], + "type": "f32" + }, + "erff": { + "sources": [ + "libm/src/math/erff.rs" + ], + "type": "f32" + }, + "exp": { + "sources": [ + "libm/src/math/exp.rs" + ], + "type": "f64" + }, + "exp10": { + "sources": [ + "libm/src/math/exp10.rs" + ], + "type": "f64" + }, + "exp10f": { + "sources": [ + "libm/src/math/exp10f.rs" + ], + "type": "f32" + }, + "exp2": { + "sources": [ + "libm/src/math/exp2.rs" + ], + "type": "f64" + }, + "exp2f": { + "sources": [ + "libm/src/math/exp2f.rs" + ], + "type": "f32" + }, + "expf": { + "sources": [ + "libm/src/math/expf.rs" + ], + "type": "f32" + 
}, + "expm1": { + "sources": [ + "libm/src/math/expm1.rs" + ], + "type": "f64" + }, + "expm1f": { + "sources": [ + "libm/src/math/expm1f.rs" + ], + "type": "f32" + }, + "fabs": { + "sources": [ + "libm/src/math/arch/wasm32.rs", + "libm/src/math/fabs.rs", + "libm/src/math/generic/fabs.rs" + ], + "type": "f64" + }, + "fabsf": { + "sources": [ + "libm/src/math/arch/wasm32.rs", + "libm/src/math/fabs.rs", + "libm/src/math/generic/fabs.rs" + ], + "type": "f32" + }, + "fabsf128": { + "sources": [ + "libm/src/math/fabs.rs", + "libm/src/math/generic/fabs.rs" + ], + "type": "f128" + }, + "fabsf16": { + "sources": [ + "libm/src/math/fabs.rs", + "libm/src/math/generic/fabs.rs" + ], + "type": "f16" + }, + "fdim": { + "sources": [ + "libm/src/math/fdim.rs", + "libm/src/math/generic/fdim.rs" + ], + "type": "f64" + }, + "fdimf": { + "sources": [ + "libm/src/math/fdim.rs", + "libm/src/math/generic/fdim.rs" + ], + "type": "f32" + }, + "fdimf128": { + "sources": [ + "libm/src/math/fdim.rs", + "libm/src/math/generic/fdim.rs" + ], + "type": "f128" + }, + "fdimf16": { + "sources": [ + "libm/src/math/fdim.rs", + "libm/src/math/generic/fdim.rs" + ], + "type": "f16" + }, + "floor": { + "sources": [ + "libm/src/math/arch/i586.rs", + "libm/src/math/arch/wasm32.rs", + "libm/src/math/floor.rs", + "libm/src/math/generic/floor.rs" + ], + "type": "f64" + }, + "floorf": { + "sources": [ + "libm/src/math/arch/wasm32.rs", + "libm/src/math/floor.rs", + "libm/src/math/generic/floor.rs" + ], + "type": "f32" + }, + "floorf128": { + "sources": [ + "libm/src/math/floor.rs", + "libm/src/math/generic/floor.rs" + ], + "type": "f128" + }, + "floorf16": { + "sources": [ + "libm/src/math/floor.rs", + "libm/src/math/generic/floor.rs" + ], + "type": "f16" + }, + "fma": { + "sources": [ + "libm/src/math/arch/aarch64.rs", + "libm/src/math/arch/x86/fma.rs", + "libm/src/math/fma.rs" + ], + "type": "f64" + }, + "fmaf": { + "sources": [ + "libm/src/math/arch/aarch64.rs", + "libm/src/math/arch/x86/fma.rs", + "libm/src/math/fma.rs" + ], + "type": "f32" + }, + "fmaf128": { + "sources": [ + "libm/src/math/fma.rs" + ], + "type": "f128" + }, + "fmax": { + "sources": [ + "libm/src/math/fmin_fmax.rs", + "libm/src/math/generic/fmax.rs" + ], + "type": "f64" + }, + "fmaxf": { + "sources": [ + "libm/src/math/fmin_fmax.rs", + "libm/src/math/generic/fmax.rs" + ], + "type": "f32" + }, + "fmaxf128": { + "sources": [ + "libm/src/math/fmin_fmax.rs", + "libm/src/math/generic/fmax.rs" + ], + "type": "f128" + }, + "fmaxf16": { + "sources": [ + "libm/src/math/fmin_fmax.rs", + "libm/src/math/generic/fmax.rs" + ], + "type": "f16" + }, + "fmaximum": { + "sources": [ + "libm/src/math/fminimum_fmaximum.rs", + "libm/src/math/generic/fmaximum.rs" + ], + "type": "f64" + }, + "fmaximum_num": { + "sources": [ + "libm/src/math/fminimum_fmaximum_num.rs", + "libm/src/math/generic/fmaximum_num.rs" + ], + "type": "f64" + }, + "fmaximum_numf": { + "sources": [ + "libm/src/math/fminimum_fmaximum_num.rs", + "libm/src/math/generic/fmaximum_num.rs" + ], + "type": "f32" + }, + "fmaximum_numf128": { + "sources": [ + "libm/src/math/fminimum_fmaximum_num.rs", + "libm/src/math/generic/fmaximum_num.rs" + ], + "type": "f128" + }, + "fmaximum_numf16": { + "sources": [ + "libm/src/math/fminimum_fmaximum_num.rs", + "libm/src/math/generic/fmaximum_num.rs" + ], + "type": "f16" + }, + "fmaximumf": { + "sources": [ + "libm/src/math/fminimum_fmaximum.rs", + "libm/src/math/generic/fmaximum.rs" + ], + "type": "f32" + }, + "fmaximumf128": { + "sources": [ + "libm/src/math/fminimum_fmaximum.rs", + 
"libm/src/math/generic/fmaximum.rs" + ], + "type": "f128" + }, + "fmaximumf16": { + "sources": [ + "libm/src/math/fminimum_fmaximum.rs", + "libm/src/math/generic/fmaximum.rs" + ], + "type": "f16" + }, + "fmin": { + "sources": [ + "libm/src/math/fmin_fmax.rs", + "libm/src/math/generic/fmin.rs" + ], + "type": "f64" + }, + "fminf": { + "sources": [ + "libm/src/math/fmin_fmax.rs", + "libm/src/math/generic/fmin.rs" + ], + "type": "f32" + }, + "fminf128": { + "sources": [ + "libm/src/math/fmin_fmax.rs", + "libm/src/math/generic/fmin.rs" + ], + "type": "f128" + }, + "fminf16": { + "sources": [ + "libm/src/math/fmin_fmax.rs", + "libm/src/math/generic/fmin.rs" + ], + "type": "f16" + }, + "fminimum": { + "sources": [ + "libm/src/math/fminimum_fmaximum.rs", + "libm/src/math/generic/fminimum.rs" + ], + "type": "f64" + }, + "fminimum_num": { + "sources": [ + "libm/src/math/fminimum_fmaximum_num.rs", + "libm/src/math/generic/fminimum_num.rs" + ], + "type": "f64" + }, + "fminimum_numf": { + "sources": [ + "libm/src/math/fminimum_fmaximum_num.rs", + "libm/src/math/generic/fminimum_num.rs" + ], + "type": "f32" + }, + "fminimum_numf128": { + "sources": [ + "libm/src/math/fminimum_fmaximum_num.rs", + "libm/src/math/generic/fminimum_num.rs" + ], + "type": "f128" + }, + "fminimum_numf16": { + "sources": [ + "libm/src/math/fminimum_fmaximum_num.rs", + "libm/src/math/generic/fminimum_num.rs" + ], + "type": "f16" + }, + "fminimumf": { + "sources": [ + "libm/src/math/fminimum_fmaximum.rs", + "libm/src/math/generic/fminimum.rs" + ], + "type": "f32" + }, + "fminimumf128": { + "sources": [ + "libm/src/math/fminimum_fmaximum.rs", + "libm/src/math/generic/fminimum.rs" + ], + "type": "f128" + }, + "fminimumf16": { + "sources": [ + "libm/src/math/fminimum_fmaximum.rs", + "libm/src/math/generic/fminimum.rs" + ], + "type": "f16" + }, + "fmod": { + "sources": [ + "libm/src/math/fmod.rs", + "libm/src/math/generic/fmod.rs" + ], + "type": "f64" + }, + "fmodf": { + "sources": [ + "libm/src/math/fmod.rs", + "libm/src/math/generic/fmod.rs" + ], + "type": "f32" + }, + "fmodf128": { + "sources": [ + "libm/src/math/fmod.rs", + "libm/src/math/generic/fmod.rs" + ], + "type": "f128" + }, + "fmodf16": { + "sources": [ + "libm/src/math/fmod.rs", + "libm/src/math/generic/fmod.rs" + ], + "type": "f16" + }, + "frexp": { + "sources": [ + "libm/src/math/frexp.rs" + ], + "type": "f64" + }, + "frexpf": { + "sources": [ + "libm/src/math/frexpf.rs" + ], + "type": "f32" + }, + "hypot": { + "sources": [ + "libm/src/math/hypot.rs" + ], + "type": "f64" + }, + "hypotf": { + "sources": [ + "libm/src/math/hypotf.rs" + ], + "type": "f32" + }, + "ilogb": { + "sources": [ + "libm/src/math/ilogb.rs" + ], + "type": "f64" + }, + "ilogbf": { + "sources": [ + "libm/src/math/ilogbf.rs" + ], + "type": "f32" + }, + "j0": { + "sources": [ + "libm/src/math/j0.rs" + ], + "type": "f64" + }, + "j0f": { + "sources": [ + "libm/src/math/j0f.rs" + ], + "type": "f32" + }, + "j1": { + "sources": [ + "libm/src/math/j1.rs" + ], + "type": "f64" + }, + "j1f": { + "sources": [ + "libm/src/math/j1f.rs" + ], + "type": "f32" + }, + "jn": { + "sources": [ + "libm/src/math/jn.rs" + ], + "type": "f64" + }, + "jnf": { + "sources": [ + "libm/src/math/jnf.rs" + ], + "type": "f32" + }, + "ldexp": { + "sources": [ + "libm/src/math/ldexp.rs" + ], + "type": "f64" + }, + "ldexpf": { + "sources": [ + "libm/src/math/ldexp.rs" + ], + "type": "f32" + }, + "ldexpf128": { + "sources": [ + "libm/src/math/ldexp.rs" + ], + "type": "f128" + }, + "ldexpf16": { + "sources": [ + "libm/src/math/ldexp.rs" + 
], + "type": "f16" + }, + "lgamma": { + "sources": [ + "libm/src/math/lgamma.rs" + ], + "type": "f64" + }, + "lgamma_r": { + "sources": [ + "libm/src/math/lgamma_r.rs" + ], + "type": "f64" + }, + "lgammaf": { + "sources": [ + "libm/src/math/lgammaf.rs" + ], + "type": "f32" + }, + "lgammaf_r": { + "sources": [ + "libm/src/math/lgammaf_r.rs" + ], + "type": "f32" + }, + "log": { + "sources": [ + "libm/src/math/log.rs" + ], + "type": "f64" + }, + "log10": { + "sources": [ + "libm/src/math/log10.rs" + ], + "type": "f64" + }, + "log10f": { + "sources": [ + "libm/src/math/log10f.rs" + ], + "type": "f32" + }, + "log1p": { + "sources": [ + "libm/src/math/log1p.rs" + ], + "type": "f64" + }, + "log1pf": { + "sources": [ + "libm/src/math/log1pf.rs" + ], + "type": "f32" + }, + "log2": { + "sources": [ + "libm/src/math/log2.rs" + ], + "type": "f64" + }, + "log2f": { + "sources": [ + "libm/src/math/log2f.rs" + ], + "type": "f32" + }, + "logf": { + "sources": [ + "libm/src/math/logf.rs" + ], + "type": "f32" + }, + "modf": { + "sources": [ + "libm/src/math/modf.rs" + ], + "type": "f64" + }, + "modff": { + "sources": [ + "libm/src/math/modff.rs" + ], + "type": "f32" + }, + "nextafter": { + "sources": [ + "libm/src/math/nextafter.rs" + ], + "type": "f64" + }, + "nextafterf": { + "sources": [ + "libm/src/math/nextafterf.rs" + ], + "type": "f32" + }, + "pow": { + "sources": [ + "libm/src/math/pow.rs" + ], + "type": "f64" + }, + "powf": { + "sources": [ + "libm/src/math/powf.rs" + ], + "type": "f32" + }, + "remainder": { + "sources": [ + "libm/src/math/remainder.rs" + ], + "type": "f64" + }, + "remainderf": { + "sources": [ + "libm/src/math/remainderf.rs" + ], + "type": "f32" + }, + "remquo": { + "sources": [ + "libm/src/math/remquo.rs" + ], + "type": "f64" + }, + "remquof": { + "sources": [ + "libm/src/math/remquof.rs" + ], + "type": "f32" + }, + "rint": { + "sources": [ + "libm/src/math/arch/aarch64.rs", + "libm/src/math/arch/wasm32.rs", + "libm/src/math/rint.rs" + ], + "type": "f64" + }, + "rintf": { + "sources": [ + "libm/src/math/arch/aarch64.rs", + "libm/src/math/arch/wasm32.rs", + "libm/src/math/rint.rs" + ], + "type": "f32" + }, + "rintf128": { + "sources": [ + "libm/src/math/rint.rs" + ], + "type": "f128" + }, + "rintf16": { + "sources": [ + "libm/src/math/arch/aarch64.rs", + "libm/src/math/rint.rs" + ], + "type": "f16" + }, + "round": { + "sources": [ + "libm/src/math/generic/round.rs", + "libm/src/math/round.rs" + ], + "type": "f64" + }, + "roundeven": { + "sources": [ + "libm/src/math/roundeven.rs" + ], + "type": "f64" + }, + "roundevenf": { + "sources": [ + "libm/src/math/roundeven.rs" + ], + "type": "f32" + }, + "roundevenf128": { + "sources": [ + "libm/src/math/roundeven.rs" + ], + "type": "f128" + }, + "roundevenf16": { + "sources": [ + "libm/src/math/roundeven.rs" + ], + "type": "f16" + }, + "roundf": { + "sources": [ + "libm/src/math/generic/round.rs", + "libm/src/math/round.rs" + ], + "type": "f32" + }, + "roundf128": { + "sources": [ + "libm/src/math/generic/round.rs", + "libm/src/math/round.rs" + ], + "type": "f128" + }, + "roundf16": { + "sources": [ + "libm/src/math/generic/round.rs", + "libm/src/math/round.rs" + ], + "type": "f16" + }, + "scalbn": { + "sources": [ + "libm/src/math/generic/scalbn.rs", + "libm/src/math/scalbn.rs" + ], + "type": "f64" + }, + "scalbnf": { + "sources": [ + "libm/src/math/generic/scalbn.rs", + "libm/src/math/scalbn.rs" + ], + "type": "f32" + }, + "scalbnf128": { + "sources": [ + "libm/src/math/generic/scalbn.rs", + "libm/src/math/scalbn.rs" + ], + "type": 
"f128" + }, + "scalbnf16": { + "sources": [ + "libm/src/math/generic/scalbn.rs", + "libm/src/math/scalbn.rs" + ], + "type": "f16" + }, + "sin": { + "sources": [ + "libm/src/math/sin.rs" + ], + "type": "f64" + }, + "sincos": { + "sources": [ + "libm/src/math/sincos.rs" + ], + "type": "f64" + }, + "sincosf": { + "sources": [ + "libm/src/math/sincosf.rs" + ], + "type": "f32" + }, + "sinf": { + "sources": [ + "libm/src/math/sinf.rs" + ], + "type": "f32" + }, + "sinh": { + "sources": [ + "libm/src/math/sinh.rs" + ], + "type": "f64" + }, + "sinhf": { + "sources": [ + "libm/src/math/sinhf.rs" + ], + "type": "f32" + }, + "sqrt": { + "sources": [ + "libm/src/math/arch/aarch64.rs", + "libm/src/math/arch/wasm32.rs", + "libm/src/math/arch/x86.rs", + "libm/src/math/generic/sqrt.rs", + "libm/src/math/sqrt.rs" + ], + "type": "f64" + }, + "sqrtf": { + "sources": [ + "libm/src/math/arch/aarch64.rs", + "libm/src/math/arch/wasm32.rs", + "libm/src/math/arch/x86.rs", + "libm/src/math/generic/sqrt.rs", + "libm/src/math/sqrt.rs" + ], + "type": "f32" + }, + "sqrtf128": { + "sources": [ + "libm/src/math/generic/sqrt.rs", + "libm/src/math/sqrt.rs" + ], + "type": "f128" + }, + "sqrtf16": { + "sources": [ + "libm/src/math/arch/aarch64.rs", + "libm/src/math/generic/sqrt.rs", + "libm/src/math/sqrt.rs" + ], + "type": "f16" + }, + "tan": { + "sources": [ + "libm/src/math/tan.rs" + ], + "type": "f64" + }, + "tanf": { + "sources": [ + "libm/src/math/tanf.rs" + ], + "type": "f32" + }, + "tanh": { + "sources": [ + "libm/src/math/tanh.rs" + ], + "type": "f64" + }, + "tanhf": { + "sources": [ + "libm/src/math/tanhf.rs" + ], + "type": "f32" + }, + "tgamma": { + "sources": [ + "libm/src/math/tgamma.rs" + ], + "type": "f64" + }, + "tgammaf": { + "sources": [ + "libm/src/math/tgammaf.rs" + ], + "type": "f32" + }, + "trunc": { + "sources": [ + "libm/src/math/arch/wasm32.rs", + "libm/src/math/generic/trunc.rs", + "libm/src/math/trunc.rs" + ], + "type": "f64" + }, + "truncf": { + "sources": [ + "libm/src/math/arch/wasm32.rs", + "libm/src/math/generic/trunc.rs", + "libm/src/math/trunc.rs" + ], + "type": "f32" + }, + "truncf128": { + "sources": [ + "libm/src/math/generic/trunc.rs", + "libm/src/math/trunc.rs" + ], + "type": "f128" + }, + "truncf16": { + "sources": [ + "libm/src/math/generic/trunc.rs", + "libm/src/math/trunc.rs" + ], + "type": "f16" + }, + "y0": { + "sources": [ + "libm/src/math/j0.rs" + ], + "type": "f64" + }, + "y0f": { + "sources": [ + "libm/src/math/j0f.rs" + ], + "type": "f32" + }, + "y1": { + "sources": [ + "libm/src/math/j1.rs" + ], + "type": "f64" + }, + "y1f": { + "sources": [ + "libm/src/math/j1f.rs" + ], + "type": "f32" + }, + "yn": { + "sources": [ + "libm/src/math/jn.rs" + ], + "type": "f64" + }, + "ynf": { + "sources": [ + "libm/src/math/jnf.rs" + ], + "type": "f32" + } +} diff --git a/library/compiler-builtins/etc/function-list.txt b/library/compiler-builtins/etc/function-list.txt new file mode 100644 index 0000000000000..1f226c8c0ff3b --- /dev/null +++ b/library/compiler-builtins/etc/function-list.txt @@ -0,0 +1,164 @@ +# autogenerated by update-api-list.py +acos +acosf +acosh +acoshf +asin +asinf +asinh +asinhf +atan +atan2 +atan2f +atanf +atanh +atanhf +cbrt +cbrtf +ceil +ceilf +ceilf128 +ceilf16 +copysign +copysignf +copysignf128 +copysignf16 +cos +cosf +cosh +coshf +erf +erfc +erfcf +erff +exp +exp10 +exp10f +exp2 +exp2f +expf +expm1 +expm1f +fabs +fabsf +fabsf128 +fabsf16 +fdim +fdimf +fdimf128 +fdimf16 +floor +floorf +floorf128 +floorf16 +fma +fmaf +fmaf128 +fmax +fmaxf +fmaxf128 +fmaxf16 +fmaximum 
+fmaximum_num +fmaximum_numf +fmaximum_numf128 +fmaximum_numf16 +fmaximumf +fmaximumf128 +fmaximumf16 +fmin +fminf +fminf128 +fminf16 +fminimum +fminimum_num +fminimum_numf +fminimum_numf128 +fminimum_numf16 +fminimumf +fminimumf128 +fminimumf16 +fmod +fmodf +fmodf128 +fmodf16 +frexp +frexpf +hypot +hypotf +ilogb +ilogbf +j0 +j0f +j1 +j1f +jn +jnf +ldexp +ldexpf +ldexpf128 +ldexpf16 +lgamma +lgamma_r +lgammaf +lgammaf_r +log +log10 +log10f +log1p +log1pf +log2 +log2f +logf +modf +modff +nextafter +nextafterf +pow +powf +remainder +remainderf +remquo +remquof +rint +rintf +rintf128 +rintf16 +round +roundeven +roundevenf +roundevenf128 +roundevenf16 +roundf +roundf128 +roundf16 +scalbn +scalbnf +scalbnf128 +scalbnf16 +sin +sincos +sincosf +sinf +sinh +sinhf +sqrt +sqrtf +sqrtf128 +sqrtf16 +tan +tanf +tanh +tanhf +tgamma +tgammaf +trunc +truncf +truncf128 +truncf16 +y0 +y0f +y1 +y1f +yn +ynf diff --git a/library/compiler-builtins/etc/update-api-list.py b/library/compiler-builtins/etc/update-api-list.py new file mode 100755 index 0000000000000..28ff22f4cbb3b --- /dev/null +++ b/library/compiler-builtins/etc/update-api-list.py @@ -0,0 +1,361 @@ +#!/usr/bin/env python3 +"""Create a text file listing all public API. This can be used to ensure that all +functions are covered by our macros. + +This file additionally does tidy-esque checks that all functions are listed where +needed, or that lists are sorted. +""" + +import difflib +import json +import re +import subprocess as sp +import sys +from dataclasses import dataclass +from glob import glob +from pathlib import Path +from typing import Any, Callable, TypeAlias + +SELF_PATH = Path(__file__) +ETC_DIR = SELF_PATH.parent +ROOT_DIR = ETC_DIR.parent + +# These files do not trigger a retest. +IGNORED_SOURCES = ["libm/src/libm_helper.rs", "libm/src/math/support/float_traits.rs"] + +IndexTy: TypeAlias = dict[str, dict[str, Any]] +"""Type of the `index` item in rustdoc's JSON output""" + + +def eprint(*args, **kwargs): + """Print to stderr.""" + print(*args, file=sys.stderr, **kwargs) + + +@dataclass +class Crate: + """Representation of public interfaces and function defintion locations in + `libm`. + """ + + public_functions: list[str] + """List of all public functions.""" + defs: dict[str, list[str]] + """Map from `name->[source files]` to find all places that define a public + function. We track this to know which tests need to be rerun when specific files + get updated. + """ + types: dict[str, str] + """Map from `name->type`.""" + + def __init__(self) -> None: + self.public_functions = [] + self.defs = {} + self.types = {} + + j = self.get_rustdoc_json() + index: IndexTy = j["index"] + self._init_function_list(index) + self._init_defs(index) + self._init_types() + + @staticmethod + def get_rustdoc_json() -> dict[Any, Any]: + """Get rustdoc's JSON output for the `libm` crate.""" + + j = sp.check_output( + [ + "rustdoc", + "libm/src/lib.rs", + "--edition=2021", + "--document-private-items", + "--output-format=json", + "--cfg=f16_enabled", + "--cfg=f128_enabled", + "-Zunstable-options", + "-o-", + ], + cwd=ROOT_DIR, + text=True, + ) + j = json.loads(j) + return j + + def _init_function_list(self, index: IndexTy) -> None: + """Get a list of public functions from rustdoc JSON output. + + Note that this only finds functions that are reexported in `lib.rs`, this will + need to be adjusted if we need to account for functions that are defined there, or + glob reexports in other locations. 
+ """ + # Filter out items that are not public + public = [i for i in index.values() if i["visibility"] == "public"] + + # Collect a list of source IDs for reexported items in `lib.rs` or `mod math`. + use = (i for i in public if "use" in i["inner"]) + use = ( + i + for i in use + if i["span"]["filename"] in ["libm/src/math/mod.rs", "libm/src/lib.rs"] + ) + reexported_ids = [item["inner"]["use"]["id"] for item in use] + + # Collect a list of reexported items that are functions + for id in reexported_ids: + srcitem = index.get(str(id)) + # External crate + if srcitem is None: + continue + + # Skip if not a function + if "function" not in srcitem["inner"]: + continue + + self.public_functions.append(srcitem["name"]) + self.public_functions.sort() + + def _init_defs(self, index: IndexTy) -> None: + defs = {name: set() for name in self.public_functions} + funcs = (i for i in index.values() if "function" in i["inner"]) + funcs = (f for f in funcs if f["name"] in self.public_functions) + for func in funcs: + defs[func["name"]].add(func["span"]["filename"]) + + # A lot of the `arch` module is often configured out so doesn't show up in docs. Use + # string matching as a fallback. + for fname in glob( + "libm/src/math/arch/**/*.rs", root_dir=ROOT_DIR, recursive=True + ): + contents = (ROOT_DIR.joinpath(fname)).read_text() + + for name in self.public_functions: + if f"fn {name}" in contents: + defs[name].add(fname) + + for name, sources in defs.items(): + base_sources = defs[base_name(name)[0]] + for src in (s for s in base_sources if "generic" in s): + sources.add(src) + + for src in IGNORED_SOURCES: + sources.discard(src) + + # Sort the set + self.defs = {k: sorted(v) for (k, v) in defs.items()} + + def _init_types(self) -> None: + self.types = {name: base_name(name)[1] for name in self.public_functions} + + def write_function_list(self, check: bool) -> None: + """Collect the list of public functions to a simple text file.""" + output = "# autogenerated by update-api-list.py\n" + for name in self.public_functions: + output += f"{name}\n" + + out_file = ETC_DIR.joinpath("function-list.txt") + + if check: + with open(out_file, "r") as f: + current = f.read() + diff_and_exit(current, output, "function list") + else: + with open(out_file, "w") as f: + f.write(output) + + def write_function_defs(self, check: bool) -> None: + """Collect the list of information about public functions to a JSON file .""" + comment = ( + "Autogenerated by update-api-list.py. " + "List of files that define a function with a given name. " + "This file is checked in to make it obvious if refactoring breaks things" + ) + + d = {"__comment": comment} + d |= { + name: {"sources": self.defs[name], "type": self.types[name]} + for name in self.public_functions + } + + out_file = ETC_DIR.joinpath("function-definitions.json") + output = json.dumps(d, indent=4) + "\n" + + if check: + with open(out_file, "r") as f: + current = f.read() + diff_and_exit(current, output, "source list") + else: + with open(out_file, "w") as f: + f.write(output) + + def tidy_lists(self) -> None: + """In each file, check annotations indicating blocks of code should be sorted or should + include all public API. 
+ """ + + flist = sp.check_output(["git", "ls-files"], cwd=ROOT_DIR, text=True) + + for path in flist.splitlines(): + fpath = ROOT_DIR.joinpath(path) + if fpath.is_dir() or fpath == SELF_PATH: + continue + + lines = fpath.read_text().splitlines() + + validate_delimited_block( + fpath, + lines, + "verify-sorted-start", + "verify-sorted-end", + ensure_sorted, + ) + + validate_delimited_block( + fpath, + lines, + "verify-apilist-start", + "verify-apilist-end", + lambda p, n, lines: self.ensure_contains_api(p, n, lines), + ) + + def ensure_contains_api(self, fpath: Path, line_num: int, lines: list[str]): + """Given a list of strings, ensure that each public function we have is named + somewhere. + """ + not_found = [] + for func in self.public_functions: + # The function name may be on its own or somewhere in a snake case string. + pat = re.compile(rf"(\b|_){func}(\b|_)") + found = next((line for line in lines if pat.search(line)), None) + + if found is None: + not_found.append(func) + + if len(not_found) == 0: + return + + relpath = fpath.relative_to(ROOT_DIR) + eprint(f"functions not found at {relpath}:{line_num}: {not_found}") + exit(1) + + +def validate_delimited_block( + fpath: Path, + lines: list[str], + start: str, + end: str, + validate: Callable[[Path, int, list[str]], None], +) -> None: + """Identify blocks of code wrapped within `start` and `end`, collect their contents + to a list of strings, and call `validate` for each of those lists. + """ + relpath = fpath.relative_to(ROOT_DIR) + block_lines = [] + block_start_line: None | int = None + for line_num, line in enumerate(lines): + line_num += 1 + + if start in line: + block_start_line = line_num + continue + + if end in line: + if block_start_line is None: + eprint(f"`{end}` without `{start}` at {relpath}:{line_num}") + exit(1) + + validate(fpath, block_start_line, block_lines) + block_lines = [] + block_start_line = None + continue + + if block_start_line is not None: + block_lines.append(line) + + if block_start_line is not None: + eprint(f"`{start}` without `{end}` at {relpath}:{block_start_line}") + exit(1) + + +def ensure_sorted(fpath: Path, block_start_line: int, lines: list[str]) -> None: + """Ensure that a list of lines is sorted, otherwise print a diff and exit.""" + relpath = fpath.relative_to(ROOT_DIR) + diff_and_exit( + "\n".join(lines), + "\n".join(sorted(lines)), + f"sorted block at {relpath}:{block_start_line}", + ) + + +def diff_and_exit(actual: str, expected: str, name: str): + """If the two strings are different, print a diff between them and then exit + with an error. + """ + if actual == expected: + print(f"{name} output matches expected; success") + return + + a = [f"{line}\n" for line in actual.splitlines()] + b = [f"{line}\n" for line in expected.splitlines()] + + diff = difflib.unified_diff(a, b, "actual", "expected") + sys.stdout.writelines(diff) + print(f"mismatched {name}") + exit(1) + + +def base_name(name: str) -> tuple[str, str]: + """Return the basename and type from a full function name. Keep in sync with Rust's + `fn base_name`. 
+ """ + known_mappings = [ + ("erff", ("erf", "f32")), + ("erf", ("erf", "f64")), + ("modff", ("modf", "f32")), + ("modf", ("modf", "f64")), + ("lgammaf_r", ("lgamma_r", "f32")), + ("lgamma_r", ("lgamma_r", "f64")), + ] + + found = next((base for (full, base) in known_mappings if full == name), None) + if found is not None: + return found + + if name.endswith("f"): + return (name.rstrip("f"), "f32") + + if name.endswith("f16"): + return (name.rstrip("f16"), "f16") + + if name.endswith("f128"): + return (name.rstrip("f128"), "f128") + + return (name, "f64") + + +def ensure_updated_list(check: bool) -> None: + """Runner to update the function list and JSON, or check that it is already up + to date. + """ + crate = Crate() + crate.write_function_list(check) + crate.write_function_defs(check) + + crate.tidy_lists() + + +def main(): + """By default overwrite the file. If `--check` is passed, print a diff instead and + error if the files are different. + """ + match sys.argv: + case [_]: + ensure_updated_list(False) + case [_, "--check"]: + ensure_updated_list(True) + case _: + print("unrecognized arguments") + exit(1) + + +if __name__ == "__main__": + main() diff --git a/library/compiler-builtins/libm-test/Cargo.toml b/library/compiler-builtins/libm-test/Cargo.toml new file mode 100644 index 0000000000000..7a306e735577e --- /dev/null +++ b/library/compiler-builtins/libm-test/Cargo.toml @@ -0,0 +1,74 @@ +[package] +name = "libm-test" +version = "0.1.0" +edition = "2024" +publish = false +license = "MIT OR Apache-2.0" + +[features] +default = ["build-mpfr", "build-musl", "unstable-float"] + +# Propagated from libm because this affects which functions we test. +unstable-float = ["libm/unstable-float", "rug?/nightly-float"] + +# Generate tests which are random inputs and the outputs are calculated with +# musl libc. +build-mpfr = ["dep:rug", "dep:gmp-mpfr-sys"] + +# Build our own musl for testing and benchmarks +build-musl = ["dep:musl-math-sys"] + +# Enable report generation without bringing in more dependencies by default +benchmarking-reports = ["criterion/plotters", "criterion/html_reports"] + +# Enable icount benchmarks (requires iai-callgrind and valgrind) +icount = ["dep:iai-callgrind"] + +# Run with a reduced set of benchmarks, such as for CI +short-benchmarks = [] + +[dependencies] +anyhow = "1.0.97" +# This is not directly used but is required so we can enable `gmp-mpfr-sys/force-cross`. +gmp-mpfr-sys = { version = "1.6.4", optional = true, default-features = false } +iai-callgrind = { version = "0.14.0", optional = true } +indicatif = { version = "0.17.11", default-features = false } +libm = { path = "../libm", features = ["unstable-public-internals"] } +libm-macros = { path = "../crates/libm-macros" } +musl-math-sys = { path = "../crates/musl-math-sys", optional = true } +paste = "1.0.15" +rand = "0.9.0" +rand_chacha = "0.9.0" +rayon = "1.10.0" +rug = { version = "1.27.0", optional = true, default-features = false, features = ["float", "integer", "std"] } + +[target.'cfg(target_family = "wasm")'.dependencies] +getrandom = { version = "0.3.2", features = ["wasm_js"] } + +[build-dependencies] +rand = { version = "0.9.0", optional = true } + +[dev-dependencies] +criterion = { version = "0.5.1", default-features = false, features = ["cargo_bench_support"] } +libtest-mimic = "0.8.1" + +[[bench]] +name = "icount" +harness = false +required-features = ["icount"] + +[[bench]] +name = "random" +harness = false + +[[test]] +# No harness so that we can skip tests at runtime based on env. 
Prefixed with
+# `z` so these tests get run last.
+name = "z_extensive"
+harness = false
+
+[lints.rust]
+# Values from the shared config.rs used by `libm` but not the test crate
+unexpected_cfgs = { level = "warn", check-cfg = [
+    'cfg(feature, values("arch", "force-soft-floats", "unstable-intrinsics"))',
+] }
diff --git a/library/compiler-builtins/libm-test/benches/icount.rs b/library/compiler-builtins/libm-test/benches/icount.rs
new file mode 100644
index 0000000000000..da8c6bfd15a03
--- /dev/null
+++ b/library/compiler-builtins/libm-test/benches/icount.rs
@@ -0,0 +1,316 @@
+//! Benchmarks that use `iai-cachegrind` to be reasonably CI-stable.
+
+use std::hint::black_box;
+
+use iai_callgrind::{library_benchmark, library_benchmark_group, main};
+use libm::support::{HInt, u256};
+use libm_test::generate::spaced;
+use libm_test::{CheckBasis, CheckCtx, GeneratorKind, MathOp, OpRustArgs, TupleCall, op};
+
+const BENCH_ITER_ITEMS: u64 = 500;
+
+macro_rules! icount_benches {
+    (
+        fn_name: $fn_name:ident,
+        attrs: [$($_attr:meta),*],
+    ) => {
+        paste::paste! {
+            // Construct benchmark inputs from the logspace generator.
+            fn [< setup_ $fn_name >]() -> Vec<OpRustArgs<op::$fn_name::Routine>> {
+                type Op = op::$fn_name::Routine;
+                let mut ctx = CheckCtx::new(
+                    Op::IDENTIFIER,
+                    CheckBasis::None,
+                    GeneratorKind::QuickSpaced
+                );
+                ctx.override_iterations(BENCH_ITER_ITEMS);
+                let ret = spaced::get_test_cases::<Op>(&ctx).0.collect::<Vec<_>>();
+                println!("operation {}, {} steps", Op::NAME, ret.len());
+                ret
+            }
+
+            // Run benchmarks with the above inputs.
+            #[library_benchmark]
+            #[bench::logspace([< setup_ $fn_name >]())]
+            fn [< icount_bench_ $fn_name >](cases: Vec<OpRustArgs<op::$fn_name::Routine>>) {
+                type Op = op::$fn_name::Routine;
+                let f = black_box(Op::ROUTINE);
+                for input in cases.iter().copied() {
+                    input.call(f);
+                }
+            }
+
+            library_benchmark_group!(
+                name = [< icount_bench_ $fn_name _group >];
+                benchmarks = [< icount_bench_ $fn_name >]
+            );
+        }
+    };
+}
+
+libm_macros::for_each_function!
{ + callback: icount_benches, +} + +fn setup_u128_mul() -> Vec<(u128, u128)> { + let step = u128::MAX / 300; + let mut x = 0u128; + let mut y = 0u128; + let mut v = Vec::new(); + + loop { + 'inner: loop { + match y.checked_add(step) { + Some(new) => y = new, + None => break 'inner, + } + + v.push((x, y)) + } + + match x.checked_add(step) { + Some(new) => x = new, + None => break, + } + } + + v +} + +fn setup_u256_add() -> Vec<(u256, u256)> { + let mut v = Vec::new(); + for (x, y) in setup_u128_mul() { + // square the u128 inputs to cover most of the u256 range + v.push((x.widen_mul(x), y.widen_mul(y))); + } + // Doesn't get covered by `u128:MAX^2` + v.push((u256::MAX, u256::MAX)); + v +} + +fn setup_u256_shift() -> Vec<(u256, u32)> { + let mut v = Vec::new(); + + for (x, _) in setup_u128_mul() { + let x2 = x.widen_mul(x); + for y in 0u32..256 { + v.push((x2, y)); + } + } + + v +} + +#[library_benchmark] +#[bench::linspace(setup_u128_mul())] +fn icount_bench_u128_widen_mul(cases: Vec<(u128, u128)>) { + for (x, y) in cases.iter().copied() { + black_box(black_box(x).zero_widen_mul(black_box(y))); + } +} + +library_benchmark_group!( + name = icount_bench_u128_widen_mul_group; + benchmarks = icount_bench_u128_widen_mul +); + +#[library_benchmark] +#[bench::linspace(setup_u256_add())] +fn icount_bench_u256_add(cases: Vec<(u256, u256)>) { + for (x, y) in cases.iter().copied() { + black_box(black_box(x) + black_box(y)); + } +} + +library_benchmark_group!( + name = icount_bench_u256_add_group; + benchmarks = icount_bench_u256_add +); + +#[library_benchmark] +#[bench::linspace(setup_u256_shift())] +fn icount_bench_u256_shr(cases: Vec<(u256, u32)>) { + for (x, y) in cases.iter().copied() { + black_box(black_box(x) >> black_box(y)); + } +} + +library_benchmark_group!( + name = icount_bench_u256_shr_group; + benchmarks = icount_bench_u256_shr +); + +main!( + library_benchmark_groups = + // u256-related benchmarks + icount_bench_u128_widen_mul_group, + icount_bench_u256_add_group, + icount_bench_u256_shr_group, + // verify-apilist-start + // verify-sorted-start + icount_bench_acos_group, + icount_bench_acosf_group, + icount_bench_acosh_group, + icount_bench_acoshf_group, + icount_bench_asin_group, + icount_bench_asinf_group, + icount_bench_asinh_group, + icount_bench_asinhf_group, + icount_bench_atan2_group, + icount_bench_atan2f_group, + icount_bench_atan_group, + icount_bench_atanf_group, + icount_bench_atanh_group, + icount_bench_atanhf_group, + icount_bench_cbrt_group, + icount_bench_cbrtf_group, + icount_bench_ceil_group, + icount_bench_ceilf128_group, + icount_bench_ceilf16_group, + icount_bench_ceilf_group, + icount_bench_copysign_group, + icount_bench_copysignf128_group, + icount_bench_copysignf16_group, + icount_bench_copysignf_group, + icount_bench_cos_group, + icount_bench_cosf_group, + icount_bench_cosh_group, + icount_bench_coshf_group, + icount_bench_erf_group, + icount_bench_erfc_group, + icount_bench_erfcf_group, + icount_bench_erff_group, + icount_bench_exp10_group, + icount_bench_exp10f_group, + icount_bench_exp2_group, + icount_bench_exp2f_group, + icount_bench_exp_group, + icount_bench_expf_group, + icount_bench_expm1_group, + icount_bench_expm1f_group, + icount_bench_fabs_group, + icount_bench_fabsf128_group, + icount_bench_fabsf16_group, + icount_bench_fabsf_group, + icount_bench_fdim_group, + icount_bench_fdimf128_group, + icount_bench_fdimf16_group, + icount_bench_fdimf_group, + icount_bench_floor_group, + icount_bench_floorf128_group, + icount_bench_floorf16_group, + 
icount_bench_floorf_group, + icount_bench_fma_group, + icount_bench_fmaf128_group, + icount_bench_fmaf_group, + icount_bench_fmax_group, + icount_bench_fmaxf128_group, + icount_bench_fmaxf16_group, + icount_bench_fmaxf_group, + icount_bench_fmaximum_group, + icount_bench_fmaximum_num_group, + icount_bench_fmaximum_numf128_group, + icount_bench_fmaximum_numf16_group, + icount_bench_fmaximum_numf_group, + icount_bench_fmaximumf128_group, + icount_bench_fmaximumf16_group, + icount_bench_fmaximumf_group, + icount_bench_fmin_group, + icount_bench_fminf128_group, + icount_bench_fminf16_group, + icount_bench_fminf_group, + icount_bench_fminimum_group, + icount_bench_fminimum_num_group, + icount_bench_fminimum_numf128_group, + icount_bench_fminimum_numf16_group, + icount_bench_fminimum_numf_group, + icount_bench_fminimumf128_group, + icount_bench_fminimumf16_group, + icount_bench_fminimumf_group, + icount_bench_fmod_group, + icount_bench_fmodf128_group, + icount_bench_fmodf16_group, + icount_bench_fmodf_group, + icount_bench_frexp_group, + icount_bench_frexpf_group, + icount_bench_hypot_group, + icount_bench_hypotf_group, + icount_bench_ilogb_group, + icount_bench_ilogbf_group, + icount_bench_j0_group, + icount_bench_j0f_group, + icount_bench_j1_group, + icount_bench_j1f_group, + icount_bench_jn_group, + icount_bench_jnf_group, + icount_bench_ldexp_group, + icount_bench_ldexpf128_group, + icount_bench_ldexpf16_group, + icount_bench_ldexpf_group, + icount_bench_lgamma_group, + icount_bench_lgamma_r_group, + icount_bench_lgammaf_group, + icount_bench_lgammaf_r_group, + icount_bench_log10_group, + icount_bench_log10f_group, + icount_bench_log1p_group, + icount_bench_log1pf_group, + icount_bench_log2_group, + icount_bench_log2f_group, + icount_bench_log_group, + icount_bench_logf_group, + icount_bench_modf_group, + icount_bench_modff_group, + icount_bench_nextafter_group, + icount_bench_nextafterf_group, + icount_bench_pow_group, + icount_bench_powf_group, + icount_bench_remainder_group, + icount_bench_remainderf_group, + icount_bench_remquo_group, + icount_bench_remquof_group, + icount_bench_rint_group, + icount_bench_rintf128_group, + icount_bench_rintf16_group, + icount_bench_rintf_group, + icount_bench_round_group, + icount_bench_roundeven_group, + icount_bench_roundevenf128_group, + icount_bench_roundevenf16_group, + icount_bench_roundevenf_group, + icount_bench_roundf128_group, + icount_bench_roundf16_group, + icount_bench_roundf_group, + icount_bench_scalbn_group, + icount_bench_scalbnf128_group, + icount_bench_scalbnf16_group, + icount_bench_scalbnf_group, + icount_bench_sin_group, + icount_bench_sincos_group, + icount_bench_sincosf_group, + icount_bench_sinf_group, + icount_bench_sinh_group, + icount_bench_sinhf_group, + icount_bench_sqrt_group, + icount_bench_sqrtf128_group, + icount_bench_sqrtf16_group, + icount_bench_sqrtf_group, + icount_bench_tan_group, + icount_bench_tanf_group, + icount_bench_tanh_group, + icount_bench_tanhf_group, + icount_bench_tgamma_group, + icount_bench_tgammaf_group, + icount_bench_trunc_group, + icount_bench_truncf128_group, + icount_bench_truncf16_group, + icount_bench_truncf_group, + icount_bench_y0_group, + icount_bench_y0f_group, + icount_bench_y1_group, + icount_bench_y1f_group, + icount_bench_yn_group, + icount_bench_ynf_group, + // verify-sorted-end + // verify-apilist-end +); diff --git a/library/compiler-builtins/libm-test/benches/random.rs b/library/compiler-builtins/libm-test/benches/random.rs new file mode 100644 index 0000000000000..1b17f049ecac2 
--- /dev/null +++ b/library/compiler-builtins/libm-test/benches/random.rs @@ -0,0 +1,179 @@ +use std::hint::black_box; +use std::time::Duration; + +use criterion::{Criterion, criterion_main}; +use libm_test::generate::random; +use libm_test::generate::random::RandomInput; +use libm_test::{CheckBasis, CheckCtx, GeneratorKind, MathOp, TupleCall}; + +/// Benchmark with this many items to get a variety +const BENCH_ITER_ITEMS: usize = if cfg!(feature = "short-benchmarks") { + 50 +} else { + 500 +}; + +/// Extra parameters we only care about if we are benchmarking against musl. +#[allow(dead_code)] +struct MuslExtra { + musl_fn: Option, + skip_on_i586: bool, +} + +macro_rules! musl_rand_benches { + ( + fn_name: $fn_name:ident, + attrs: [$($attr:meta),*], + fn_extra: ($skip_on_i586:expr, $musl_fn:expr), + ) => { + paste::paste! { + $(#[$attr])* + fn [< musl_bench_ $fn_name >](c: &mut Criterion) { + type Op = libm_test::op::$fn_name::Routine; + + #[cfg(feature = "build-musl")] + let musl_extra = MuslExtra::> { + musl_fn: $musl_fn, + skip_on_i586: $skip_on_i586, + }; + + #[cfg(not(feature = "build-musl"))] + let musl_extra = MuslExtra { + musl_fn: None, + skip_on_i586: $skip_on_i586, + }; + + bench_one::(c, musl_extra); + } + } + }; +} + +fn bench_one(c: &mut Criterion, musl_extra: MuslExtra) +where + Op: MathOp, + Op::RustArgs: RandomInput, +{ + let name = Op::NAME; + + let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Musl, GeneratorKind::Random); + let benchvec: Vec<_> = random::get_test_cases::(&ctx) + .0 + .take(BENCH_ITER_ITEMS) + .collect(); + + // Perform a sanity check that we are benchmarking the same thing + // Don't test against musl if it is not available + #[cfg(feature = "build-musl")] + for input in benchvec.iter().copied() { + use anyhow::Context; + use libm_test::CheckOutput; + + if cfg!(x86_no_sse) && musl_extra.skip_on_i586 { + break; + } + + let Some(musl_fn) = musl_extra.musl_fn else { + continue; + }; + let musl_res = input.call(musl_fn); + let crate_res = input.call(Op::ROUTINE); + + crate_res + .validate(musl_res, input, &ctx) + .context(name) + .unwrap(); + } + + #[cfg(not(feature = "build-musl"))] + let _ = musl_extra; // silence unused warnings + + /* Option pointers are black boxed to avoid inlining in the benchmark loop */ + + let mut group = c.benchmark_group(name); + group.bench_function("crate", |b| { + b.iter(|| { + let f = black_box(Op::ROUTINE); + for input in benchvec.iter().copied() { + input.call(f); + } + }) + }); + + // Don't test against musl if it is not available + #[cfg(feature = "build-musl")] + { + if let Some(musl_fn) = musl_extra.musl_fn { + group.bench_function("musl", |b| { + b.iter(|| { + let f = black_box(musl_fn); + for input in benchvec.iter().copied() { + input.call(f); + } + }) + }); + } + } +} + +libm_macros::for_each_function! { + callback: musl_rand_benches, + skip: [], + fn_extra: match MACRO_FN_NAME { + // We pass a tuple of `(skip_on_i586, musl_fn)` + + // FIXME(correctness): exp functions have the wrong result on i586 + exp10 | exp10f | exp2 | exp2f => (true, Some(musl_math_sys::MACRO_FN_NAME)), + + // Musl does not provide `f16` and `f128` functions, as well as a handful of others + fmaximum + | fmaximum_num + | fmaximum_numf + | fmaximumf + | fminimum + | fminimum_num + | fminimum_numf + | fminimumf + | roundeven + | roundevenf + | ALL_F16 + | ALL_F128 => (false, None), + + // By default we never skip (false) and always have a musl function available + _ => (false, Some(musl_math_sys::MACRO_FN_NAME)) + } +} + +macro_rules! 
run_callback { + ( + fn_name: $fn_name:ident, + attrs: [$($attr:meta),*], + extra: [$criterion:ident], + ) => { + paste::paste! { + $(#[$attr])* + [< musl_bench_ $fn_name >](&mut $criterion) + } + }; +} + +pub fn musl_random() { + let mut criterion = Criterion::default(); + + // For CI, run a short 0.5s warmup and 1.0s tests. This makes benchmarks complete in + // about the same time as other tests. + if cfg!(feature = "short-benchmarks") { + criterion = criterion + .warm_up_time(Duration::from_millis(200)) + .measurement_time(Duration::from_millis(600)); + } + + criterion = criterion.configure_from_args(); + + libm_macros::for_each_function! { + callback: run_callback, + extra: [criterion], + }; +} + +criterion_main!(musl_random); diff --git a/library/compiler-builtins/libm-test/build.rs b/library/compiler-builtins/libm-test/build.rs new file mode 100644 index 0000000000000..510ba842f10ab --- /dev/null +++ b/library/compiler-builtins/libm-test/build.rs @@ -0,0 +1,9 @@ +#[path = "../libm/configure.rs"] +mod configure; +use configure::Config; + +fn main() { + println!("cargo:rerun-if-changed=../libm/configure.rs"); + let cfg = Config::from_env(); + configure::emit_test_config(&cfg); +} diff --git a/library/compiler-builtins/libm-test/examples/plot_domains.rs b/library/compiler-builtins/libm-test/examples/plot_domains.rs new file mode 100644 index 0000000000000..3563103b8cd7c --- /dev/null +++ b/library/compiler-builtins/libm-test/examples/plot_domains.rs @@ -0,0 +1,109 @@ +//! Program to write all inputs from a generator to a file, then invoke a Julia script to plot +//! them. Output is in `target/plots`. +//! +//! Requires Julia with the `CairoMakie` dependency. +//! +//! Note that running in release mode by default generates a _lot_ more datapoints, which +//! causes plotting to be extremely slow (some simplification to be done in the script). + +use std::fmt::Write as _; +use std::io::{BufWriter, Write}; +use std::path::Path; +use std::process::Command; +use std::{env, fs}; + +use libm_test::generate::spaced::SpacedInput; +use libm_test::generate::{edge_cases, spaced}; +use libm_test::{CheckBasis, CheckCtx, GeneratorKind, MathOp, op}; + +const JL_PLOT: &str = "examples/plot_file.jl"; + +fn main() { + let manifest_env = env::var("CARGO_MANIFEST_DIR").unwrap(); + let manifest_dir = Path::new(&manifest_env); + let out_dir = manifest_dir.join("../../target/plots"); + if !out_dir.exists() { + fs::create_dir(&out_dir).unwrap(); + } + + let jl_script = manifest_dir.join(JL_PLOT); + let mut config = format!(r#"out_dir = "{}""#, out_dir.display()); + config.write_str("\n\n").unwrap(); + + // Plot a few domains with some functions that use them. + plot_one_operator::(&out_dir, &mut config); + plot_one_operator::(&out_dir, &mut config); + plot_one_operator::(&out_dir, &mut config); + + let config_path = out_dir.join("config.toml"); + fs::write(&config_path, config).unwrap(); + + // The script expects a path to `config.toml` to be passed as its only argument + let mut cmd = Command::new("julia"); + if cfg!(optimizations_enabled) { + cmd.arg("-O3"); + } + cmd.arg(jl_script).arg(config_path); + + println!("launching script... {cmd:?}"); + cmd.status().unwrap(); +} + +/// Run multiple generators for a single operator. 
+fn plot_one_operator(out_dir: &Path, config: &mut String) +where + Op: MathOp, + Op::RustArgs: SpacedInput, +{ + let mut ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr, GeneratorKind::QuickSpaced); + plot_one_generator( + out_dir, + &ctx, + "logspace", + config, + spaced::get_test_cases::(&ctx).0, + ); + ctx.gen_kind = GeneratorKind::EdgeCases; + plot_one_generator( + out_dir, + &ctx, + "edge_cases", + config, + edge_cases::get_test_cases::(&ctx).0, + ); +} + +/// Plot the output of a single generator. +fn plot_one_generator( + out_dir: &Path, + ctx: &CheckCtx, + gen_name: &str, + config: &mut String, + generator: impl Iterator, +) { + let fn_name = ctx.base_name_str; + let text_file = out_dir.join(format!("input-{fn_name}-{gen_name}.txt")); + + let f = fs::File::create(&text_file).unwrap(); + let mut w = BufWriter::new(f); + let mut count = 0u64; + + for input in generator { + writeln!(w, "{:e}", input.0).unwrap(); + count += 1; + } + + w.flush().unwrap(); + println!("generated {count} inputs for {fn_name}-{gen_name}"); + + writeln!( + config, + r#"[[input]] +function = "{fn_name}" +generator = "{gen_name}" +input_file = "{}" +"#, + text_file.to_str().unwrap() + ) + .unwrap() +} diff --git a/library/compiler-builtins/libm-test/examples/plot_file.jl b/library/compiler-builtins/libm-test/examples/plot_file.jl new file mode 100644 index 0000000000000..acffd97569f5c --- /dev/null +++ b/library/compiler-builtins/libm-test/examples/plot_file.jl @@ -0,0 +1,171 @@ +"A quick script for plotting a list of floats. + +Takes a path to a TOML file (Julia has builtin TOML support but not JSON) which +specifies a list of source files to plot. Plots are done with both a linear and +a log scale. + +Requires [Makie] (specifically CairoMakie) for plotting. + +[Makie]: https://docs.makie.org/stable/ +" + +using CairoMakie +using TOML + +function main()::Nothing + CairoMakie.activate!(px_per_unit = 10) + config_path = ARGS[1] + + cfg = Dict() + open(config_path, "r") do f + cfg = TOML.parse(f) + end + + out_dir = cfg["out_dir"] + for input in cfg["input"] + fn_name = input["function"] + gen_name = input["generator"] + input_file = input["input_file"] + + plot_one(input_file, out_dir, fn_name, gen_name) + end +end + +"Read inputs from a file, create both linear and log plots for one function" +function plot_one( + input_file::String, + out_dir::String, + fn_name::String, + gen_name::String, +)::Nothing + fig = Figure() + + lin_out_file = joinpath(out_dir, "plot-$fn_name-$gen_name.png") + log_out_file = joinpath(out_dir, "plot-$fn_name-$gen_name-log.png") + + # Map string function names to callable functions + if fn_name == "cos" + orig_func = cos + xlims = (-6.0, 6.0) + xlims_log = (-pi * 10, pi * 10) + elseif fn_name == "cbrt" + orig_func = cbrt + xlims = (-2.0, 2.0) + xlims_log = (-1000.0, 1000.0) + elseif fn_name == "sqrt" + orig_func = sqrt + xlims = (-1.1, 6.0) + xlims_log = (-1.1, 5000.0) + else + println("unrecognized function name `$fn_name`; update plot_file.jl") + exit(1) + end + + # Edge cases don't do much beyond +/-1, except for infinity. 
+ if gen_name == "edge_cases" + xlims = (-1.1, 1.1) + xlims_log = (-1.1, 1.1) + end + + # Turn domain errors into NaN + func(x) = map_or(x, orig_func, NaN) + + # Parse a series of X values produced by the generator + inputs = readlines(input_file) + gen_x = map((v) -> parse(Float32, v), inputs) + + do_plot( + fig, + gen_x, + func, + xlims[1], + xlims[2], + "$fn_name $gen_name (linear scale)", + lin_out_file, + false, + ) + + do_plot( + fig, + gen_x, + func, + xlims_log[1], + xlims_log[2], + "$fn_name $gen_name (log scale)", + log_out_file, + true, + ) +end + +"Create a single plot" +function do_plot( + fig::Figure, + gen_x::Vector{F}, + func::Function, + xmin::AbstractFloat, + xmax::AbstractFloat, + title::String, + out_file::String, + logscale::Bool, +)::Nothing where {F<:AbstractFloat} + println("plotting $title") + + # `gen_x` is the values the generator produces. `actual_x` is for plotting a + # continuous function. + input_min = xmin - 1.0 + input_max = xmax + 1.0 + gen_x = filter((v) -> v >= input_min && v <= input_max, gen_x) + markersize = length(gen_x) < 10_000 ? 6.0 : 4.0 + + steps = 10_000 + if logscale + r = LinRange(symlog10(input_min), symlog10(input_max), steps) + actual_x = sympow10.(r) + xscale = Makie.pseudolog10 + else + actual_x = LinRange(input_min, input_max, steps) + xscale = identity + end + + gen_y = @. func(gen_x) + actual_y = @. func(actual_x) + + ax = Axis(fig[1, 1], xscale = xscale, title = title) + + lines!( + ax, + actual_x, + actual_y, + color = (:lightblue, 0.6), + linewidth = 6.0, + label = "true function", + ) + scatter!( + ax, + gen_x, + gen_y, + color = (:darkblue, 0.9), + markersize = markersize, + label = "checked inputs", + ) + axislegend(ax, position = :rb, framevisible = false) + + save(out_file, fig) + delete!(ax) +end + +"Apply a function, returning the default if there is a domain error" +function map_or(input::AbstractFloat, f::Function, default::Any)::Union{AbstractFloat,Any} + try + return f(input) + catch + return default + end +end + +# Operations for logarithms that are symmetric about 0 +C = 10 +symlog10(x::Number) = sign(x) * (log10(1 + abs(x) / (10^C))) +sympow10(x::Number) = (10^C) * (10^x - 1) + +main() diff --git a/library/compiler-builtins/libm-test/src/domain.rs b/library/compiler-builtins/libm-test/src/domain.rs new file mode 100644 index 0000000000000..94641be9b5483 --- /dev/null +++ b/library/compiler-builtins/libm-test/src/domain.rs @@ -0,0 +1,292 @@ +//! Traits and operations related to bounds of a function. + +use std::fmt; +use std::ops::Bound; + +use libm::support::Int; + +use crate::{BaseName, Float, FloatExt, Identifier}; + +/// Representation of a single dimension of a function's domain. +#[derive(Clone, Debug)] +pub struct Domain { + /// Start of the region for which a function is defined (ignoring poles). + pub start: Bound, + /// Endof the region for which a function is defined (ignoring poles). + pub end: Bound, + /// Additional points to check closer around. These can be e.g. undefined asymptotes or + /// inflection points. + pub check_points: Option BoxIter>, +} + +type BoxIter = Box>; + +impl Domain { + /// The start of this domain, saturating at negative infinity. + pub fn range_start(&self) -> F { + match self.start { + Bound::Included(v) => v, + Bound::Excluded(v) => v.next_up(), + Bound::Unbounded => F::NEG_INFINITY, + } + } + + /// The end of this domain, saturating at infinity. 
+ pub fn range_end(&self) -> F { + match self.end { + Bound::Included(v) => v, + Bound::Excluded(v) => v.next_down(), + Bound::Unbounded => F::INFINITY, + } + } +} + +/// A value that may be any float type or any integer type. +#[derive(Clone, Debug)] +pub enum EitherPrim { + Float(F), + Int(I), +} + +impl EitherPrim { + pub fn unwrap_float(self) -> F { + match self { + EitherPrim::Float(f) => f, + EitherPrim::Int(_) => panic!("expected float; got {self:?}"), + } + } + + pub fn unwrap_int(self) -> I { + match self { + EitherPrim::Float(_) => panic!("expected int; got {self:?}"), + EitherPrim::Int(i) => i, + } + } +} + +/// Convenience 1-dimensional float domains. +impl Domain { + /// x ∈ ℝ + const UNBOUNDED: Self = Self { + start: Bound::Unbounded, + end: Bound::Unbounded, + check_points: None, + }; + + /// x ∈ ℝ >= 0 + const POSITIVE: Self = Self { + start: Bound::Included(F::ZERO), + end: Bound::Unbounded, + check_points: None, + }; + + /// x ∈ ℝ > 0 + const STRICTLY_POSITIVE: Self = Self { + start: Bound::Excluded(F::ZERO), + end: Bound::Unbounded, + check_points: None, + }; + + /// Wrap in the float variant of [`EitherPrim`]. + const fn into_prim_float(self) -> EitherPrim> { + EitherPrim::Float(self) + } +} + +/// Convenience 1-dimensional integer domains. +impl Domain { + /// x ∈ ℝ + const UNBOUNDED_INT: Self = Self { + start: Bound::Unbounded, + end: Bound::Unbounded, + check_points: None, + }; + + /// Wrap in the int variant of [`EitherPrim`]. + const fn into_prim_int(self) -> EitherPrim, Self> { + EitherPrim::Int(self) + } +} + +/// Multidimensional domains, represented as an array of 1-D domains. +impl EitherPrim, Domain> { + /// x ∈ ℝ + const UNBOUNDED1: [Self; 1] = [Domain { + start: Bound::Unbounded, + end: Bound::Unbounded, + check_points: None, + } + .into_prim_float()]; + + /// {x1, x2} ∈ ℝ + const UNBOUNDED2: [Self; 2] = [ + Domain::UNBOUNDED.into_prim_float(), + Domain::UNBOUNDED.into_prim_float(), + ]; + + /// {x1, x2, x3} ∈ ℝ + const UNBOUNDED3: [Self; 3] = [ + Domain::UNBOUNDED.into_prim_float(), + Domain::UNBOUNDED.into_prim_float(), + Domain::UNBOUNDED.into_prim_float(), + ]; + + /// {x1, x2} ∈ ℝ, one float and one int + const UNBOUNDED_F_I: [Self; 2] = [ + Domain::UNBOUNDED.into_prim_float(), + Domain::UNBOUNDED_INT.into_prim_int(), + ]; + + /// x ∈ ℝ >= 0 + const POSITIVE: [Self; 1] = [Domain::POSITIVE.into_prim_float()]; + + /// x ∈ ℝ > 0 + const STRICTLY_POSITIVE: [Self; 1] = [Domain::STRICTLY_POSITIVE.into_prim_float()]; + + /// Used for versions of `asin` and `acos`. + const INVERSE_TRIG_PERIODIC: [Self; 1] = [Domain { + start: Bound::Included(F::NEG_ONE), + end: Bound::Included(F::ONE), + check_points: None, + } + .into_prim_float()]; + + /// Domain for `acosh` + const ACOSH: [Self; 1] = [Domain { + start: Bound::Included(F::ONE), + end: Bound::Unbounded, + check_points: None, + } + .into_prim_float()]; + + /// Domain for `atanh` + const ATANH: [Self; 1] = [Domain { + start: Bound::Excluded(F::NEG_ONE), + end: Bound::Excluded(F::ONE), + check_points: None, + } + .into_prim_float()]; + + /// Domain for `sin`, `cos`, and `tan` + const TRIG: [Self; 1] = [Domain { + // Trig functions have special behavior at fractions of π. + check_points: Some(|| Box::new([-F::PI, -F::FRAC_PI_2, F::FRAC_PI_2, F::PI].into_iter())), + ..Domain::UNBOUNDED + } + .into_prim_float()]; + + /// Domain for `log` in various bases + const LOG: [Self; 1] = Self::STRICTLY_POSITIVE; + + /// Domain for `log1p` i.e. 
`log(1 + x)` + const LOG1P: [Self; 1] = [Domain { + start: Bound::Excluded(F::NEG_ONE), + end: Bound::Unbounded, + check_points: None, + } + .into_prim_float()]; + + /// Domain for `sqrt` + const SQRT: [Self; 1] = Self::POSITIVE; + + /// Domain for `gamma` + const GAMMA: [Self; 1] = [Domain { + check_points: Some(|| { + // Negative integers are asymptotes + Box::new((0..u8::MAX).map(|scale| { + let mut base = F::ZERO; + for _ in 0..scale { + base = base - F::ONE; + } + base + })) + }), + // Whether or not gamma is defined for negative numbers is implementation dependent + ..Domain::UNBOUNDED + } + .into_prim_float()]; + + /// Domain for `loggamma` + const LGAMMA: [Self; 1] = Self::STRICTLY_POSITIVE; + + /// Domain for `jn` and `yn`. + // FIXME: the domain should provide some sort of "reasonable range" so we don't actually test + // the entire system unbounded. + const BESSEL_N: [Self; 2] = [ + Domain::UNBOUNDED_INT.into_prim_int(), + Domain::UNBOUNDED.into_prim_float(), + ]; +} + +/// Get the domain for a given function. +pub fn get_domain( + id: Identifier, + argnum: usize, +) -> EitherPrim, Domain> { + let x = match id.base_name() { + BaseName::Acos => &EitherPrim::INVERSE_TRIG_PERIODIC[..], + BaseName::Acosh => &EitherPrim::ACOSH[..], + BaseName::Asin => &EitherPrim::INVERSE_TRIG_PERIODIC[..], + BaseName::Asinh => &EitherPrim::UNBOUNDED1[..], + BaseName::Atan => &EitherPrim::UNBOUNDED1[..], + BaseName::Atan2 => &EitherPrim::UNBOUNDED2[..], + BaseName::Cbrt => &EitherPrim::UNBOUNDED1[..], + BaseName::Atanh => &EitherPrim::ATANH[..], + BaseName::Ceil => &EitherPrim::UNBOUNDED1[..], + BaseName::Cosh => &EitherPrim::UNBOUNDED1[..], + BaseName::Copysign => &EitherPrim::UNBOUNDED2[..], + BaseName::Cos => &EitherPrim::TRIG[..], + BaseName::Exp => &EitherPrim::UNBOUNDED1[..], + BaseName::Erf => &EitherPrim::UNBOUNDED1[..], + BaseName::Erfc => &EitherPrim::UNBOUNDED1[..], + BaseName::Expm1 => &EitherPrim::UNBOUNDED1[..], + BaseName::Exp10 => &EitherPrim::UNBOUNDED1[..], + BaseName::Exp2 => &EitherPrim::UNBOUNDED1[..], + BaseName::Frexp => &EitherPrim::UNBOUNDED1[..], + BaseName::Fabs => &EitherPrim::UNBOUNDED1[..], + BaseName::Fdim => &EitherPrim::UNBOUNDED2[..], + BaseName::Floor => &EitherPrim::UNBOUNDED1[..], + BaseName::Fma => &EitherPrim::UNBOUNDED3[..], + BaseName::Fmax => &EitherPrim::UNBOUNDED2[..], + BaseName::Fmaximum => &EitherPrim::UNBOUNDED2[..], + BaseName::FmaximumNum => &EitherPrim::UNBOUNDED2[..], + BaseName::Fmin => &EitherPrim::UNBOUNDED2[..], + BaseName::Fminimum => &EitherPrim::UNBOUNDED2[..], + BaseName::FminimumNum => &EitherPrim::UNBOUNDED2[..], + BaseName::Fmod => &EitherPrim::UNBOUNDED2[..], + BaseName::Hypot => &EitherPrim::UNBOUNDED2[..], + BaseName::Ilogb => &EitherPrim::UNBOUNDED1[..], + BaseName::J0 => &EitherPrim::UNBOUNDED1[..], + BaseName::J1 => &EitherPrim::UNBOUNDED1[..], + BaseName::Jn => &EitherPrim::BESSEL_N[..], + BaseName::Ldexp => &EitherPrim::UNBOUNDED_F_I[..], + BaseName::Lgamma => &EitherPrim::LGAMMA[..], + BaseName::LgammaR => &EitherPrim::LGAMMA[..], + BaseName::Log => &EitherPrim::LOG[..], + BaseName::Log10 => &EitherPrim::LOG[..], + BaseName::Log1p => &EitherPrim::LOG1P[..], + BaseName::Log2 => &EitherPrim::LOG[..], + BaseName::Modf => &EitherPrim::UNBOUNDED1[..], + BaseName::Nextafter => &EitherPrim::UNBOUNDED2[..], + BaseName::Pow => &EitherPrim::UNBOUNDED2[..], + BaseName::Remainder => &EitherPrim::UNBOUNDED2[..], + BaseName::Remquo => &EitherPrim::UNBOUNDED2[..], + BaseName::Rint => &EitherPrim::UNBOUNDED1[..], + BaseName::Round => 
&EitherPrim::UNBOUNDED1[..], + BaseName::Roundeven => &EitherPrim::UNBOUNDED1[..], + BaseName::Scalbn => &EitherPrim::UNBOUNDED_F_I[..], + BaseName::Sin => &EitherPrim::TRIG[..], + BaseName::Sincos => &EitherPrim::TRIG[..], + BaseName::Sinh => &EitherPrim::UNBOUNDED1[..], + BaseName::Sqrt => &EitherPrim::SQRT[..], + BaseName::Tan => &EitherPrim::TRIG[..], + BaseName::Tanh => &EitherPrim::UNBOUNDED1[..], + BaseName::Tgamma => &EitherPrim::GAMMA[..], + BaseName::Trunc => &EitherPrim::UNBOUNDED1[..], + BaseName::Y0 => &EitherPrim::UNBOUNDED1[..], + BaseName::Y1 => &EitherPrim::UNBOUNDED1[..], + BaseName::Yn => &EitherPrim::BESSEL_N[..], + }; + + x[argnum].clone() +} diff --git a/library/compiler-builtins/libm-test/src/f8_impl.rs b/library/compiler-builtins/libm-test/src/f8_impl.rs new file mode 100644 index 0000000000000..905c7d7fde92a --- /dev/null +++ b/library/compiler-builtins/libm-test/src/f8_impl.rs @@ -0,0 +1,505 @@ +//! An IEEE-compliant 8-bit float type for testing purposes. + +use std::cmp::{self, Ordering}; +use std::{fmt, ops}; + +use crate::Float; + +/// Sometimes verifying float logic is easiest when all values can quickly be checked exhaustively +/// or by hand. +/// +/// IEEE-754 compliant type that includes a 1 bit sign, 4 bit exponent, and 3 bit significand. +/// Bias is -7. +/// +/// Based on . +#[derive(Clone, Copy)] +#[repr(transparent)] +#[allow(non_camel_case_types)] +pub struct f8(u8); + +impl Float for f8 { + type Int = u8; + type SignedInt = i8; + + const ZERO: Self = Self(0b0_0000_000); + const NEG_ZERO: Self = Self(0b1_0000_000); + const ONE: Self = Self(0b0_0111_000); + const NEG_ONE: Self = Self(0b1_0111_000); + const MAX: Self = Self(0b0_1110_111); + const MIN: Self = Self(0b1_1110_111); + const INFINITY: Self = Self(0b0_1111_000); + const NEG_INFINITY: Self = Self(0b1_1111_000); + const NAN: Self = Self(0b0_1111_100); + const NEG_NAN: Self = Self(0b1_1111_100); + const MIN_POSITIVE_NORMAL: Self = Self(1 << Self::SIG_BITS); + // FIXME: incorrect values + const EPSILON: Self = Self::ZERO; + const PI: Self = Self::ZERO; + const NEG_PI: Self = Self::ZERO; + const FRAC_PI_2: Self = Self::ZERO; + + const BITS: u32 = 8; + const SIG_BITS: u32 = 3; + const SIGN_MASK: Self::Int = 0b1_0000_000; + const SIG_MASK: Self::Int = 0b0_0000_111; + const EXP_MASK: Self::Int = 0b0_1111_000; + const IMPLICIT_BIT: Self::Int = 0b0_0001_000; + + fn to_bits(self) -> Self::Int { + self.0 + } + + fn to_bits_signed(self) -> Self::SignedInt { + self.0 as i8 + } + + fn is_nan(self) -> bool { + self.0 & Self::EXP_MASK == Self::EXP_MASK && self.0 & Self::SIG_MASK != 0 + } + + fn is_infinite(self) -> bool { + self.0 & Self::EXP_MASK == Self::EXP_MASK && self.0 & Self::SIG_MASK == 0 + } + + fn is_sign_negative(self) -> bool { + self.0 & Self::SIGN_MASK != 0 + } + + fn from_bits(a: Self::Int) -> Self { + Self(a) + } + + fn abs(self) -> Self { + libm::generic::fabs(self) + } + + fn copysign(self, other: Self) -> Self { + libm::generic::copysign(self, other) + } + + fn fma(self, _y: Self, _z: Self) -> Self { + unimplemented!() + } + + fn normalize(_significand: Self::Int) -> (i32, Self::Int) { + unimplemented!() + } +} + +impl f8 { + pub const ALL_LEN: usize = 240; + + /// All non-infinite non-NaN values of `f8` + pub const ALL: [Self; Self::ALL_LEN] = [ + // -m*2^7 + Self(0b1_1110_111), // -240 + Self(0b1_1110_110), + Self(0b1_1110_101), + Self(0b1_1110_100), + Self(0b1_1110_011), + Self(0b1_1110_010), + Self(0b1_1110_001), + Self(0b1_1110_000), // -128 + // -m*2^6 + Self(0b1_1101_111), // -120 + 
Self(0b1_1101_110), + Self(0b1_1101_101), + Self(0b1_1101_100), + Self(0b1_1101_011), + Self(0b1_1101_010), + Self(0b1_1101_001), + Self(0b1_1101_000), // -64 + // -m*2^5 + Self(0b1_1100_111), // -60 + Self(0b1_1100_110), + Self(0b1_1100_101), + Self(0b1_1100_100), + Self(0b1_1100_011), + Self(0b1_1100_010), + Self(0b1_1100_001), + Self(0b1_1100_000), // -32 + // -m*2^4 + Self(0b1_1011_111), // -30 + Self(0b1_1011_110), + Self(0b1_1011_101), + Self(0b1_1011_100), + Self(0b1_1011_011), + Self(0b1_1011_010), + Self(0b1_1011_001), + Self(0b1_1011_000), // -16 + // -m*2^3 + Self(0b1_1010_111), // -15 + Self(0b1_1010_110), + Self(0b1_1010_101), + Self(0b1_1010_100), + Self(0b1_1010_011), + Self(0b1_1010_010), + Self(0b1_1010_001), + Self(0b1_1010_000), // -8 + // -m*2^2 + Self(0b1_1001_111), // -7.5 + Self(0b1_1001_110), + Self(0b1_1001_101), + Self(0b1_1001_100), + Self(0b1_1001_011), + Self(0b1_1001_010), + Self(0b1_1001_001), + Self(0b1_1001_000), // -4 + // -m*2^1 + Self(0b1_1000_111), // -3.75 + Self(0b1_1000_110), + Self(0b1_1000_101), + Self(0b1_1000_100), + Self(0b1_1000_011), + Self(0b1_1000_010), + Self(0b1_1000_001), + Self(0b1_1000_000), // -2 + // -m*2^0 + Self(0b1_0111_111), // -1.875 + Self(0b1_0111_110), + Self(0b1_0111_101), + Self(0b1_0111_100), + Self(0b1_0111_011), + Self(0b1_0111_010), + Self(0b1_0111_001), + Self(0b1_0111_000), // -1 + // -m*2^-1 + Self(0b1_0110_111), // −0.9375 + Self(0b1_0110_110), + Self(0b1_0110_101), + Self(0b1_0110_100), + Self(0b1_0110_011), + Self(0b1_0110_010), + Self(0b1_0110_001), + Self(0b1_0110_000), // -0.5 + // -m*2^-2 + Self(0b1_0101_111), // −0.46875 + Self(0b1_0101_110), + Self(0b1_0101_101), + Self(0b1_0101_100), + Self(0b1_0101_011), + Self(0b1_0101_010), + Self(0b1_0101_001), + Self(0b1_0101_000), // -0.25 + // -m*2^-3 + Self(0b1_0100_111), // −0.234375 + Self(0b1_0100_110), + Self(0b1_0100_101), + Self(0b1_0100_100), + Self(0b1_0100_011), + Self(0b1_0100_010), + Self(0b1_0100_001), + Self(0b1_0100_000), // -0.125 + // -m*2^-4 + Self(0b1_0011_111), // −0.1171875 + Self(0b1_0011_110), + Self(0b1_0011_101), + Self(0b1_0011_100), + Self(0b1_0011_011), + Self(0b1_0011_010), + Self(0b1_0011_001), + Self(0b1_0011_000), // −0.0625 + // -m*2^-5 + Self(0b1_0010_111), // −0.05859375 + Self(0b1_0010_110), + Self(0b1_0010_101), + Self(0b1_0010_100), + Self(0b1_0010_011), + Self(0b1_0010_010), + Self(0b1_0010_001), + Self(0b1_0010_000), // −0.03125 + // -m*2^-6 + Self(0b1_0001_111), // −0.029296875 + Self(0b1_0001_110), + Self(0b1_0001_101), + Self(0b1_0001_100), + Self(0b1_0001_011), + Self(0b1_0001_010), + Self(0b1_0001_001), + Self(0b1_0001_000), // −0.015625 + // -m*2^-7 subnormal numbers + Self(0b1_0000_111), // −0.013671875 + Self(0b1_0000_110), + Self(0b1_0000_101), + Self(0b1_0000_100), + Self(0b1_0000_011), + Self(0b1_0000_010), + Self(0b1_0000_001), // −0.001953125 + // Zeroes + Self(0b1_0000_000), // -0.0 + Self(0b0_0000_000), // 0.0 + // m*2^-7 // subnormal numbers + Self(0b0_0000_001), + Self(0b0_0000_010), + Self(0b0_0000_011), + Self(0b0_0000_100), + Self(0b0_0000_101), + Self(0b0_0000_110), + Self(0b0_0000_111), // 0.013671875 + // m*2^-6 + Self(0b0_0001_000), // 0.015625 + Self(0b0_0001_001), + Self(0b0_0001_010), + Self(0b0_0001_011), + Self(0b0_0001_100), + Self(0b0_0001_101), + Self(0b0_0001_110), + Self(0b0_0001_111), // 0.029296875 + // m*2^-5 + Self(0b0_0010_000), // 0.03125 + Self(0b0_0010_001), + Self(0b0_0010_010), + Self(0b0_0010_011), + Self(0b0_0010_100), + Self(0b0_0010_101), + Self(0b0_0010_110), + Self(0b0_0010_111), 
// 0.05859375 + // m*2^-4 + Self(0b0_0011_000), // 0.0625 + Self(0b0_0011_001), + Self(0b0_0011_010), + Self(0b0_0011_011), + Self(0b0_0011_100), + Self(0b0_0011_101), + Self(0b0_0011_110), + Self(0b0_0011_111), // 0.1171875 + // m*2^-3 + Self(0b0_0100_000), // 0.125 + Self(0b0_0100_001), + Self(0b0_0100_010), + Self(0b0_0100_011), + Self(0b0_0100_100), + Self(0b0_0100_101), + Self(0b0_0100_110), + Self(0b0_0100_111), // 0.234375 + // m*2^-2 + Self(0b0_0101_000), // 0.25 + Self(0b0_0101_001), + Self(0b0_0101_010), + Self(0b0_0101_011), + Self(0b0_0101_100), + Self(0b0_0101_101), + Self(0b0_0101_110), + Self(0b0_0101_111), // 0.46875 + // m*2^-1 + Self(0b0_0110_000), // 0.5 + Self(0b0_0110_001), + Self(0b0_0110_010), + Self(0b0_0110_011), + Self(0b0_0110_100), + Self(0b0_0110_101), + Self(0b0_0110_110), + Self(0b0_0110_111), // 0.9375 + // m*2^0 + Self(0b0_0111_000), // 1 + Self(0b0_0111_001), + Self(0b0_0111_010), + Self(0b0_0111_011), + Self(0b0_0111_100), + Self(0b0_0111_101), + Self(0b0_0111_110), + Self(0b0_0111_111), // 1.875 + // m*2^1 + Self(0b0_1000_000), // 2 + Self(0b0_1000_001), + Self(0b0_1000_010), + Self(0b0_1000_011), + Self(0b0_1000_100), + Self(0b0_1000_101), + Self(0b0_1000_110), + Self(0b0_1000_111), // 3.75 + // m*2^2 + Self(0b0_1001_000), // 4 + Self(0b0_1001_001), + Self(0b0_1001_010), + Self(0b0_1001_011), + Self(0b0_1001_100), + Self(0b0_1001_101), + Self(0b0_1001_110), + Self(0b0_1001_111), // 7.5 + // m*2^3 + Self(0b0_1010_000), // 8 + Self(0b0_1010_001), + Self(0b0_1010_010), + Self(0b0_1010_011), + Self(0b0_1010_100), + Self(0b0_1010_101), + Self(0b0_1010_110), + Self(0b0_1010_111), // 15 + // m*2^4 + Self(0b0_1011_000), // 16 + Self(0b0_1011_001), + Self(0b0_1011_010), + Self(0b0_1011_011), + Self(0b0_1011_100), + Self(0b0_1011_101), + Self(0b0_1011_110), + Self(0b0_1011_111), // 30 + // m*2^5 + Self(0b0_1100_000), // 32 + Self(0b0_1100_001), + Self(0b0_1100_010), + Self(0b0_1100_011), + Self(0b0_1100_100), + Self(0b0_1100_101), + Self(0b0_1100_110), + Self(0b0_1100_111), // 60 + // m*2^6 + Self(0b0_1101_000), // 64 + Self(0b0_1101_001), + Self(0b0_1101_010), + Self(0b0_1101_011), + Self(0b0_1101_100), + Self(0b0_1101_101), + Self(0b0_1101_110), + Self(0b0_1101_111), // 120 + // m*2^7 + Self(0b0_1110_000), // 128 + Self(0b0_1110_001), + Self(0b0_1110_010), + Self(0b0_1110_011), + Self(0b0_1110_100), + Self(0b0_1110_101), + Self(0b0_1110_110), + Self(0b0_1110_111), // 240 + ]; +} + +impl ops::Add for f8 { + type Output = Self; + fn add(self, _rhs: Self) -> Self::Output { + unimplemented!() + } +} + +impl ops::Sub for f8 { + type Output = Self; + fn sub(self, _rhs: Self) -> Self::Output { + unimplemented!() + } +} +impl ops::Mul for f8 { + type Output = Self; + fn mul(self, _rhs: Self) -> Self::Output { + unimplemented!() + } +} +impl ops::Div for f8 { + type Output = Self; + fn div(self, _rhs: Self) -> Self::Output { + unimplemented!() + } +} + +impl ops::Neg for f8 { + type Output = Self; + fn neg(self) -> Self::Output { + Self(self.0 ^ Self::SIGN_MASK) + } +} + +impl ops::Rem for f8 { + type Output = Self; + fn rem(self, _rhs: Self) -> Self::Output { + unimplemented!() + } +} + +impl ops::AddAssign for f8 { + fn add_assign(&mut self, _rhs: Self) { + unimplemented!() + } +} + +impl ops::SubAssign for f8 { + fn sub_assign(&mut self, _rhs: Self) { + unimplemented!() + } +} + +impl ops::MulAssign for f8 { + fn mul_assign(&mut self, _rhs: Self) { + unimplemented!() + } +} + +impl cmp::PartialEq for f8 { + fn eq(&self, other: &Self) -> bool { + if self.is_nan() || 
other.is_nan() { + false + } else if self.abs().to_bits() | other.abs().to_bits() == 0 { + true + } else { + self.0 == other.0 + } + } +} +impl cmp::PartialOrd for f8 { + fn partial_cmp(&self, other: &Self) -> Option { + let inf_rep = f8::EXP_MASK; + + let a_abs = self.abs().to_bits(); + let b_abs = other.abs().to_bits(); + + // If either a or b is NaN, they are unordered. + if a_abs > inf_rep || b_abs > inf_rep { + return None; + } + + // If a and b are both zeros, they are equal. + if a_abs | b_abs == 0 { + return Some(Ordering::Equal); + } + + let a_srep = self.to_bits_signed(); + let b_srep = other.to_bits_signed(); + let res = a_srep.cmp(&b_srep); + + if a_srep & b_srep >= 0 { + // If at least one of a and b is positive, we get the same result comparing + // a and b as signed integers as we would with a fp_ting-point compare. + Some(res) + } else { + // Otherwise, both are negative, so we need to flip the sense of the + // comparison to get the correct result. + Some(res.reverse()) + } + } +} +impl fmt::Display for f8 { + fn fmt(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result { + unimplemented!() + } +} + +impl fmt::Debug for f8 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Binary::fmt(self, f) + } +} + +impl fmt::Binary for f8 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let v = self.0; + write!( + f, + "0b{:b}_{:04b}_{:03b}", + v >> 7, + (v & Self::EXP_MASK) >> Self::SIG_BITS, + v & Self::SIG_MASK + ) + } +} + +impl fmt::LowerHex for f8 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} + +pub const fn hf8(s: &str) -> f8 { + let Ok(bits) = libm::support::hex_float::parse_hex_exact(s, 8, 3) else { + panic!() + }; + f8(bits as u8) +} diff --git a/library/compiler-builtins/libm-test/src/generate.rs b/library/compiler-builtins/libm-test/src/generate.rs new file mode 100644 index 0000000000000..da080d23fa79c --- /dev/null +++ b/library/compiler-builtins/libm-test/src/generate.rs @@ -0,0 +1,50 @@ +//! Different generators that can create random or systematic bit patterns. + +pub mod case_list; +pub mod edge_cases; +pub mod random; +pub mod spaced; + +/// A wrapper to turn any iterator into an `ExactSizeIterator`. Asserts the final result to ensure +/// the provided size was correct. +#[derive(Debug)] +pub struct KnownSize { + total: u64, + current: u64, + iter: I, +} + +impl KnownSize { + pub fn new(iter: I, total: u64) -> Self { + Self { + total, + current: 0, + iter, + } + } +} + +impl Iterator for KnownSize { + type Item = I::Item; + + fn next(&mut self) -> Option { + let next = self.iter.next(); + if next.is_some() { + self.current += 1; + return next; + } + + assert_eq!( + self.current, self.total, + "total items did not match expected" + ); + None + } + + fn size_hint(&self) -> (usize, Option) { + let remaining = usize::try_from(self.total - self.current).unwrap(); + (remaining, Some(remaining)) + } +} + +impl ExactSizeIterator for KnownSize {} diff --git a/library/compiler-builtins/libm-test/src/generate/case_list.rs b/library/compiler-builtins/libm-test/src/generate/case_list.rs new file mode 100644 index 0000000000000..43b28722f2dd2 --- /dev/null +++ b/library/compiler-builtins/libm-test/src/generate/case_list.rs @@ -0,0 +1,896 @@ +//! Test cases to verify specific values. +//! +//! Each routine can have a set of inputs and, optinoally, outputs. If an output is provided, it +//! will be used to check against. If only inputs are provided, the case will be checked against +//! a basis. +//! +//! 
This is useful for adding regression tests or expected failures. + +use libm::hf64; +#[cfg(f128_enabled)] +use libm::hf128; + +use crate::{CheckBasis, CheckCtx, GeneratorKind, MathOp, op}; + +pub struct TestCase { + pub input: Op::RustArgs, + pub output: Option, +} + +impl TestCase { + #[expect(dead_code)] + fn append_inputs(v: &mut Vec, l: &[Op::RustArgs]) { + v.extend(l.iter().copied().map(|input| Self { + input, + output: None, + })); + } + + fn append_pairs(v: &mut Vec, l: &[(Op::RustArgs, Option)]) + where + Op::RustRet: Copy, + { + v.extend( + l.iter() + .copied() + .map(|(input, output)| Self { input, output }), + ); + } +} + +fn acos_cases() -> Vec> { + vec![] +} + +fn acosf_cases() -> Vec> { + vec![] +} + +fn acosh_cases() -> Vec> { + vec![] +} + +fn acoshf_cases() -> Vec> { + vec![] +} + +fn asin_cases() -> Vec> { + vec![] +} + +fn asinf_cases() -> Vec> { + vec![] +} + +fn asinh_cases() -> Vec> { + vec![] +} + +fn asinhf_cases() -> Vec> { + vec![] +} + +fn atan_cases() -> Vec> { + vec![] +} + +fn atan2_cases() -> Vec> { + vec![] +} + +fn atan2f_cases() -> Vec> { + vec![] +} + +fn atanf_cases() -> Vec> { + vec![] +} + +fn atanh_cases() -> Vec> { + vec![] +} + +fn atanhf_cases() -> Vec> { + vec![] +} + +fn cbrt_cases() -> Vec> { + vec![] +} + +fn cbrtf_cases() -> Vec> { + vec![] +} + +fn ceil_cases() -> Vec> { + vec![] +} + +fn ceilf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn ceilf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn ceilf16_cases() -> Vec> { + vec![] +} + +fn copysign_cases() -> Vec> { + vec![] +} + +fn copysignf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn copysignf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn copysignf16_cases() -> Vec> { + vec![] +} + +fn cos_cases() -> Vec> { + vec![] +} + +fn cosf_cases() -> Vec> { + vec![] +} + +fn cosh_cases() -> Vec> { + vec![] +} + +fn coshf_cases() -> Vec> { + vec![] +} + +fn erf_cases() -> Vec> { + vec![] +} + +fn erfc_cases() -> Vec> { + vec![] +} + +fn erfcf_cases() -> Vec> { + vec![] +} + +fn erff_cases() -> Vec> { + vec![] +} + +fn exp_cases() -> Vec> { + vec![] +} + +fn exp10_cases() -> Vec> { + vec![] +} + +fn exp10f_cases() -> Vec> { + vec![] +} + +fn exp2_cases() -> Vec> { + vec![] +} + +fn exp2f_cases() -> Vec> { + vec![] +} + +fn expf_cases() -> Vec> { + vec![] +} + +fn expm1_cases() -> Vec> { + vec![] +} + +fn expm1f_cases() -> Vec> { + vec![] +} + +fn fabs_cases() -> Vec> { + vec![] +} + +fn fabsf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn fabsf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn fabsf16_cases() -> Vec> { + vec![] +} + +fn fdim_cases() -> Vec> { + vec![] +} + +fn fdimf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn fdimf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn fdimf16_cases() -> Vec> { + vec![] +} + +fn floor_cases() -> Vec> { + vec![] +} + +fn floorf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn floorf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn floorf16_cases() -> Vec> { + vec![] +} + +fn fma_cases() -> Vec> { + let mut v = vec![]; + TestCase::append_pairs( + &mut v, + &[ + // Previous failure with incorrect sign + ((5e-324, -5e-324, 0.0), Some(-0.0)), + ], + ); + v +} + +fn fmaf_cases() -> Vec> { + let mut v = vec![]; + TestCase::append_pairs( + &mut v, + &[ + // Known rounding error for some implementations (notably MinGW) + ( + (-1.9369631e13f32, 2.1513551e-7, -1.7354427e-24), + Some(-4167095.8), + ), + ], + ); + v +} + +#[cfg(f128_enabled)] +fn 
fmaf128_cases() -> Vec> { + let mut v = vec![]; + TestCase::append_pairs( + &mut v, + &[ + ( + // Tricky rounding case that previously failed in extensive tests + ( + hf128!("-0x1.1966cc01966cc01966cc01966f06p-25"), + hf128!("-0x1.669933fe69933fe69933fe6997c9p-16358"), + hf128!("-0x0.000000000000000000000000048ap-16382"), + ), + Some(hf128!("0x0.c5171470a3ff5e0f68d751491b18p-16382")), + ), + ( + // Subnormal edge case that caused a failure + ( + hf128!("0x0.7ffffffffffffffffffffffffff7p-16382"), + hf128!("0x1.ffffffffffffffffffffffffffffp-1"), + hf128!("0x0.8000000000000000000000000009p-16382"), + ), + Some(hf128!("0x1.0000000000000000000000000000p-16382")), + ), + ], + ); + v +} + +#[cfg(f16_enabled)] +fn fmaxf16_cases() -> Vec> { + vec![] +} + +fn fmaxf_cases() -> Vec> { + vec![] +} + +fn fmax_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn fmaxf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn fmaximumf16_cases() -> Vec> { + vec![] +} + +fn fmaximumf_cases() -> Vec> { + vec![] +} + +fn fmaximum_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn fmaximumf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn fmaximum_numf16_cases() -> Vec> { + vec![] +} + +fn fmaximum_numf_cases() -> Vec> { + vec![] +} + +fn fmaximum_num_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn fmaximum_numf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn fminf16_cases() -> Vec> { + vec![] +} + +fn fminf_cases() -> Vec> { + vec![] +} + +fn fmin_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn fminf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn fminimumf16_cases() -> Vec> { + vec![] +} + +fn fminimumf_cases() -> Vec> { + vec![] +} + +fn fminimum_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn fminimumf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn fminimum_numf16_cases() -> Vec> { + vec![] +} + +fn fminimum_numf_cases() -> Vec> { + vec![] +} + +fn fminimum_num_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn fminimum_numf128_cases() -> Vec> { + vec![] +} + +fn fmod_cases() -> Vec> { + let mut v = vec![]; + TestCase::append_pairs( + &mut v, + &[ + // Previous failure with incorrect loop iteration + // + ((2.1, 3.123e-320), Some(2.0696e-320)), + ((2.1, 2.253547e-318), Some(1.772535e-318)), + ], + ); + v +} + +fn fmodf_cases() -> Vec> { + let mut v = vec![]; + TestCase::append_pairs( + &mut v, + &[ + // Previous failure with incorrect loop iteration + // + ((2.1, 8.858e-42), Some(8.085e-42)), + ((2.1, 6.39164e-40), Some(6.1636e-40)), + ((5.5, 6.39164e-40), Some(4.77036e-40)), + ((-151.189, 6.39164e-40), Some(-5.64734e-40)), + ], + ); + v +} + +#[cfg(f128_enabled)] +fn fmodf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn fmodf16_cases() -> Vec> { + vec![] +} + +fn frexp_cases() -> Vec> { + vec![] +} + +fn frexpf_cases() -> Vec> { + vec![] +} + +fn hypot_cases() -> Vec> { + vec![] +} + +fn hypotf_cases() -> Vec> { + vec![] +} + +fn ilogb_cases() -> Vec> { + vec![] +} + +fn ilogbf_cases() -> Vec> { + vec![] +} + +fn j0_cases() -> Vec> { + vec![] +} + +fn j0f_cases() -> Vec> { + vec![] +} + +fn j1_cases() -> Vec> { + vec![] +} + +fn j1f_cases() -> Vec> { + vec![] +} + +fn jn_cases() -> Vec> { + vec![] +} + +fn jnf_cases() -> Vec> { + vec![] +} + +fn ldexp_cases() -> Vec> { + vec![] +} + +fn ldexpf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn ldexpf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn ldexpf16_cases() -> Vec> { + vec![] +} + +fn lgamma_cases() -> Vec> { + vec![] 
+} + +fn lgamma_r_cases() -> Vec> { + vec![] +} + +fn lgammaf_cases() -> Vec> { + vec![] +} + +fn lgammaf_r_cases() -> Vec> { + vec![] +} + +fn log_cases() -> Vec> { + vec![] +} + +fn log10_cases() -> Vec> { + vec![] +} + +fn log10f_cases() -> Vec> { + vec![] +} + +fn log1p_cases() -> Vec> { + vec![] +} + +fn log1pf_cases() -> Vec> { + vec![] +} + +fn log2_cases() -> Vec> { + vec![] +} + +fn log2f_cases() -> Vec> { + vec![] +} + +fn logf_cases() -> Vec> { + vec![] +} + +fn modf_cases() -> Vec> { + vec![] +} + +fn modff_cases() -> Vec> { + vec![] +} + +fn nextafter_cases() -> Vec> { + vec![] +} + +fn nextafterf_cases() -> Vec> { + vec![] +} + +fn pow_cases() -> Vec> { + vec![] +} + +fn powf_cases() -> Vec> { + vec![] +} + +fn remainder_cases() -> Vec> { + vec![] +} + +fn remainderf_cases() -> Vec> { + vec![] +} + +fn remquo_cases() -> Vec> { + vec![] +} + +fn remquof_cases() -> Vec> { + vec![] +} + +fn rint_cases() -> Vec> { + let mut v = vec![]; + TestCase::append_pairs( + &mut v, + &[ + // Known failure on i586 + #[cfg(not(x86_no_sse))] + ( + (hf64!("-0x1.e3f13ff995ffcp+38"),), + Some(hf64!("-0x1.e3f13ff994000p+38")), + ), + #[cfg(x86_no_sse)] + ( + (hf64!("-0x1.e3f13ff995ffcp+38"),), + Some(hf64!("-0x1.e3f13ff998000p+38")), + ), + ], + ); + v +} + +fn rintf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn rintf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn rintf16_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn roundf16_cases() -> Vec> { + vec![] +} + +fn round_cases() -> Vec> { + vec![] +} + +fn roundf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn roundf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn roundevenf16_cases() -> Vec> { + vec![] +} + +fn roundeven_cases() -> Vec> { + let mut v = vec![]; + TestCase::append_pairs( + &mut v, + &[ + // Known failure on i586 + #[cfg(not(x86_no_sse))] + ( + (hf64!("-0x1.e3f13ff995ffcp+38"),), + Some(hf64!("-0x1.e3f13ff994000p+38")), + ), + #[cfg(x86_no_sse)] + ( + (hf64!("-0x1.e3f13ff995ffcp+38"),), + Some(hf64!("-0x1.e3f13ff998000p+38")), + ), + ], + ); + v +} + +fn roundevenf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn roundevenf128_cases() -> Vec> { + vec![] +} + +fn scalbn_cases() -> Vec> { + vec![] +} + +fn scalbnf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn scalbnf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn scalbnf16_cases() -> Vec> { + vec![] +} + +fn sin_cases() -> Vec> { + vec![] +} + +fn sincos_cases() -> Vec> { + vec![] +} + +fn sincosf_cases() -> Vec> { + vec![] +} + +fn sinf_cases() -> Vec> { + vec![] +} + +fn sinh_cases() -> Vec> { + vec![] +} + +fn sinhf_cases() -> Vec> { + vec![] +} + +fn sqrt_cases() -> Vec> { + vec![] +} + +fn sqrtf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn sqrtf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn sqrtf16_cases() -> Vec> { + vec![] +} + +fn tan_cases() -> Vec> { + vec![] +} + +fn tanf_cases() -> Vec> { + vec![] +} + +fn tanh_cases() -> Vec> { + vec![] +} + +fn tanhf_cases() -> Vec> { + vec![] +} + +fn tgamma_cases() -> Vec> { + vec![] +} + +fn tgammaf_cases() -> Vec> { + vec![] +} + +fn trunc_cases() -> Vec> { + vec![] +} + +fn truncf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn truncf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn truncf16_cases() -> Vec> { + vec![] +} + +fn y0_cases() -> Vec> { + vec![] +} + +fn y0f_cases() -> Vec> { + vec![] +} + +fn y1_cases() -> Vec> { + vec![] +} + +fn y1f_cases() -> Vec> { + vec![] +} + +fn yn_cases() -> 
Vec> { + vec![] +} + +fn ynf_cases() -> Vec> { + vec![] +} + +pub trait CaseListInput: MathOp + Sized { + fn get_cases() -> Vec>; +} + +macro_rules! impl_case_list { + ( + fn_name: $fn_name:ident, + attrs: [$($attr:meta),*], + ) => { + paste::paste! { + $(#[$attr])* + impl CaseListInput for crate::op::$fn_name::Routine { + fn get_cases() -> Vec> { + [< $fn_name _cases >]() + } + } + } + }; +} + +libm_macros::for_each_function! { + callback: impl_case_list, +} + +/// This is the test generator for standalone tests, i.e. those with no basis. For this, it +/// only extracts tests with a known output. +pub fn get_test_cases_standalone( + ctx: &CheckCtx, +) -> impl Iterator + use<'_, Op> +where + Op: MathOp + CaseListInput, +{ + assert_eq!(ctx.basis, CheckBasis::None); + assert_eq!(ctx.gen_kind, GeneratorKind::List); + Op::get_cases() + .into_iter() + .filter_map(|x| x.output.map(|o| (x.input, o))) +} + +/// Opposite of the above; extract only test cases that don't have a known output, to be run +/// against a basis. +pub fn get_test_cases_basis( + ctx: &CheckCtx, +) -> (impl Iterator + use<'_, Op>, u64) +where + Op: MathOp + CaseListInput, +{ + assert_ne!(ctx.basis, CheckBasis::None); + assert_eq!(ctx.gen_kind, GeneratorKind::List); + + let cases = Op::get_cases(); + let count: u64 = cases + .iter() + .filter(|case| case.output.is_none()) + .count() + .try_into() + .unwrap(); + + ( + cases + .into_iter() + .filter(|x| x.output.is_none()) + .map(|x| x.input), + count, + ) +} diff --git a/library/compiler-builtins/libm-test/src/generate/edge_cases.rs b/library/compiler-builtins/libm-test/src/generate/edge_cases.rs new file mode 100644 index 0000000000000..2fb0746388c15 --- /dev/null +++ b/library/compiler-builtins/libm-test/src/generate/edge_cases.rs @@ -0,0 +1,314 @@ +//! A generator that checks a handful of cases near infinities, zeros, asymptotes, and NaNs. + +use libm::support::{CastInto, Float, Int, MinInt}; + +use crate::domain::get_domain; +use crate::generate::KnownSize; +use crate::op::OpITy; +use crate::run_cfg::{check_near_count, check_point_count}; +use crate::{BaseName, CheckCtx, FloatExt, FloatTy, MathOp, test_log}; + +/// Generate a sequence of edge cases, e.g. numbers near zeroes and infiniteis. +pub trait EdgeCaseInput { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator + Send, u64); +} + +/// Create a list of values around interesting points (infinities, zeroes, NaNs). 
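Aside: the two getters above split the hand-written case list on whether an expected output is present — known outputs are checked directly, the rest are deferred to a reference basis. A generic-free sketch of that partitioning; the `Case` type and both function names here are hypothetical, not the crate's API:

```rust
/// A hand-written test case: inputs plus an optional known-good output.
struct Case {
    input: (f64, f64),
    output: Option<f64>,
}

/// Cases with a known output are checked directly ("standalone").
fn standalone(cases: &[Case]) -> Vec<((f64, f64), f64)> {
    cases
        .iter()
        .filter_map(|c| c.output.map(|o| (c.input, o)))
        .collect()
}

/// Cases without a known output are only run against a reference basis
/// (e.g. MPFR or the system libm); return them together with their count.
fn against_basis(cases: &[Case]) -> (Vec<(f64, f64)>, u64) {
    let inputs: Vec<_> = cases
        .iter()
        .filter(|c| c.output.is_none())
        .map(|c| c.input)
        .collect();
    let count = inputs.len() as u64;
    (inputs, count)
}

fn main() {
    let cases = vec![
        Case { input: (2.1, 3.0), output: Some(2.1 % 3.0) },
        Case { input: (5.5, 0.25), output: None },
    ];
    assert_eq!(standalone(&cases).len(), 1);
    assert_eq!(against_basis(&cases).1, 1);
}
```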
+fn float_edge_cases( + ctx: &CheckCtx, + argnum: usize, +) -> (impl Iterator + Clone, u64) +where + Op: MathOp, +{ + let mut ret = Vec::new(); + let one = OpITy::::ONE; + let values = &mut ret; + let domain = get_domain::<_, i8>(ctx.fn_ident, argnum).unwrap_float(); + let domain_start = domain.range_start(); + let domain_end = domain.range_end(); + + let check_points = check_point_count(ctx); + let near_points = check_near_count(ctx); + + // Check near some notable constants + count_up(Op::FTy::ONE, near_points, values); + count_up(Op::FTy::ZERO, near_points, values); + count_up(Op::FTy::NEG_ONE, near_points, values); + count_down(Op::FTy::ONE, near_points, values); + count_down(Op::FTy::ZERO, near_points, values); + count_down(Op::FTy::NEG_ONE, near_points, values); + values.push(Op::FTy::NEG_ZERO); + + // Check values near the extremes + count_up(Op::FTy::NEG_INFINITY, near_points, values); + count_down(Op::FTy::INFINITY, near_points, values); + count_down(domain_end, near_points, values); + count_up(domain_start, near_points, values); + count_down(domain_start, near_points, values); + count_up(domain_end, near_points, values); + count_down(domain_end, near_points, values); + + // Check some special values that aren't included in the above ranges + values.push(Op::FTy::NAN); + values.extend(Op::FTy::consts().iter()); + + // Check around the maximum subnormal value + let sub_max = Op::FTy::from_bits(Op::FTy::SIG_MASK); + count_up(sub_max, near_points, values); + count_down(sub_max, near_points, values); + count_up(-sub_max, near_points, values); + count_down(-sub_max, near_points, values); + + // Check a few values around the subnormal range + for shift in (0..Op::FTy::SIG_BITS).step_by(Op::FTy::SIG_BITS as usize / 5) { + let v = Op::FTy::from_bits(one << shift); + count_up(v, 2, values); + count_down(v, 2, values); + count_up(-v, 2, values); + count_down(-v, 2, values); + } + + // Check around asymptotes + if let Some(f) = domain.check_points { + let iter = f(); + for x in iter.take(check_points) { + count_up(x, near_points, values); + count_down(x, near_points, values); + } + } + + // Some results may overlap so deduplicate the vector to save test cycles. + values.sort_by_key(|x| x.to_bits()); + values.dedup_by_key(|x| x.to_bits()); + + let count = ret.len().try_into().unwrap(); + + test_log(&format!( + "{gen_kind:?} {basis:?} {fn_ident} arg {arg}/{args}: {count} edge cases", + gen_kind = ctx.gen_kind, + basis = ctx.basis, + fn_ident = ctx.fn_ident, + arg = argnum + 1, + args = ctx.input_count(), + )); + + (ret.into_iter(), count) +} + +/// Add `points` values starting at and including `x` and counting up. Uses the smallest possible +/// increments (1 ULP). +fn count_up(mut x: F, points: u64, values: &mut Vec) { + assert!(!x.is_nan()); + + let mut count = 0; + while x < F::INFINITY && count < points { + values.push(x); + x = x.next_up(); + count += 1; + } +} + +/// Add `points` values starting at and including `x` and counting down. Uses the smallest possible +/// increments (1 ULP). +fn count_down(mut x: F, points: u64, values: &mut Vec) { + assert!(!x.is_nan()); + + let mut count = 0; + while x > F::NEG_INFINITY && count < points { + values.push(x); + x = x.next_down(); + count += 1; + } +} + +/// Create a list of values around interesting integer points (min, zero, max). 
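Aside: `count_up` and `count_down` above walk away from a starting point in 1-ULP steps. A standalone sketch of the same stepping for non-negative `f32` values, using a raw bit increment instead of the crate's `FloatExt::next_up` (which is assumed here to behave like the standard next-up operation):

```rust
/// Collect `points` values starting at `x` and walking upward one ULP at a
/// time. Only valid for non-negative finite starting points, which keeps the
/// bit trick simple: for such floats, bit patterns are ordered like values.
fn ulps_up(mut x: f32, points: usize) -> Vec<f32> {
    assert!(x >= 0.0 && x.is_finite());
    let mut out = Vec::with_capacity(points);
    while x < f32::INFINITY && out.len() < points {
        out.push(x);
        x = f32::from_bits(x.to_bits() + 1);
    }
    out
}

fn main() {
    let near_one = ulps_up(1.0, 3);
    // The first value is 1.0 itself; each following value differs by one ULP.
    assert_eq!(near_one[0], 1.0);
    assert_eq!(near_one[1], f32::from_bits(1.0f32.to_bits() + 1));
    assert!(near_one[2] > near_one[1]);
}
```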
+pub fn int_edge_cases( + ctx: &CheckCtx, + argnum: usize, +) -> (impl Iterator + Clone, u64) +where + i32: CastInto, +{ + let mut values = Vec::new(); + let near_points = check_near_count(ctx); + + // Check around max/min and zero + int_count_around(I::MIN, near_points, &mut values); + int_count_around(I::MAX, near_points, &mut values); + int_count_around(I::ZERO, near_points, &mut values); + int_count_around(I::ZERO, near_points, &mut values); + + if matches!(ctx.base_name, BaseName::Scalbn | BaseName::Ldexp) { + assert_eq!(argnum, 1, "scalbn integer argument should be arg1"); + let (emax, emin, emin_sn) = match ctx.fn_ident.math_op().float_ty { + FloatTy::F16 => { + #[cfg(not(f16_enabled))] + unreachable!(); + #[cfg(f16_enabled)] + (f16::EXP_MAX, f16::EXP_MIN, f16::EXP_MIN_SUBNORM) + } + FloatTy::F32 => (f32::EXP_MAX, f32::EXP_MIN, f32::EXP_MIN_SUBNORM), + FloatTy::F64 => (f64::EXP_MAX, f64::EXP_MIN, f64::EXP_MIN_SUBNORM), + FloatTy::F128 => { + #[cfg(not(f128_enabled))] + unreachable!(); + #[cfg(f128_enabled)] + (f128::EXP_MAX, f128::EXP_MIN, f128::EXP_MIN_SUBNORM) + } + }; + + // `scalbn`/`ldexp` have their trickiest behavior around exponent limits + int_count_around(emax.cast(), near_points, &mut values); + int_count_around(emin.cast(), near_points, &mut values); + int_count_around(emin_sn.cast(), near_points, &mut values); + int_count_around((-emin_sn).cast(), near_points, &mut values); + + // Also check values that cause the maximum possible difference in exponents + int_count_around((emax - emin).cast(), near_points, &mut values); + int_count_around((emin - emax).cast(), near_points, &mut values); + int_count_around((emax - emin_sn).cast(), near_points, &mut values); + int_count_around((emin_sn - emax).cast(), near_points, &mut values); + } + + values.sort(); + values.dedup(); + let count = values.len().try_into().unwrap(); + + test_log(&format!( + "{gen_kind:?} {basis:?} {fn_ident} arg {arg}/{args}: {count} edge cases", + gen_kind = ctx.gen_kind, + basis = ctx.basis, + fn_ident = ctx.fn_ident, + arg = argnum + 1, + args = ctx.input_count(), + )); + + (values.into_iter(), count) +} + +/// Add `points` values both up and down, starting at and including `x`. +fn int_count_around(x: I, points: u64, values: &mut Vec) { + let mut current = x; + for _ in 0..points { + values.push(current); + current = match current.checked_add(I::ONE) { + Some(v) => v, + None => break, + }; + } + + current = x; + for _ in 0..points { + values.push(current); + current = match current.checked_sub(I::ONE) { + Some(v) => v, + None => break, + }; + } +} + +macro_rules! 
impl_edge_case_input { + ($fty:ty) => { + impl EdgeCaseInput for ($fty,) + where + Op: MathOp, + { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { + let (iter0, steps0) = float_edge_cases::(ctx, 0); + let iter0 = iter0.map(|v| (v,)); + (iter0, steps0) + } + } + + impl EdgeCaseInput for ($fty, $fty) + where + Op: MathOp, + { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { + let (iter0, steps0) = float_edge_cases::(ctx, 0); + let (iter1, steps1) = float_edge_cases::(ctx, 1); + let iter = + iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second))); + let count = steps0.checked_mul(steps1).unwrap(); + (iter, count) + } + } + + impl EdgeCaseInput for ($fty, $fty, $fty) + where + Op: MathOp, + { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { + let (iter0, steps0) = float_edge_cases::(ctx, 0); + let (iter1, steps1) = float_edge_cases::(ctx, 1); + let (iter2, steps2) = float_edge_cases::(ctx, 2); + + let iter = iter0 + .flat_map(move |first| iter1.clone().map(move |second| (first, second))) + .flat_map(move |(first, second)| { + iter2.clone().map(move |third| (first, second, third)) + }); + let count = steps0 + .checked_mul(steps1) + .unwrap() + .checked_mul(steps2) + .unwrap(); + + (iter, count) + } + } + + impl EdgeCaseInput for (i32, $fty) + where + Op: MathOp, + { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { + let (iter0, steps0) = int_edge_cases(ctx, 0); + let (iter1, steps1) = float_edge_cases::(ctx, 1); + + let iter = + iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second))); + let count = steps0.checked_mul(steps1).unwrap(); + + (iter, count) + } + } + + impl EdgeCaseInput for ($fty, i32) + where + Op: MathOp, + { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { + let (iter0, steps0) = float_edge_cases::(ctx, 0); + let (iter1, steps1) = int_edge_cases(ctx, 1); + + let iter = + iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second))); + let count = steps0.checked_mul(steps1).unwrap(); + + (iter, count) + } + } + }; +} + +#[cfg(f16_enabled)] +impl_edge_case_input!(f16); +impl_edge_case_input!(f32); +impl_edge_case_input!(f64); +#[cfg(f128_enabled)] +impl_edge_case_input!(f128); + +pub fn get_test_cases( + ctx: &CheckCtx, +) -> (impl Iterator + Send + use<'_, Op>, u64) +where + Op: MathOp, + Op::RustArgs: EdgeCaseInput, +{ + let (iter, count) = Op::RustArgs::get_cases(ctx); + + // Wrap in `KnownSize` so we get an assertion if the cuunt is wrong. 
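Aside: every multi-argument impl above builds its input set the same way — the outer iterator is walked once, the inner iterator is cloned per element, and the advertised case count is the product of the per-argument counts, guarded by `checked_mul`. A small standalone sketch of that pattern (all names here are made up for the example):

```rust
/// Pair every element of `a` with every element of `b`, returning the pairs
/// together with the exact pair count. `b` must be `Clone` because it is
/// re-walked once per element of `a`; `checked_mul` catches an overflowing
/// case count instead of silently wrapping.
fn cartesian<A: Copy, B>(
    a: impl Iterator<Item = A>,
    a_len: u64,
    b: impl Iterator<Item = B> + Clone,
    b_len: u64,
) -> (impl Iterator<Item = (A, B)>, u64) {
    let iter = a.flat_map(move |x| b.clone().map(move |y| (x, y)));
    (iter, a_len.checked_mul(b_len).unwrap())
}

fn main() {
    let (iter, count) = cartesian([0.0f64, 1.0].into_iter(), 2, 0..3, 3);
    assert_eq!(count, 6);
    assert_eq!(iter.count() as u64, count);
}
```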
+ (KnownSize::new(iter, count), count) +} diff --git a/library/compiler-builtins/libm-test/src/generate/random.rs b/library/compiler-builtins/libm-test/src/generate/random.rs new file mode 100644 index 0000000000000..4ee88946d8eaf --- /dev/null +++ b/library/compiler-builtins/libm-test/src/generate/random.rs @@ -0,0 +1,128 @@ +use std::env; +use std::ops::RangeInclusive; +use std::sync::LazyLock; + +use libm::support::Float; +use rand::distr::{Alphanumeric, StandardUniform}; +use rand::prelude::Distribution; +use rand::{Rng, SeedableRng}; +use rand_chacha::ChaCha8Rng; + +use super::KnownSize; +use crate::CheckCtx; +use crate::run_cfg::{int_range, iteration_count}; + +pub(crate) const SEED_ENV: &str = "LIBM_SEED"; + +pub static SEED: LazyLock<[u8; 32]> = LazyLock::new(|| { + let s = env::var(SEED_ENV).unwrap_or_else(|_| { + let mut rng = rand::rng(); + (0..32).map(|_| rng.sample(Alphanumeric) as char).collect() + }); + + s.as_bytes().try_into().unwrap_or_else(|_| { + panic!("Seed must be 32 characters, got `{s}`"); + }) +}); + +/// Generate a sequence of random values of this type. +pub trait RandomInput: Sized { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator + Send, u64); +} + +/// Generate a sequence of deterministically random floats. +fn random_floats(count: u64) -> impl Iterator +where + StandardUniform: Distribution, +{ + let mut rng = ChaCha8Rng::from_seed(*SEED); + + // Generate integers to get a full range of bitpatterns (including NaNs), then convert back + // to the float type. + (0..count).map(move |_| F::from_bits(rng.random::())) +} + +/// Generate a sequence of deterministically random `i32`s within a specified range. +fn random_ints(count: u64, range: RangeInclusive) -> impl Iterator { + let mut rng = ChaCha8Rng::from_seed(*SEED); + (0..count).map(move |_| rng.random_range::(range.clone())) +} + +macro_rules! 
impl_random_input { + ($fty:ty) => { + impl RandomInput for ($fty,) { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { + let count = iteration_count(ctx, 0); + let iter = random_floats(count).map(|f: $fty| (f,)); + (iter, count) + } + } + + impl RandomInput for ($fty, $fty) { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { + let count0 = iteration_count(ctx, 0); + let count1 = iteration_count(ctx, 1); + let iter = random_floats(count0) + .flat_map(move |f1: $fty| random_floats(count1).map(move |f2: $fty| (f1, f2))); + (iter, count0 * count1) + } + } + + impl RandomInput for ($fty, $fty, $fty) { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { + let count0 = iteration_count(ctx, 0); + let count1 = iteration_count(ctx, 1); + let count2 = iteration_count(ctx, 2); + let iter = random_floats(count0).flat_map(move |f1: $fty| { + random_floats(count1).flat_map(move |f2: $fty| { + random_floats(count2).map(move |f3: $fty| (f1, f2, f3)) + }) + }); + (iter, count0 * count1 * count2) + } + } + + impl RandomInput for (i32, $fty) { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { + let count0 = iteration_count(ctx, 0); + let count1 = iteration_count(ctx, 1); + let range0 = int_range(ctx, 0); + let iter = random_ints(count0, range0) + .flat_map(move |f1: i32| random_floats(count1).map(move |f2: $fty| (f1, f2))); + (iter, count0 * count1) + } + } + + impl RandomInput for ($fty, i32) { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { + let count0 = iteration_count(ctx, 0); + let count1 = iteration_count(ctx, 1); + let range1 = int_range(ctx, 1); + let iter = random_floats(count0).flat_map(move |f1: $fty| { + random_ints(count1, range1.clone()).map(move |f2: i32| (f1, f2)) + }); + (iter, count0 * count1) + } + } + }; +} + +#[cfg(f16_enabled)] +impl_random_input!(f16); +impl_random_input!(f32); +impl_random_input!(f64); +#[cfg(f128_enabled)] +impl_random_input!(f128); + +/// Create a test case iterator. +pub fn get_test_cases( + ctx: &CheckCtx, +) -> ( + impl Iterator + Send + use<'_, RustArgs>, + u64, +) { + let (iter, count) = RustArgs::get_cases(ctx); + + // Wrap in `KnownSize` so we get an assertion if the cuunt is wrong. + (KnownSize::new(iter, count), count) +} diff --git a/library/compiler-builtins/libm-test/src/generate/spaced.rs b/library/compiler-builtins/libm-test/src/generate/spaced.rs new file mode 100644 index 0000000000000..8e6b376ebd1e9 --- /dev/null +++ b/library/compiler-builtins/libm-test/src/generate/spaced.rs @@ -0,0 +1,258 @@ +use std::fmt; +use std::ops::RangeInclusive; + +use libm::support::{Float, MinInt}; + +use crate::domain::get_domain; +use crate::op::OpITy; +use crate::run_cfg::{int_range, iteration_count}; +use crate::{CheckCtx, MathOp, linear_ints, logspace}; + +/// Generate a sequence of inputs that eiher cover the domain in completeness (for smaller float +/// types and single argument functions) or provide evenly spaced inputs across the domain with +/// approximately `u32::MAX` total iterations. +pub trait SpacedInput { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator + Send, u64); +} + +/// Construct an iterator from `logspace` and also calculate the total number of steps expected +/// for that iterator. +fn logspace_steps( + ctx: &CheckCtx, + argnum: usize, + max_steps: u64, +) -> (impl Iterator + Clone, u64) +where + Op: MathOp, + OpITy: TryFrom, + u64: TryFrom, Error: fmt::Debug>, + RangeInclusive>: Iterator, +{ + // i8 is a dummy type here, it can be any integer. 
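Aside: `logspace` itself lives in `crate::num` and is not part of this diff; it is assumed to yield roughly exponentially spaced values between two endpoints. One illustrative way to get that kind of spacing for positive `f32` endpoints is to step evenly through the bit representation, since a positive float's bits grow monotonically with its value and roughly linearly with its exponent. This is a sketch of the idea, not the crate's actual algorithm:

```rust
/// Yield `steps + 1` values from `start` toward `end` (both positive and
/// finite), evenly spaced in the *bit* domain. Because a positive float's bit
/// pattern grows with its exponent, this clusters samples multiplicatively,
/// similar to a log scale. Integer division means the last value may fall
/// slightly short of `end`.
fn bit_spaced(start: f32, end: f32, steps: u32) -> impl Iterator<Item = f32> {
    assert!(start > 0.0 && end > start && end.is_finite() && steps > 0);
    let (lo, hi) = (start.to_bits(), end.to_bits());
    let stride = (hi - lo) / steps;
    (0..=steps).map(move |i| f32::from_bits(lo + i * stride))
}

fn main() {
    let v: Vec<f32> = bit_spaced(1e-3, 1e3, 8).collect();
    assert_eq!(v.len(), 9);
    // Ratios between neighbours are roughly constant, unlike linear spacing.
    assert!(v[1] / v[0] > 2.0 && v[8] / v[7] > 2.0);
}
```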
+ let domain = get_domain::(ctx.fn_ident, argnum).unwrap_float(); + let start = domain.range_start(); + let end = domain.range_end(); + + let max_steps = OpITy::::try_from(max_steps).unwrap_or(OpITy::::MAX); + let (iter, steps) = logspace(start, end, max_steps); + + // `steps` will be <= the original `max_steps`, which is a `u64`. + (iter, steps.try_into().unwrap()) +} + +/// Represents the iterator in either `Left` or `Right`. +enum EitherIter { + A(A), + B(B), +} + +impl, B: Iterator> Iterator for EitherIter { + type Item = T; + + fn next(&mut self) -> Option { + match self { + Self::A(iter) => iter.next(), + Self::B(iter) => iter.next(), + } + } + + fn size_hint(&self) -> (usize, Option) { + match self { + Self::A(iter) => iter.size_hint(), + Self::B(iter) => iter.size_hint(), + } + } +} + +/// Gets the total number of possible values, returning `None` if that number doesn't fit in a +/// `u64`. +fn value_count() -> Option +where + u64: TryFrom, +{ + u64::try_from(F::Int::MAX) + .ok() + .and_then(|max| max.checked_add(1)) +} + +/// Returns an iterator of every possible value of type `F`. +fn all_values() -> impl Iterator +where + RangeInclusive: Iterator, +{ + (F::Int::MIN..=F::Int::MAX).map(|bits| F::from_bits(bits)) +} + +macro_rules! impl_spaced_input { + ($fty:ty) => { + impl SpacedInput for ($fty,) + where + Op: MathOp, + { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { + let max_steps0 = iteration_count(ctx, 0); + // `f16` and `f32` can have exhaustive tests. + match value_count::() { + Some(steps0) if steps0 <= max_steps0 => { + let iter0 = all_values(); + let iter0 = iter0.map(|v| (v,)); + (EitherIter::A(iter0), steps0) + } + _ => { + let (iter0, steps0) = logspace_steps::(ctx, 0, max_steps0); + let iter0 = iter0.map(|v| (v,)); + (EitherIter::B(iter0), steps0) + } + } + } + } + + impl SpacedInput for ($fty, $fty) + where + Op: MathOp, + { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { + let max_steps0 = iteration_count(ctx, 0); + let max_steps1 = iteration_count(ctx, 1); + // `f16` can have exhaustive tests. + match value_count::() { + Some(count) if count <= max_steps0 && count <= max_steps1 => { + let iter = all_values() + .flat_map(|first| all_values().map(move |second| (first, second))); + (EitherIter::A(iter), count.checked_mul(count).unwrap()) + } + _ => { + let (iter0, steps0) = logspace_steps::(ctx, 0, max_steps0); + let (iter1, steps1) = logspace_steps::(ctx, 1, max_steps1); + let iter = iter0.flat_map(move |first| { + iter1.clone().map(move |second| (first, second)) + }); + let count = steps0.checked_mul(steps1).unwrap(); + (EitherIter::B(iter), count) + } + } + } + } + + impl SpacedInput for ($fty, $fty, $fty) + where + Op: MathOp, + { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { + let max_steps0 = iteration_count(ctx, 0); + let max_steps1 = iteration_count(ctx, 1); + let max_steps2 = iteration_count(ctx, 2); + // `f16` can be exhaustive tested if `LIBM_EXTENSIVE_TESTS` is incresed. 
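Aside: `EitherIter` exists because the two `match` arms return different concrete iterator types, which a bare `impl Iterator` return type cannot unify. A condensed standalone version of the trick, with hypothetical names:

```rust
/// Wrap one of two iterator types and forward `next` to whichever is present.
enum OneOf<A, B> {
    A(A),
    B(B),
}

impl<T, A: Iterator<Item = T>, B: Iterator<Item = T>> Iterator for OneOf<A, B> {
    type Item = T;
    fn next(&mut self) -> Option<T> {
        match self {
            Self::A(i) => i.next(),
            Self::B(i) => i.next(),
        }
    }
}

/// Both arms now share the single concrete type `OneOf<_, _>`, so this compiles.
fn small_or_sampled(exhaustive: bool) -> impl Iterator<Item = u32> {
    if exhaustive {
        OneOf::A(0..=255u32)
    } else {
        OneOf::B((0..=u32::MAX).step_by(1 << 24))
    }
}

fn main() {
    assert_eq!(small_or_sampled(true).count(), 256);
    assert_eq!(small_or_sampled(false).count(), 256);
}
```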
+ match value_count::() { + Some(count) + if count <= max_steps0 && count <= max_steps1 && count <= max_steps2 => + { + let iter = all_values().flat_map(|first| { + all_values().flat_map(move |second| { + all_values().map(move |third| (first, second, third)) + }) + }); + (EitherIter::A(iter), count.checked_pow(3).unwrap()) + } + _ => { + let (iter0, steps0) = logspace_steps::(ctx, 0, max_steps0); + let (iter1, steps1) = logspace_steps::(ctx, 1, max_steps1); + let (iter2, steps2) = logspace_steps::(ctx, 2, max_steps2); + + let iter = iter0 + .flat_map(move |first| iter1.clone().map(move |second| (first, second))) + .flat_map(move |(first, second)| { + iter2.clone().map(move |third| (first, second, third)) + }); + let count = steps0 + .checked_mul(steps1) + .unwrap() + .checked_mul(steps2) + .unwrap(); + + (EitherIter::B(iter), count) + } + } + } + } + + impl SpacedInput for (i32, $fty) + where + Op: MathOp, + { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { + let range0 = int_range(ctx, 0); + let max_steps0 = iteration_count(ctx, 0); + let max_steps1 = iteration_count(ctx, 1); + match value_count::() { + Some(count1) if count1 <= max_steps1 => { + let (iter0, steps0) = linear_ints(range0, max_steps0); + let iter = iter0 + .flat_map(move |first| all_values().map(move |second| (first, second))); + (EitherIter::A(iter), steps0.checked_mul(count1).unwrap()) + } + _ => { + let (iter0, steps0) = linear_ints(range0, max_steps0); + let (iter1, steps1) = logspace_steps::(ctx, 1, max_steps1); + + let iter = iter0.flat_map(move |first| { + iter1.clone().map(move |second| (first, second)) + }); + let count = steps0.checked_mul(steps1).unwrap(); + + (EitherIter::B(iter), count) + } + } + } + } + + impl SpacedInput for ($fty, i32) + where + Op: MathOp, + { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { + let max_steps0 = iteration_count(ctx, 0); + let range1 = int_range(ctx, 1); + let max_steps1 = iteration_count(ctx, 1); + match value_count::() { + Some(count0) if count0 <= max_steps0 => { + let (iter1, steps1) = linear_ints(range1, max_steps1); + let iter = all_values().flat_map(move |first| { + iter1.clone().map(move |second| (first, second)) + }); + (EitherIter::A(iter), count0.checked_mul(steps1).unwrap()) + } + _ => { + let (iter0, steps0) = logspace_steps::(ctx, 0, max_steps0); + let (iter1, steps1) = linear_ints(range1, max_steps1); + + let iter = iter0.flat_map(move |first| { + iter1.clone().map(move |second| (first, second)) + }); + let count = steps0.checked_mul(steps1).unwrap(); + + (EitherIter::B(iter), count) + } + } + } + } + }; +} + +#[cfg(f16_enabled)] +impl_spaced_input!(f16); +impl_spaced_input!(f32); +impl_spaced_input!(f64); +#[cfg(f128_enabled)] +impl_spaced_input!(f128); + +/// Create a test case iterator for extensive inputs. Also returns the total test case count. 
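Aside: `linear_ints` (also from `crate::num`, not shown in this diff) presumably does for the integer argument what `logspace` does for floats: at most N evenly spaced values across an inclusive range, returned together with the exact count. A rough standalone sketch under that assumption; `spaced_ints` is a hypothetical name:

```rust
use std::ops::RangeInclusive;

/// Sample at most `max_steps` evenly spaced `i32`s from `range`, starting at
/// its lower end, and return the iterator together with the exact count (the
/// same shape the generator helpers above use).
fn spaced_ints(range: RangeInclusive<i32>, max_steps: u64) -> (impl Iterator<Item = i32>, u64) {
    let (lo, hi) = (*range.start() as i64, *range.end() as i64);
    assert!(lo <= hi);
    let span = (hi - lo + 1) as u64;
    let steps = span.min(max_steps).max(1);
    let stride = span / steps;
    let iter = (0..steps).map(move |i| (lo + (i * stride) as i64) as i32);
    (iter, steps)
}

fn main() {
    let (iter, count) = spaced_ints(-100..=100, 5);
    assert_eq!(count, 5);
    assert_eq!(iter.collect::<Vec<_>>(), vec![-100, -60, -20, 20, 60]);
}
```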
+pub fn get_test_cases( + ctx: &CheckCtx, +) -> (impl Iterator + Send + use<'_, Op>, u64) +where + Op: MathOp, + Op::RustArgs: SpacedInput, +{ + Op::RustArgs::get_cases(ctx) +} diff --git a/library/compiler-builtins/libm-test/src/lib.rs b/library/compiler-builtins/libm-test/src/lib.rs new file mode 100644 index 0000000000000..accb39654d15a --- /dev/null +++ b/library/compiler-builtins/libm-test/src/lib.rs @@ -0,0 +1,107 @@ +#![cfg_attr(f16_enabled, feature(f16))] +#![cfg_attr(f128_enabled, feature(f128))] +#![allow(clippy::unusual_byte_groupings)] // sometimes we group by sign_exp_sig + +pub mod domain; +mod f8_impl; +pub mod generate; +#[cfg(feature = "build-mpfr")] +pub mod mpfloat; +mod num; +pub mod op; +mod precision; +mod run_cfg; +mod test_traits; + +use std::env; +use std::fs::File; +use std::io::Write; +use std::path::PathBuf; +use std::sync::LazyLock; +use std::time::SystemTime; + +pub use f8_impl::{f8, hf8}; +pub use libm::support::{Float, Int, IntTy, MinInt}; +pub use num::{FloatExt, linear_ints, logspace}; +pub use op::{ + BaseName, FloatTy, Identifier, MathOp, OpCFn, OpCRet, OpFTy, OpRustArgs, OpRustFn, OpRustRet, + Ty, +}; +pub use precision::{MaybeOverride, SpecialCase, default_ulp}; +use run_cfg::extensive_max_iterations; +pub use run_cfg::{ + CheckBasis, CheckCtx, EXTENSIVE_ENV, GeneratorKind, bigint_fuzz_iteration_count, + skip_extensive_test, +}; +pub use test_traits::{CheckOutput, Hex, TupleCall}; + +/// Result type for tests is usually from `anyhow`. Most times there is no success value to +/// propagate. +pub type TestResult = Result; + +/// True if `EMULATED` is set and nonempty. Used to determine how many iterations to run. +pub const fn emulated() -> bool { + match option_env!("EMULATED") { + Some(s) if s.is_empty() => false, + None => false, + Some(_) => true, + } +} + +/// True if `CI` is set and nonempty. +pub const fn ci() -> bool { + match option_env!("CI") { + Some(s) if s.is_empty() => false, + None => false, + Some(_) => true, + } +} + +/// Print to stderr and additionally log it to `target/test-log.txt`. This is useful for saving +/// output that would otherwise be consumed by the test harness. +pub fn test_log(s: &str) { + // Handle to a file opened in append mode, unless a suitable path can't be determined. + static OUTFILE: LazyLock> = LazyLock::new(|| { + // If the target directory is overridden, use that environment variable. Otherwise, save + // at the default path `{workspace_root}/target`. 
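Aside: the `emulated()`/`ci()` helpers above bake the environment check into the binary at compile time via `option_env!`. If more such flags were ever needed, the repeated match could be folded into a macro; this is a purely illustrative sketch, and `build_env_flag!` is not part of this patch:

```rust
/// Expands to `true` when the named env var was set to a non-empty value at
/// build time, mirroring the `option_env!` pattern used by `ci()` and
/// `emulated()`, but parameterized over the variable name.
macro_rules! build_env_flag {
    ($name:literal) => {
        match option_env!($name) {
            Some(s) if s.is_empty() => false,
            None => false,
            Some(_) => true,
        }
    };
}

fn main() {
    // Both values are fixed when this example is compiled, not when it runs.
    println!("built under CI: {}", build_env_flag!("CI"));
    println!("built emulated: {}", build_env_flag!("EMULATED"));
}
```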
+ let target_dir = match env::var("CARGO_TARGET_DIR") { + Ok(s) => PathBuf::from(s), + Err(_) => { + let Ok(x) = env::var("CARGO_MANIFEST_DIR") else { + return None; + }; + + PathBuf::from(x).join("../target") + } + }; + let outfile = target_dir.join("test-log.txt"); + + let mut f = File::options() + .create(true) + .append(true) + .open(outfile) + .expect("failed to open logfile"); + let now = SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap(); + + writeln!(f, "\n\nTest run at {}", now.as_secs()).unwrap(); + writeln!(f, "arch: {}", env::consts::ARCH).unwrap(); + writeln!(f, "os: {}", env::consts::OS).unwrap(); + writeln!(f, "bits: {}", usize::BITS).unwrap(); + writeln!(f, "emulated: {}", emulated()).unwrap(); + writeln!(f, "ci: {}", ci()).unwrap(); + writeln!(f, "cargo features: {}", env!("CFG_CARGO_FEATURES")).unwrap(); + writeln!(f, "opt level: {}", env!("CFG_OPT_LEVEL")).unwrap(); + writeln!(f, "target features: {}", env!("CFG_TARGET_FEATURES")).unwrap(); + writeln!(f, "extensive iterations {}", extensive_max_iterations()).unwrap(); + + Some(f) + }); + + eprintln!("{s}"); + + if let Some(mut f) = OUTFILE.as_ref() { + writeln!(f, "{s}").unwrap(); + } +} diff --git a/library/compiler-builtins/libm-test/src/mpfloat.rs b/library/compiler-builtins/libm-test/src/mpfloat.rs new file mode 100644 index 0000000000000..9b51dc6051d03 --- /dev/null +++ b/library/compiler-builtins/libm-test/src/mpfloat.rs @@ -0,0 +1,603 @@ +//! Interfaces needed to support testing with multi-precision floating point numbers. +//! +//! Within this module, the macros create a submodule for each `libm` function. These contain +//! a struct named `Operation` that implements [`MpOp`]. + +use std::cmp::Ordering; + +use rug::Assign; +pub use rug::Float as MpFloat; +use rug::az::{self, Az}; +use rug::float::Round::Nearest; +use rug::ops::{PowAssignRound, RemAssignRound}; + +use crate::{Float, MathOp}; + +/// Create a multiple-precision float with the correct number of bits for a concrete float type. +fn new_mpfloat() -> MpFloat { + MpFloat::new(F::SIG_BITS + 1) +} + +/// Set subnormal emulation and convert to a concrete float type. +fn prep_retval(mp: &mut MpFloat, ord: Ordering) -> F +where + for<'a> &'a MpFloat: az::Cast, +{ + mp.subnormalize_ieee_round(ord, Nearest); + (&*mp).az::() +} + +/// Structures that represent a float operation. +/// +pub trait MpOp: MathOp { + /// The struct itself should hold any context that can be reused among calls to `run` (allocated + /// `MpFloat`s). + type MpTy; + + /// Create a new instance. + fn new_mp() -> Self::MpTy; + + /// Perform the operation. + /// + /// Usually this means assigning inputs to cached floats, performing the operation, applying + /// subnormal approximation, and converting the result back to concrete values. + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet; +} + +/// Implement `MpOp` for functions with a single return value. +macro_rules! impl_mp_op { + // Matcher for unary functions + ( + fn_name: $fn_name:ident, + RustFn: fn($_fty:ty,) -> $_ret:ty, + attrs: [$($attr:meta),*], + fn_extra: $fn_name_normalized:expr, + ) => { + paste::paste! 
{ + $(#[$attr])* + impl MpOp for crate::op::$fn_name::Routine { + type MpTy = MpFloat; + + fn new_mp() -> Self::MpTy { + new_mpfloat::() + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.assign(input.0); + let ord = this.[< $fn_name_normalized _round >](Nearest); + prep_retval::(this, ord) + } + } + } + }; + // Matcher for binary functions + ( + fn_name: $fn_name:ident, + RustFn: fn($_fty:ty, $_fty2:ty,) -> $_ret:ty, + attrs: [$($attr:meta),*], + fn_extra: $fn_name_normalized:expr, + ) => { + paste::paste! { + $(#[$attr])* + impl MpOp for crate::op::$fn_name::Routine { + type MpTy = (MpFloat, MpFloat); + + fn new_mp() -> Self::MpTy { + (new_mpfloat::(), new_mpfloat::()) + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.0.assign(input.0); + this.1.assign(input.1); + let ord = this.0.[< $fn_name_normalized _round >](&this.1, Nearest); + prep_retval::(&mut this.0, ord) + } + } + } + }; + // Matcher for ternary functions + ( + fn_name: $fn_name:ident, + RustFn: fn($_fty:ty, $_fty2:ty, $_fty3:ty,) -> $_ret:ty, + attrs: [$($attr:meta),*], + fn_extra: $fn_name_normalized:expr, + ) => { + paste::paste! { + $(#[$attr])* + impl MpOp for crate::op::$fn_name::Routine { + type MpTy = (MpFloat, MpFloat, MpFloat); + + fn new_mp() -> Self::MpTy { + ( + new_mpfloat::(), + new_mpfloat::(), + new_mpfloat::(), + ) + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.0.assign(input.0); + this.1.assign(input.1); + this.2.assign(input.2); + let ord = this.0.[< $fn_name_normalized _round >](&this.1, &this.2, Nearest); + prep_retval::(&mut this.0, ord) + } + } + } + }; +} + +libm_macros::for_each_function! { + callback: impl_mp_op, + emit_types: [RustFn], + skip: [ + // Most of these need a manual implementation + // verify-sorted-start + ceil, + ceilf, + ceilf128, + ceilf16, + copysign, + copysignf, + copysignf128, + copysignf16, + fabs, + fabsf, + fabsf128, + fabsf16,floor, + floorf, + floorf128, + floorf16, + fmaximum, + fmaximumf, + fmaximumf128, + fmaximumf16, + fminimum, + fminimumf, + fminimumf128, + fminimumf16, + fmod, + fmodf, + fmodf128, + fmodf16, + frexp, + frexpf, + ilogb, + ilogbf, + jn, + jnf, + ldexp, + ldexpf, + ldexpf128, + ldexpf16, + lgamma_r, + lgammaf_r, + modf, + modff, + nextafter, + nextafterf, + pow, + powf,remquo, + remquof, + rint, + rintf, + rintf128, + rintf16, + round, + roundeven, + roundevenf, + roundevenf128, + roundevenf16, + roundf, + roundf128, + roundf16, + scalbn, + scalbnf, + scalbnf128, + scalbnf16, + sincos,sincosf, + trunc, + truncf, + truncf128, + truncf16,yn, + ynf, + // verify-sorted-end + ], + fn_extra: match MACRO_FN_NAME { + // Remap function names that are different between mpfr and libm + expm1 | expm1f => exp_m1, + fabs | fabsf => abs, + fdim | fdimf | fdimf16 | fdimf128 => positive_diff, + fma | fmaf | fmaf128 => mul_add, + fmax | fmaxf | fmaxf16 | fmaxf128 | + fmaximum_num | fmaximum_numf | fmaximum_numf16 | fmaximum_numf128 => max, + fmin | fminf | fminf16 | fminf128 | + fminimum_num | fminimum_numf | fminimum_numf16 | fminimum_numf128 => min, + lgamma | lgammaf => ln_gamma, + log | logf => ln, + log1p | log1pf => ln_1p, + tgamma | tgammaf => gamma, + _ => MACRO_FN_NAME_NORMALIZED + } +} + +/// Implement unary functions that don't have a `_round` version +macro_rules! impl_no_round { + // Unary matcher + ($($fn_name:ident => $rug_name:ident;)*) => { + paste::paste! 
{ + $( impl_no_round!{ @inner_unary $fn_name, $rug_name } )* + } + }; + + (@inner_unary $fn_name:ident, $rug_name:ident) => { + impl MpOp for crate::op::$fn_name::Routine { + type MpTy = MpFloat; + + fn new_mp() -> Self::MpTy { + new_mpfloat::() + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.assign(input.0); + this.$rug_name(); + prep_retval::(this, Ordering::Equal) + } + } + }; +} + +impl_no_round! { + ceil => ceil_mut; + ceilf => ceil_mut; + fabs => abs_mut; + fabsf => abs_mut; + floor => floor_mut; + floorf => floor_mut; + rint => round_even_mut; // FIXME: respect rounding mode + rintf => round_even_mut; // FIXME: respect rounding mode + round => round_mut; + roundeven => round_even_mut; + roundevenf => round_even_mut; + roundf => round_mut; + trunc => trunc_mut; + truncf => trunc_mut; +} + +#[cfg(f16_enabled)] +impl_no_round! { + ceilf16 => ceil_mut; + fabsf16 => abs_mut; + floorf16 => floor_mut; + rintf16 => round_even_mut; // FIXME: respect rounding mode + roundf16 => round_mut; + roundevenf16 => round_even_mut; + truncf16 => trunc_mut; +} + +#[cfg(f128_enabled)] +impl_no_round! { + ceilf128 => ceil_mut; + fabsf128 => abs_mut; + floorf128 => floor_mut; + rintf128 => round_even_mut; // FIXME: respect rounding mode + roundf128 => round_mut; + roundevenf128 => round_even_mut; + truncf128 => trunc_mut; +} + +/// Some functions are difficult to do in a generic way. Implement them here. +macro_rules! impl_op_for_ty { + ($fty:ty, $suffix:literal) => { + paste::paste! { + impl MpOp for crate::op::[]::Routine { + type MpTy = (MpFloat, MpFloat); + + fn new_mp() -> Self::MpTy { + (new_mpfloat::(), new_mpfloat::()) + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.0.assign(input.0); + this.1.assign(&this.0); + let (ord0, ord1) = this.0.trunc_fract_round(&mut this.1, Nearest); + ( + prep_retval::(&mut this.1, ord0), + prep_retval::(&mut this.0, ord1), + ) + } + } + + impl MpOp for crate::op::[]::Routine { + type MpTy = (MpFloat, MpFloat); + + fn new_mp() -> Self::MpTy { + (new_mpfloat::(), new_mpfloat::()) + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.0.assign(input.0); + this.1.assign(input.1); + let ord = this.0.pow_assign_round(&this.1, Nearest); + prep_retval::(&mut this.0, ord) + } + } + + impl MpOp for crate::op::[]::Routine { + type MpTy = MpFloat; + + fn new_mp() -> Self::MpTy { + new_mpfloat::() + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.assign(input.0); + let exp = this.frexp_mut(); + (prep_retval::(this, Ordering::Equal), exp) + } + } + + impl MpOp for crate::op::[]::Routine { + type MpTy = MpFloat; + + fn new_mp() -> Self::MpTy { + new_mpfloat::() + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.assign(input.0); + + // `get_exp` follows `frexp` for `0.5 <= |m| < 1.0`. Adjust the exponent by + // one to scale the significand to `1.0 <= |m| < 2.0`. 
+ this.get_exp().map(|v| v - 1).unwrap_or_else(|| { + if this.is_infinite() { + i32::MAX + } else { + // Zero or NaN + i32::MIN + } + }) + } + } + + impl MpOp for crate::op::[]::Routine { + type MpTy = MpFloat; + + fn new_mp() -> Self::MpTy { + new_mpfloat::() + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + let (n, x) = input; + this.assign(x); + let ord = this.jn_round(n, Nearest); + prep_retval::(this, ord) + } + } + + impl MpOp for crate::op::[]::Routine { + type MpTy = (MpFloat, MpFloat); + + fn new_mp() -> Self::MpTy { + (new_mpfloat::(), new_mpfloat::()) + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.0.assign(input.0); + this.1.assign(0.0); + let (sord, cord) = this.0.sin_cos_round(&mut this.1, Nearest); + ( + prep_retval::(&mut this.0, sord), + prep_retval::(&mut this.1, cord) + ) + } + } + + impl MpOp for crate::op::[]::Routine { + type MpTy = (MpFloat, MpFloat); + + fn new_mp() -> Self::MpTy { + ( + new_mpfloat::(), + new_mpfloat::(), + ) + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.0.assign(input.0); + this.1.assign(input.1); + let (ord, q) = this.0.remainder_quo31_round(&this.1, Nearest); + (prep_retval::(&mut this.0, ord), q) + } + } + + impl MpOp for crate::op::[]::Routine { + type MpTy = MpFloat; + + fn new_mp() -> Self::MpTy { + new_mpfloat::() + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + let (n, x) = input; + this.assign(x); + let ord = this.yn_round(n, Nearest); + prep_retval::(this, ord) + } + } + } + }; +} + +/// Version of `impl_op_for_ty` with only functions that have `f16` and `f128` implementations. +macro_rules! impl_op_for_ty_all { + ($fty:ty, $suffix:literal) => { + paste::paste! 
{ + impl MpOp for crate::op::[]::Routine { + type MpTy = (MpFloat, MpFloat); + + fn new_mp() -> Self::MpTy { + (new_mpfloat::(), new_mpfloat::()) + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.0.assign(input.0); + this.1.assign(input.1); + this.0.copysign_mut(&this.1); + prep_retval::(&mut this.0, Ordering::Equal) + } + } + + impl MpOp for crate::op::[]::Routine { + type MpTy = (MpFloat, MpFloat); + + fn new_mp() -> Self::MpTy { + (new_mpfloat::(), new_mpfloat::()) + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.0.assign(input.0); + this.1.assign(input.1); + let ord = this.0.rem_assign_round(&this.1, Nearest); + prep_retval::(&mut this.0, ord) + + } + } + + impl MpOp for crate::op::[< fmaximum $suffix >]::Routine { + type MpTy = (MpFloat, MpFloat); + + fn new_mp() -> Self::MpTy { + (new_mpfloat::(), new_mpfloat::()) + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.0.assign(input.0); + this.1.assign(input.1); + let ord = if this.0.is_nan() || this.1.is_nan() { + this.0.assign($fty::NAN); + Ordering::Equal + } else { + this.0.max_round(&this.1, Nearest) + }; + prep_retval::(&mut this.0, ord) + } + } + + impl MpOp for crate::op::[< fminimum $suffix >]::Routine { + type MpTy = (MpFloat, MpFloat); + + fn new_mp() -> Self::MpTy { + (new_mpfloat::(), new_mpfloat::()) + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.0.assign(input.0); + this.1.assign(input.1); + let ord = if this.0.is_nan() || this.1.is_nan() { + this.0.assign($fty::NAN); + Ordering::Equal + } else { + this.0.min_round(&this.1, Nearest) + }; + prep_retval::(&mut this.0, ord) + } + } + + // `ldexp` and `scalbn` are the same for binary floating point, so just forward all + // methods. + impl MpOp for crate::op::[]::Routine { + type MpTy = ]::Routine as MpOp>::MpTy; + + fn new_mp() -> Self::MpTy { + ]::Routine as MpOp>::new_mp() + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + ]::Routine as MpOp>::run(this, input) + } + } + + impl MpOp for crate::op::[]::Routine { + type MpTy = MpFloat; + + fn new_mp() -> Self::MpTy { + new_mpfloat::() + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.assign(input.0); + *this <<= input.1; + prep_retval::(this, Ordering::Equal) + } + } + } + }; +} + +impl_op_for_ty!(f32, "f"); +impl_op_for_ty!(f64, ""); + +#[cfg(f16_enabled)] +impl_op_for_ty_all!(f16, "f16"); +impl_op_for_ty_all!(f32, "f"); +impl_op_for_ty_all!(f64, ""); +#[cfg(f128_enabled)] +impl_op_for_ty_all!(f128, "f128"); + +// `lgamma_r` is not a simple suffix so we can't use the above macro. 
+impl MpOp for crate::op::lgamma_r::Routine { + type MpTy = MpFloat; + + fn new_mp() -> Self::MpTy { + new_mpfloat::() + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.assign(input.0); + let (sign, ord) = this.ln_abs_gamma_round(Nearest); + let ret = prep_retval::(this, ord); + (ret, sign as i32) + } +} + +impl MpOp for crate::op::lgammaf_r::Routine { + type MpTy = MpFloat; + + fn new_mp() -> Self::MpTy { + new_mpfloat::() + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.assign(input.0); + let (sign, ord) = this.ln_abs_gamma_round(Nearest); + let ret = prep_retval::(this, ord); + (ret, sign as i32) + } +} + +/* stub implementations so we don't need to special case them */ + +impl MpOp for crate::op::nextafter::Routine { + type MpTy = MpFloat; + + fn new_mp() -> Self::MpTy { + unimplemented!("nextafter does not yet have a MPFR operation"); + } + + fn run(_this: &mut Self::MpTy, _input: Self::RustArgs) -> Self::RustRet { + unimplemented!("nextafter does not yet have a MPFR operation"); + } +} + +impl MpOp for crate::op::nextafterf::Routine { + type MpTy = MpFloat; + + fn new_mp() -> Self::MpTy { + unimplemented!("nextafter does not yet have a MPFR operation"); + } + + fn run(_this: &mut Self::MpTy, _input: Self::RustArgs) -> Self::RustRet { + unimplemented!("nextafter does not yet have a MPFR operation"); + } +} diff --git a/library/compiler-builtins/libm-test/src/num.rs b/library/compiler-builtins/libm-test/src/num.rs new file mode 100644 index 0000000000000..3237c85039d57 --- /dev/null +++ b/library/compiler-builtins/libm-test/src/num.rs @@ -0,0 +1,586 @@ +//! Helpful numeric operations. + +use std::cmp::min; +use std::ops::RangeInclusive; + +use libm::support::Float; + +use crate::{Int, MinInt}; + +/// Extension to `libm`'s `Float` trait with methods that are useful for tests but not +/// needed in `libm` itself. +pub trait FloatExt: Float { + /// The minimum subnormal number. + const TINY_BITS: Self::Int = Self::Int::ONE; + + /// Retrieve additional constants for this float type. + fn consts() -> Consts { + Consts::new() + } + + /// Increment by one ULP, saturating at infinity. + fn next_up(self) -> Self { + let bits = self.to_bits(); + if self.is_nan() || bits == Self::INFINITY.to_bits() { + return self; + } + + let abs = self.abs().to_bits(); + let next_bits = if abs == Self::Int::ZERO { + // Next up from 0 is the smallest subnormal + Self::TINY_BITS + } else if bits == abs { + // Positive: counting up is more positive + bits + Self::Int::ONE + } else { + // Negative: counting down is more positive + bits - Self::Int::ONE + }; + Self::from_bits(next_bits) + } + + /// A faster way to effectively call `next_up` `n` times. 
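A small editorial sketch of the ULP-stepping semantics implemented here (not taken from the patch; it uses plain `f32` and assumes the crate-root re-export of `FloatExt` from `lib.rs` above):

    use libm_test::FloatExt;

    fn main() {
        let x = 1.0f32;
        // One ULP above 1.0 is the next representable value.
        assert_eq!(x.next_up().to_bits(), x.to_bits() + 1);
        // `n_up(n)` behaves like `n` repeated `next_up` calls, done in one step.
        assert_eq!(x.n_up(3), x.next_up().next_up().next_up());
        // Stepping saturates at infinity instead of wrapping.
        assert_eq!(f32::INFINITY.next_up(), f32::INFINITY);
    }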
+ fn n_up(self, n: Self::Int) -> Self { + let bits = self.to_bits(); + if self.is_nan() || bits == Self::INFINITY.to_bits() || n == Self::Int::ZERO { + return self; + } + + let abs = self.abs().to_bits(); + let is_positive = bits == abs; + let crosses_zero = !is_positive && n > abs; + let inf_bits = Self::INFINITY.to_bits(); + + let next_bits = if abs == Self::Int::ZERO { + min(n, inf_bits) + } else if crosses_zero { + min(n - abs, inf_bits) + } else if is_positive { + // Positive, counting up is more positive but this may overflow + match bits.checked_add(n) { + Some(v) if v >= inf_bits => inf_bits, + Some(v) => v, + None => inf_bits, + } + } else { + // Negative, counting down is more positive + bits - n + }; + Self::from_bits(next_bits) + } + + /// Decrement by one ULP, saturating at negative infinity. + fn next_down(self) -> Self { + let bits = self.to_bits(); + if self.is_nan() || bits == Self::NEG_INFINITY.to_bits() { + return self; + } + + let abs = self.abs().to_bits(); + let next_bits = if abs == Self::Int::ZERO { + // Next up from 0 is the smallest negative subnormal + Self::TINY_BITS | Self::SIGN_MASK + } else if bits == abs { + // Positive: counting down is more negative + bits - Self::Int::ONE + } else { + // Negative: counting up is more negative + bits + Self::Int::ONE + }; + Self::from_bits(next_bits) + } + + /// A faster way to effectively call `next_down` `n` times. + fn n_down(self, n: Self::Int) -> Self { + let bits = self.to_bits(); + if self.is_nan() || bits == Self::NEG_INFINITY.to_bits() || n == Self::Int::ZERO { + return self; + } + + let abs = self.abs().to_bits(); + let is_positive = bits == abs; + let crosses_zero = is_positive && n > abs; + let inf_bits = Self::INFINITY.to_bits(); + let ninf_bits = Self::NEG_INFINITY.to_bits(); + + let next_bits = if abs == Self::Int::ZERO { + min(n, inf_bits) | Self::SIGN_MASK + } else if crosses_zero { + min(n - abs, inf_bits) | Self::SIGN_MASK + } else if is_positive { + // Positive, counting down is more negative + bits - n + } else { + // Negative, counting up is more negative but this may overflow + match bits.checked_add(n) { + Some(v) if v > ninf_bits => ninf_bits, + Some(v) => v, + None => ninf_bits, + } + }; + Self::from_bits(next_bits) + } +} + +impl FloatExt for F where F: Float {} + +/// Extra constants that are useful for tests. +#[derive(Debug, Clone, Copy)] +pub struct Consts { + /// The default quiet NaN, which is also the minimum quiet NaN. + pub pos_nan: F, + /// The default quiet NaN with negative sign. + pub neg_nan: F, + /// NaN with maximum (unsigned) significand to be a quiet NaN. The significand is saturated. + pub max_qnan: F, + /// NaN with minimum (unsigned) significand to be a signaling NaN. + pub min_snan: F, + /// NaN with maximum (unsigned) significand to be a signaling NaN. 
+ pub max_snan: F, + pub neg_max_qnan: F, + pub neg_min_snan: F, + pub neg_max_snan: F, +} + +impl Consts { + fn new() -> Self { + let top_sigbit_mask = F::Int::ONE << (F::SIG_BITS - 1); + let pos_nan = F::EXP_MASK | top_sigbit_mask; + let max_qnan = F::EXP_MASK | F::SIG_MASK; + let min_snan = F::EXP_MASK | F::Int::ONE; + let max_snan = (F::EXP_MASK | F::SIG_MASK) ^ top_sigbit_mask; + + let neg_nan = pos_nan | F::SIGN_MASK; + let neg_max_qnan = max_qnan | F::SIGN_MASK; + let neg_min_snan = min_snan | F::SIGN_MASK; + let neg_max_snan = max_snan | F::SIGN_MASK; + + Self { + pos_nan: F::from_bits(pos_nan), + neg_nan: F::from_bits(neg_nan), + max_qnan: F::from_bits(max_qnan), + min_snan: F::from_bits(min_snan), + max_snan: F::from_bits(max_snan), + neg_max_qnan: F::from_bits(neg_max_qnan), + neg_min_snan: F::from_bits(neg_min_snan), + neg_max_snan: F::from_bits(neg_max_snan), + } + } + + pub fn iter(self) -> impl Iterator { + // Destructure so we get unused warnings if we forget a list entry. + let Self { + pos_nan, + neg_nan, + max_qnan, + min_snan, + max_snan, + neg_max_qnan, + neg_min_snan, + neg_max_snan, + } = self; + + [ + pos_nan, + neg_nan, + max_qnan, + min_snan, + max_snan, + neg_max_qnan, + neg_min_snan, + neg_max_snan, + ] + .into_iter() + } +} + +/// Return the number of steps between two floats, returning `None` if either input is NaN. +/// +/// This is the number of steps needed for `n_up` or `n_down` to go between values. Infinities +/// are treated the same as those functions (will return the nearest finite value), and only one +/// of `-0` or `+0` is counted. It does not matter which value is greater. +pub fn ulp_between(x: F, y: F) -> Option { + let a = as_ulp_steps(x)?; + let b = as_ulp_steps(y)?; + Some(a.abs_diff(b)) +} + +/// Return the (signed) number of steps from zero to `x`. +fn as_ulp_steps(x: F) -> Option { + let s = x.to_bits_signed(); + let val = if s >= F::SignedInt::ZERO { + // each increment from `s = 0` is one step up from `x = 0.0` + s + } else { + // each increment from `s = F::SignedInt::MIN` is one step down from `x = -0.0` + F::SignedInt::MIN - s + }; + + // If `x` is NaN, return `None` + (!x.is_nan()).then_some(val) +} + +/// An iterator that returns floats with linearly spaced integer representations, which translates +/// to logarithmic spacing of their values. +/// +/// Note that this tends to skip negative zero, so that needs to be checked explicitly. +/// +/// Returns `(iterator, iterator_length)`. +pub fn logspace( + start: F, + end: F, + steps: F::Int, +) -> (impl Iterator + Clone, F::Int) +where + RangeInclusive: Iterator, +{ + assert!(!start.is_nan()); + assert!(!end.is_nan()); + assert!(end >= start); + + let steps = steps + .checked_sub(F::Int::ONE) + .expect("`steps` must be at least 2"); + let between = ulp_between(start, end).expect("`start` or `end` is NaN"); + let spacing = (between / steps).max(F::Int::ONE); + let steps = steps.min(between); // At maximum, one step per ULP + + let mut x = start; + ( + (F::Int::ZERO..=steps).map(move |_| { + let ret = x; + x = x.n_up(spacing); + ret + }), + steps + F::Int::ONE, + ) +} + +/// Returns an iterator of up to `steps` integers evenly distributed. 
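A brief editorial sketch (not taken from the patch) of what the two spacing helpers produce, using the crate-root re-exports; `logspace`'s interior points depend on the ULP spacing, so only properties guaranteed by the code in this file are asserted:

    use libm_test::{linear_ints, logspace};

    fn main() {
        // Five evenly spaced integers across 0..=100.
        let (iter, count) = linear_ints(0..=100, 5);
        assert_eq!(iter.collect::<Vec<_>>(), vec![0, 25, 50, 75, 100]);
        assert_eq!(count, 5);

        // Nine samples whose bit patterns are evenly spaced, i.e. values that are
        // roughly logarithmically spaced; the first sample is always `start`.
        let (iter, count) = logspace(1.0f32, 1024.0f32, 9);
        let samples: Vec<f32> = iter.collect();
        assert_eq!(samples.len() as u32, count);
        assert_eq!(samples[0], 1.0);
    }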
+pub fn linear_ints( + range: RangeInclusive, + steps: u64, +) -> (impl Iterator + Clone, u64) { + let steps = steps.checked_sub(1).unwrap(); + let between = u64::from(range.start().abs_diff(*range.end())); + let spacing = i32::try_from((between / steps).max(1)).unwrap(); + let steps = steps.min(between); + let mut x: i32 = *range.start(); + ( + (0..=steps).map(move |_| { + let res = x; + // Wrapping add to avoid panic on last item (where `x` could overflow past i32::MAX as + // there is no next item). + x = x.wrapping_add(spacing); + res + }), + steps + 1, + ) +} + +#[cfg(test)] +mod tests { + use std::cmp::max; + + use super::*; + use crate::f8; + + #[test] + fn test_next_up_down() { + for (i, v) in f8::ALL.into_iter().enumerate() { + let down = v.next_down().to_bits(); + let up = v.next_up().to_bits(); + + if i == 0 { + assert_eq!(down, f8::NEG_INFINITY.to_bits(), "{i} next_down({v:#010b})"); + } else { + let expected = if v == f8::ZERO { + 1 | f8::SIGN_MASK + } else { + f8::ALL[i - 1].to_bits() + }; + assert_eq!(down, expected, "{i} next_down({v:#010b})"); + } + + if i == f8::ALL_LEN - 1 { + assert_eq!(up, f8::INFINITY.to_bits(), "{i} next_up({v:#010b})"); + } else { + let expected = if v == f8::NEG_ZERO { + 1 + } else { + f8::ALL[i + 1].to_bits() + }; + assert_eq!(up, expected, "{i} next_up({v:#010b})"); + } + } + } + + #[test] + fn test_next_up_down_inf_nan() { + assert_eq!(f8::NEG_INFINITY.next_up().to_bits(), f8::ALL[0].to_bits(),); + assert_eq!( + f8::NEG_INFINITY.next_down().to_bits(), + f8::NEG_INFINITY.to_bits(), + ); + assert_eq!( + f8::INFINITY.next_down().to_bits(), + f8::ALL[f8::ALL_LEN - 1].to_bits(), + ); + assert_eq!(f8::INFINITY.next_up().to_bits(), f8::INFINITY.to_bits(),); + assert_eq!(f8::NAN.next_up().to_bits(), f8::NAN.to_bits(),); + assert_eq!(f8::NAN.next_down().to_bits(), f8::NAN.to_bits(),); + } + + #[test] + fn test_n_up_down_quick() { + assert_eq!(f8::ALL[0].n_up(4).to_bits(), f8::ALL[4].to_bits(),); + assert_eq!( + f8::ALL[f8::ALL_LEN - 1].n_down(4).to_bits(), + f8::ALL[f8::ALL_LEN - 5].to_bits(), + ); + + // Check around zero + assert_eq!(f8::from_bits(0b0).n_up(7).to_bits(), 0b0_0000_111); + assert_eq!(f8::from_bits(0b0).n_down(7).to_bits(), 0b1_0000_111); + + // Check across zero + assert_eq!(f8::from_bits(0b1_0000_111).n_up(8).to_bits(), 0b0_0000_001); + assert_eq!( + f8::from_bits(0b0_0000_111).n_down(8).to_bits(), + 0b1_0000_001 + ); + } + + #[test] + fn test_n_up_down_one() { + // Verify that `n_up(1)` and `n_down(1)` are the same as `next_up()` and next_down()`.` + for i in 0..u8::MAX { + let v = f8::from_bits(i); + assert_eq!(v.next_up().to_bits(), v.n_up(1).to_bits()); + assert_eq!(v.next_down().to_bits(), v.n_down(1).to_bits()); + } + } + + #[test] + fn test_n_up_down_inf_nan_zero() { + assert_eq!(f8::NEG_INFINITY.n_up(1).to_bits(), f8::ALL[0].to_bits()); + assert_eq!( + f8::NEG_INFINITY.n_up(239).to_bits(), + f8::ALL[f8::ALL_LEN - 1].to_bits() + ); + assert_eq!(f8::NEG_INFINITY.n_up(240).to_bits(), f8::INFINITY.to_bits()); + assert_eq!( + f8::NEG_INFINITY.n_down(u8::MAX).to_bits(), + f8::NEG_INFINITY.to_bits() + ); + + assert_eq!( + f8::INFINITY.n_down(1).to_bits(), + f8::ALL[f8::ALL_LEN - 1].to_bits() + ); + assert_eq!(f8::INFINITY.n_down(239).to_bits(), f8::ALL[0].to_bits()); + assert_eq!( + f8::INFINITY.n_down(240).to_bits(), + f8::NEG_INFINITY.to_bits() + ); + assert_eq!(f8::INFINITY.n_up(u8::MAX).to_bits(), f8::INFINITY.to_bits()); + + assert_eq!(f8::NAN.n_up(u8::MAX).to_bits(), f8::NAN.to_bits()); + 
assert_eq!(f8::NAN.n_down(u8::MAX).to_bits(), f8::NAN.to_bits()); + + assert_eq!(f8::ZERO.n_down(1).to_bits(), f8::TINY_BITS | f8::SIGN_MASK); + assert_eq!(f8::NEG_ZERO.n_up(1).to_bits(), f8::TINY_BITS); + } + + /// True if the specified range of `f8::ALL` includes both +0 and -0 + fn crossed_zero(start: usize, end: usize) -> bool { + let crossed = &f8::ALL[start..=end]; + crossed.iter().any(|f| f8::eq_repr(*f, f8::ZERO)) + && crossed.iter().any(|f| f8::eq_repr(*f, f8::NEG_ZERO)) + } + + #[test] + fn test_n_up_down() { + for (i, v) in f8::ALL.into_iter().enumerate() { + for n in 0..f8::ALL_LEN { + let down = v.n_down(n as u8).to_bits(); + let up = v.n_up(n as u8).to_bits(); + + if let Some(down_exp_idx) = i.checked_sub(n) { + // No overflow + let mut expected = f8::ALL[down_exp_idx].to_bits(); + if n >= 1 && crossed_zero(down_exp_idx, i) { + // If both -0 and +0 are included, we need to adjust our expected value + match down_exp_idx.checked_sub(1) { + Some(v) => expected = f8::ALL[v].to_bits(), + // Saturate to -inf if we are out of values + None => expected = f8::NEG_INFINITY.to_bits(), + } + } + assert_eq!(down, expected, "{i} {n} n_down({v:#010b})"); + } else { + // Overflow to -inf + assert_eq!( + down, + f8::NEG_INFINITY.to_bits(), + "{i} {n} n_down({v:#010b})" + ); + } + + let mut up_exp_idx = i + n; + if up_exp_idx < f8::ALL_LEN { + // No overflow + if n >= 1 && up_exp_idx < f8::ALL_LEN && crossed_zero(i, up_exp_idx) { + // If both -0 and +0 are included, we need to adjust our expected value + up_exp_idx += 1; + } + + let expected = if up_exp_idx >= f8::ALL_LEN { + f8::INFINITY.to_bits() + } else { + f8::ALL[up_exp_idx].to_bits() + }; + + assert_eq!(up, expected, "{i} {n} n_up({v:#010b})"); + } else { + // Overflow to +inf + assert_eq!(up, f8::INFINITY.to_bits(), "{i} {n} n_up({v:#010b})"); + } + } + } + } + + #[test] + fn test_ulp_between() { + for (i, x) in f8::ALL.into_iter().enumerate() { + for (j, y) in f8::ALL.into_iter().enumerate() { + let ulp = ulp_between(x, y).unwrap(); + let make_msg = || format!("i: {i} j: {j} x: {x:b} y: {y:b} ulp {ulp}"); + + let i_low = min(i, j); + let i_hi = max(i, j); + let mut expected = u8::try_from(i_hi - i_low).unwrap(); + if crossed_zero(i_low, i_hi) { + expected -= 1; + } + + assert_eq!(ulp, expected, "{}", make_msg()); + + // Skip if either are zero since `next_{up,down}` will count over it + let either_zero = x == f8::ZERO || y == f8::ZERO; + if x < y && !either_zero { + assert_eq!(x.n_up(ulp).to_bits(), y.to_bits(), "{}", make_msg()); + assert_eq!(y.n_down(ulp).to_bits(), x.to_bits(), "{}", make_msg()); + } else if !either_zero { + assert_eq!(y.n_up(ulp).to_bits(), x.to_bits(), "{}", make_msg()); + assert_eq!(x.n_down(ulp).to_bits(), y.to_bits(), "{}", make_msg()); + } + } + } + } + + #[test] + fn test_ulp_between_inf_nan_zero() { + assert_eq!( + ulp_between(f8::NEG_INFINITY, f8::INFINITY).unwrap(), + f8::ALL_LEN as u8 + ); + assert_eq!( + ulp_between(f8::INFINITY, f8::NEG_INFINITY).unwrap(), + f8::ALL_LEN as u8 + ); + assert_eq!( + ulp_between(f8::NEG_INFINITY, f8::ALL[f8::ALL_LEN - 1]).unwrap(), + f8::ALL_LEN as u8 - 1 + ); + assert_eq!( + ulp_between(f8::INFINITY, f8::ALL[0]).unwrap(), + f8::ALL_LEN as u8 - 1 + ); + + assert_eq!(ulp_between(f8::ZERO, f8::NEG_ZERO).unwrap(), 0); + assert_eq!(ulp_between(f8::NAN, f8::ZERO), None); + assert_eq!(ulp_between(f8::ZERO, f8::NAN), None); + } + + #[test] + fn test_logspace() { + let (ls, count) = logspace(f8::from_bits(0x0), f8::from_bits(0x4), 2); + let ls: Vec<_> = ls.collect(); + let exp = 
[f8::from_bits(0x0), f8::from_bits(0x4)]; + assert_eq!(ls, exp); + assert_eq!(ls.len(), usize::from(count)); + + let (ls, count) = logspace(f8::from_bits(0x0), f8::from_bits(0x4), 3); + let ls: Vec<_> = ls.collect(); + let exp = [f8::from_bits(0x0), f8::from_bits(0x2), f8::from_bits(0x4)]; + assert_eq!(ls, exp); + assert_eq!(ls.len(), usize::from(count)); + + // Check that we include all values with no repeats if `steps` exceeds the maximum number + // of steps. + let (ls, count) = logspace(f8::from_bits(0x0), f8::from_bits(0x3), 10); + let ls: Vec<_> = ls.collect(); + let exp = [ + f8::from_bits(0x0), + f8::from_bits(0x1), + f8::from_bits(0x2), + f8::from_bits(0x3), + ]; + assert_eq!(ls, exp); + assert_eq!(ls.len(), usize::from(count)); + } + + #[test] + fn test_linear_ints() { + let (ints, count) = linear_ints(0..=4, 2); + let ints: Vec<_> = ints.collect(); + let exp = [0, 4]; + assert_eq!(ints, exp); + assert_eq!(ints.len(), usize::try_from(count).unwrap()); + + let (ints, count) = linear_ints(0..=4, 3); + let ints: Vec<_> = ints.collect(); + let exp = [0, 2, 4]; + assert_eq!(ints, exp); + assert_eq!(ints.len(), usize::try_from(count).unwrap()); + + // Check that we include all values with no repeats if `steps` exceeds the maximum number + // of steps. + let (ints, count) = linear_ints(0x0..=0x3, 10); + let ints: Vec<_> = ints.collect(); + let exp = [0, 1, 2, 3]; + assert_eq!(ints, exp); + assert_eq!(ints.len(), usize::try_from(count).unwrap()); + + // Check that there are no panics around `i32::MAX`. + let (ints, count) = linear_ints(i32::MAX - 1..=i32::MAX, 5); + let ints: Vec<_> = ints.collect(); + let exp = [i32::MAX - 1, i32::MAX]; + assert_eq!(ints, exp); + assert_eq!(ints.len(), usize::try_from(count).unwrap()); + } + + #[test] + fn test_consts() { + let Consts { + pos_nan, + neg_nan, + max_qnan, + min_snan, + max_snan, + neg_max_qnan, + neg_min_snan, + neg_max_snan, + } = f8::consts(); + + assert_eq!(pos_nan.to_bits(), 0b0_1111_100); + assert_eq!(neg_nan.to_bits(), 0b1_1111_100); + assert_eq!(max_qnan.to_bits(), 0b0_1111_111); + assert_eq!(min_snan.to_bits(), 0b0_1111_001); + assert_eq!(max_snan.to_bits(), 0b0_1111_011); + assert_eq!(neg_max_qnan.to_bits(), 0b1_1111_111); + assert_eq!(neg_min_snan.to_bits(), 0b1_1111_001); + assert_eq!(neg_max_snan.to_bits(), 0b1_1111_011); + } +} diff --git a/library/compiler-builtins/libm-test/src/op.rs b/library/compiler-builtins/libm-test/src/op.rs new file mode 100644 index 0000000000000..afd445ff9c5ae --- /dev/null +++ b/library/compiler-builtins/libm-test/src/op.rs @@ -0,0 +1,155 @@ +//! Types representing individual functions. +//! +//! Each routine gets a module with its name, e.g. `mod sinf { /* ... */ }`. The module +//! contains a unit struct `Routine` which implements `MathOp`. +//! +//! Basically everything could be called a "function" here, so we loosely use the following +//! terminology: +//! +//! - "Function": the math operation that does not have an associated precision. E.g. `f(x) = e^x`, +//! `f(x) = log(x)`. +//! - "Routine": A code implementation of a math operation with a specific precision. E.g. `exp`, +//! `expf`, `expl`, `log`, `logf`. +//! - "Operation" / "Op": Something that relates a routine to a function or is otherwise higher +//! level. `Op` is also used as the name for generic parameters since it is terse. 
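To make the terminology above concrete, an editorial sketch (not taken from the patch) of how one generated routine module is used; it assumes the `sinf` module produced by the `for_each_function!` invocation at the end of this file:

    use libm_test::MathOp;
    use libm_test::op::sinf;

    fn main() {
        // `sinf::Routine` is the op describing libm's f32 routine for the `sin` function.
        assert_eq!(sinf::Routine::NAME, "sinf");
        assert_eq!(sinf::Routine::BASE_NAME.as_str(), "sin");

        // `ROUTINE` is the actual libm function, so the op can be exercised directly.
        let y = (sinf::Routine::ROUTINE)(0.5f32);
        assert!((y - 0.479_425_5).abs() < 1e-6);
    }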
+ +use std::fmt; +use std::panic::{RefUnwindSafe, UnwindSafe}; + +pub use shared::{ALL_OPERATIONS, FloatTy, MathOpInfo, Ty}; + +use crate::{CheckOutput, Float, TupleCall}; + +mod shared { + include!("../../crates/libm-macros/src/shared.rs"); +} + +/// An enum representing each possible symbol name (`sin`, `sinf`, `sinl`, etc). +#[libm_macros::function_enum(BaseName)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum Identifier {} + +impl fmt::Display for Identifier { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self.as_str()) + } +} + +/// The name without any type specifier, e.g. `sin` and `sinf` both become `sin`. +#[libm_macros::base_name_enum] +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum BaseName {} + +impl fmt::Display for BaseName { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self.as_str()) + } +} + +/// Attributes ascribed to a `libm` routine including signature, type information, +/// and naming. +pub trait MathOp { + /// The float type used for this operation. + type FTy: Float; + + /// The function type representing the signature in a C library. + type CFn: Copy; + + /// Arguments passed to the C library function as a tuple. These may include `&mut` return + /// values. + type CArgs<'a> + where + Self: 'a; + + /// The type returned by C implementations. + type CRet; + + /// The signature of the Rust function as a `fn(...) -> ...` type. + type RustFn: Copy + UnwindSafe; + + /// Arguments passed to the Rust library function as a tuple. + /// + /// The required `TupleCall` bounds ensure this type can be passed either to the C function or + /// to the Rust function. + type RustArgs: Copy + + TupleCall + + TupleCall + + RefUnwindSafe; + + /// Type returned from the Rust function. + type RustRet: CheckOutput; + + /// The name of this function, including suffix (e.g. `sin`, `sinf`). + const IDENTIFIER: Identifier; + + /// The name as a string. + const NAME: &'static str = Self::IDENTIFIER.as_str(); + + /// The name of the function excluding the type suffix, e.g. `sin` and `sinf` are both `sin`. + const BASE_NAME: BaseName = Self::IDENTIFIER.base_name(); + + /// The function in `libm` which can be called. + const ROUTINE: Self::RustFn; + + /// Whether or not the function is part of libm public API. + const PUBLIC: bool; +} + +/// Access the associated `FTy` type from an op (helper to avoid ambiguous associated types). +pub type OpFTy = ::FTy; +/// Access the associated `FTy::Int` type from an op (helper to avoid ambiguous associated types). +pub type OpITy = <::FTy as Float>::Int; +/// Access the associated `CFn` type from an op (helper to avoid ambiguous associated types). +pub type OpCFn = ::CFn; +/// Access the associated `CRet` type from an op (helper to avoid ambiguous associated types). +pub type OpCRet = ::CRet; +/// Access the associated `RustFn` type from an op (helper to avoid ambiguous associated types). +pub type OpRustFn = ::RustFn; +/// Access the associated `RustArgs` type from an op (helper to avoid ambiguous associated types). +pub type OpRustArgs = ::RustArgs; +/// Access the associated `RustRet` type from an op (helper to avoid ambiguous associated types). +pub type OpRustRet = ::RustRet; + +macro_rules! 
create_op_modules { + // Matcher for unary functions + ( + fn_name: $fn_name:ident, + FTy: $FTy:ty, + CFn: $CFn:ty, + CArgs: $CArgs:ty, + CRet: $CRet:ty, + RustFn: $RustFn:ty, + RustArgs: $RustArgs:ty, + RustRet: $RustRet:ty, + public: $public:expr, + attrs: [$($attr:meta),*], + ) => { + paste::paste! { + $(#[$attr])* + pub mod $fn_name { + use super::*; + pub struct Routine; + + impl MathOp for Routine { + type FTy = $FTy; + type CFn = for<'a> $CFn; + type CArgs<'a> = $CArgs where Self: 'a; + type CRet = $CRet; + type RustFn = $RustFn; + type RustArgs = $RustArgs; + type RustRet = $RustRet; + + const IDENTIFIER: Identifier = Identifier::[< $fn_name:camel >]; + const ROUTINE: Self::RustFn = libm::$fn_name; + const PUBLIC: bool = $public; + } + } + + } + }; +} + +libm_macros::for_each_function! { + callback: create_op_modules, + emit_types: all, +} diff --git a/library/compiler-builtins/libm-test/src/precision.rs b/library/compiler-builtins/libm-test/src/precision.rs new file mode 100644 index 0000000000000..f5fb5f6707b08 --- /dev/null +++ b/library/compiler-builtins/libm-test/src/precision.rs @@ -0,0 +1,573 @@ +//! Configuration for skipping or changing the result for individual test cases (inputs) rather +//! than ignoring entire tests. + +use core::f32; + +use CheckBasis::{Mpfr, Musl}; +use libm::support::CastFrom; +use {BaseName as Bn, Identifier as Id}; + +use crate::{BaseName, CheckBasis, CheckCtx, Float, Identifier, Int, TestResult}; + +/// Type implementing [`IgnoreCase`]. +pub struct SpecialCase; + +/// ULP allowed to differ from the results returned by a test basis. +#[allow(clippy::single_match)] +pub fn default_ulp(ctx: &CheckCtx) -> u32 { + // ULP compared to the infinite (MPFR) result. + let mut ulp = match ctx.base_name { + // Operations that require exact results. This list should correlate with what we + // have documented at . + Bn::Ceil + | Bn::Copysign + | Bn::Fabs + | Bn::Fdim + | Bn::Floor + | Bn::Fma + | Bn::Fmax + | Bn::Fmaximum + | Bn::FmaximumNum + | Bn::Fmin + | Bn::Fminimum + | Bn::FminimumNum + | Bn::Fmod + | Bn::Frexp + | Bn::Ilogb + | Bn::Ldexp + | Bn::Modf + | Bn::Nextafter + | Bn::Remainder + | Bn::Remquo + | Bn::Rint + | Bn::Round + | Bn::Roundeven + | Bn::Scalbn + | Bn::Sqrt + | Bn::Trunc => 0, + + // Operations that aren't required to be exact, but our implementations are. + Bn::Cbrt => 0, + + // Bessel functions have large inaccuracies. + Bn::J0 | Bn::J1 | Bn::Y0 | Bn::Y1 | Bn::Jn | Bn::Yn => 8_000_000, + + // For all other operations, specify our implementation's worst case precision. + Bn::Acos => 1, + Bn::Acosh => 4, + Bn::Asin => 1, + Bn::Asinh => 2, + Bn::Atan => 1, + Bn::Atan2 => 2, + Bn::Atanh => 2, + Bn::Cos => 1, + Bn::Cosh => 1, + Bn::Erf => 1, + Bn::Erfc => 4, + Bn::Exp => 1, + Bn::Exp10 => 6, + Bn::Exp2 => 1, + Bn::Expm1 => 1, + Bn::Hypot => 1, + Bn::Lgamma | Bn::LgammaR => 16, + Bn::Log => 1, + Bn::Log10 => 1, + Bn::Log1p => 1, + Bn::Log2 => 1, + Bn::Pow => 1, + Bn::Sin => 1, + Bn::Sincos => 1, + Bn::Sinh => 2, + Bn::Tan => 1, + Bn::Tanh => 2, + // tgammaf has higher accuracy than tgamma. + Bn::Tgamma if ctx.fn_ident != Id::Tgamma => 1, + Bn::Tgamma => 20, + }; + + // There are some cases where musl's approximation is less accurate than ours. For these + // cases, increase the ULP. 
+ if ctx.basis == Musl { + match ctx.base_name { + Bn::Cosh => ulp = 2, + Bn::Exp10 if usize::BITS < 64 => ulp = 4, + Bn::Lgamma | Bn::LgammaR => ulp = 400, + Bn::Tanh => ulp = 4, + _ => (), + } + + match ctx.fn_ident { + Id::Cbrt => ulp = 2, + // FIXME(#401): musl has an incorrect result here. + Id::Fdim => ulp = 2, + Id::Sincosf => ulp = 500, + Id::Tgamma => ulp = 20, + _ => (), + } + } + + if cfg!(target_arch = "x86") { + match ctx.fn_ident { + // Input `fma(0.999999999999999, 1.0000000000000013, 0.0) = 1.0000000000000002` is + // incorrect on i586 and i686. + Id::Fma => ulp = 1, + _ => (), + } + } + + // In some cases, our implementation is less accurate than musl on i586. + if cfg!(x86_no_sse) { + match ctx.fn_ident { + // FIXME(#401): these need to be correctly rounded but are not. + Id::Fmaf => ulp = 1, + Id::Fdim => ulp = 1, + Id::Round => ulp = 1, + + Id::Asinh => ulp = 3, + Id::Asinhf => ulp = 3, + Id::Cbrt => ulp = 1, + Id::Exp10 | Id::Exp10f => ulp = 1_000_000, + Id::Exp2 | Id::Exp2f => ulp = 10_000_000, + Id::Log1p | Id::Log1pf => ulp = 2, + Id::Tan => ulp = 2, + _ => (), + } + } + + ulp +} + +/// Result of checking for possible overrides. +#[derive(Debug, Default)] +pub enum CheckAction { + /// The check should pass. Default case. + #[default] + AssertSuccess, + + /// Override the ULP for this check. + AssertWithUlp(u32), + + /// Failure is expected, ensure this is the case (xfail). Takes a contxt string to help trace + /// back exactly why we expect this to fail. + AssertFailure(&'static str), + + /// The override somehow validated the result, here it is. + Custom(TestResult), + + /// Disregard the output. + Skip, +} + +/// Don't run further validation on this test case. +const SKIP: CheckAction = CheckAction::Skip; + +/// Return this to skip checks on a test that currently fails but shouldn't. Takes a description +/// of context. +const XFAIL: fn(&'static str) -> CheckAction = CheckAction::AssertFailure; + +/// Indicates that we expect a test to fail but we aren't asserting that it does (e.g. some results +/// within a range do actually pass). +/// +/// Same as `SKIP`, just indicates we have something to eventually fix. +const XFAIL_NOCHECK: CheckAction = CheckAction::Skip; + +/// By default, all tests should pass. +const DEFAULT: CheckAction = CheckAction::AssertSuccess; + +/// Allow overriding the outputs of specific test cases. +/// +/// There are some cases where we want to xfail specific cases or handle certain inputs +/// differently than the rest of calls to `validate`. This provides a hook to do that. +/// +/// If `None` is returned, checks will proceed as usual. If `Some(result)` is returned, checks +/// are skipped and the provided result is returned instead. +/// +/// This gets implemented once per input type, then the functions provide further filtering +/// based on function name and values. +/// +/// `ulp` can also be set to adjust the ULP for that specific test, even if `None` is still +/// returned. 
+pub trait MaybeOverride { + fn check_float( + _input: Input, + _actual: F, + _expected: F, + _ctx: &CheckCtx, + ) -> CheckAction { + DEFAULT + } + + fn check_int(_input: Input, _actual: I, _expected: I, _ctx: &CheckCtx) -> CheckAction { + DEFAULT + } +} + +#[cfg(f16_enabled)] +impl MaybeOverride<(f16,)> for SpecialCase {} + +impl MaybeOverride<(f32,)> for SpecialCase { + fn check_float(input: (f32,), actual: F, expected: F, ctx: &CheckCtx) -> CheckAction { + if ctx.base_name == BaseName::Expm1 + && !input.0.is_infinite() + && input.0 > 80.0 + && actual.is_infinite() + && !expected.is_infinite() + { + // we return infinity but the number is representable + if ctx.basis == CheckBasis::Musl { + return XFAIL_NOCHECK; + } + return XFAIL("expm1 representable numbers"); + } + + if cfg!(x86_no_sse) + && ctx.base_name == BaseName::Exp2 + && !expected.is_infinite() + && actual.is_infinite() + { + // We return infinity when there is a representable value. Test input: 127.97238 + return XFAIL("586 exp2 representable numbers"); + } + + if ctx.base_name == BaseName::Sinh && input.0.abs() > 80.0 && actual.is_nan() { + // we return some NaN that should be real values or infinite + if ctx.basis == CheckBasis::Musl { + return XFAIL_NOCHECK; + } + return XFAIL("sinh unexpected NaN"); + } + + if (ctx.base_name == BaseName::Lgamma || ctx.base_name == BaseName::LgammaR) + && input.0 > 4e36 + && expected.is_infinite() + && !actual.is_infinite() + { + // This result should saturate but we return a finite value. + return XFAIL_NOCHECK; + } + + if ctx.base_name == BaseName::J0 && input.0 < -1e34 { + // Errors get huge close to -inf + return XFAIL_NOCHECK; + } + + unop_common(input, actual, expected, ctx) + } + + fn check_int(input: (f32,), actual: I, expected: I, ctx: &CheckCtx) -> CheckAction { + // On MPFR for lgammaf_r, we set -1 as the integer result for negative infinity but MPFR + // sets +1 + if ctx.basis == CheckBasis::Mpfr + && ctx.base_name == BaseName::LgammaR + && input.0 == f32::NEG_INFINITY + && actual.abs() == expected.abs() + { + return XFAIL("lgammar integer result"); + } + + DEFAULT + } +} + +impl MaybeOverride<(f64,)> for SpecialCase { + fn check_float(input: (f64,), actual: F, expected: F, ctx: &CheckCtx) -> CheckAction { + if cfg!(x86_no_sse) + && ctx.base_name == BaseName::Ceil + && ctx.basis == CheckBasis::Musl + && input.0 < 0.0 + && input.0 > -1.0 + && expected == F::ZERO + && actual == F::ZERO + { + // musl returns -0.0, we return +0.0 + return XFAIL("i586 ceil signed zero"); + } + + if cfg!(x86_no_sse) + && (ctx.base_name == BaseName::Rint || ctx.base_name == BaseName::Roundeven) + && (expected - actual).abs() <= F::ONE + && (expected - actual).abs() > F::ZERO + { + // Our rounding mode is incorrect. + return XFAIL("i586 rint rounding mode"); + } + + if cfg!(x86_no_sse) + && (ctx.fn_ident == Identifier::Ceil || ctx.fn_ident == Identifier::Floor) + && expected.eq_repr(F::NEG_ZERO) + && actual.eq_repr(F::ZERO) + { + // FIXME: the x87 implementations do not keep the distinction between -0.0 and 0.0. + // See https://github.com/rust-lang/libm/pull/404#issuecomment-2572399955 + return XFAIL("i586 ceil/floor signed zero"); + } + + if cfg!(x86_no_sse) + && (ctx.fn_ident == Identifier::Exp10 || ctx.fn_ident == Identifier::Exp2) + { + // FIXME: i586 has very imprecise results with ULP > u32::MAX for these + // operations so we can't reasonably provide a limit. 
+ return XFAIL_NOCHECK; + } + + if ctx.base_name == BaseName::J0 && input.0 < -1e300 { + // Errors get huge close to -inf + return XFAIL_NOCHECK; + } + + // maybe_check_nan_bits(actual, expected, ctx) + unop_common(input, actual, expected, ctx) + } + + fn check_int(input: (f64,), actual: I, expected: I, ctx: &CheckCtx) -> CheckAction { + // On MPFR for lgamma_r, we set -1 as the integer result for negative infinity but MPFR + // sets +1 + if ctx.basis == CheckBasis::Mpfr + && ctx.base_name == BaseName::LgammaR + && input.0 == f64::NEG_INFINITY + && actual.abs() == expected.abs() + { + return XFAIL("lgammar integer result"); + } + + DEFAULT + } +} + +#[cfg(f128_enabled)] +impl MaybeOverride<(f128,)> for SpecialCase {} + +// F1 and F2 are always the same type, this is just to please generics +fn unop_common( + input: (F1,), + actual: F2, + expected: F2, + ctx: &CheckCtx, +) -> CheckAction { + if ctx.base_name == BaseName::Acosh + && input.0 < F1::NEG_ONE + && !(expected.is_nan() && actual.is_nan()) + { + // acoshf is undefined for x <= 1.0, but we return a random result at lower values. + + if ctx.basis == CheckBasis::Musl { + return XFAIL_NOCHECK; + } + + return XFAIL("acoshf undefined"); + } + + if (ctx.base_name == BaseName::Lgamma || ctx.base_name == BaseName::LgammaR) + && input.0 < F1::ZERO + && !input.0.is_infinite() + { + // loggamma should not be defined for x < 0, yet we both return results + return XFAIL_NOCHECK; + } + + // fabs and copysign must leave NaNs untouched. + if ctx.base_name == BaseName::Fabs && input.0.is_nan() { + // LLVM currently uses x87 instructions which quieten signalling NaNs to handle the i686 + // `extern "C"` `f32`/`f64` return ABI. + // LLVM issue + // Rust issue + if cfg!(target_arch = "x86") && ctx.basis == CheckBasis::Musl && actual.is_nan() { + return XFAIL_NOCHECK; + } + + // MPFR only has one NaN bitpattern; allow the default `.is_nan()` checks to validate. + if ctx.basis == CheckBasis::Mpfr { + return DEFAULT; + } + + // abs and copysign require signaling NaNs to be propagated, so verify bit equality. + if actual.to_bits() == expected.to_bits() { + return CheckAction::Custom(Ok(())); + } else { + return CheckAction::Custom(Err(anyhow::anyhow!("NaNs have different bitpatterns"))); + } + } + + DEFAULT +} + +#[cfg(f16_enabled)] +impl MaybeOverride<(f16, f16)> for SpecialCase { + fn check_float( + input: (f16, f16), + actual: F, + expected: F, + ctx: &CheckCtx, + ) -> CheckAction { + binop_common(input, actual, expected, ctx) + } +} + +impl MaybeOverride<(f32, f32)> for SpecialCase { + fn check_float( + input: (f32, f32), + actual: F, + expected: F, + ctx: &CheckCtx, + ) -> CheckAction { + binop_common(input, actual, expected, ctx) + } +} + +impl MaybeOverride<(f64, f64)> for SpecialCase { + fn check_float( + input: (f64, f64), + actual: F, + expected: F, + ctx: &CheckCtx, + ) -> CheckAction { + binop_common(input, actual, expected, ctx) + } +} + +#[cfg(f128_enabled)] +impl MaybeOverride<(f128, f128)> for SpecialCase { + fn check_float( + input: (f128, f128), + actual: F, + expected: F, + ctx: &CheckCtx, + ) -> CheckAction { + binop_common(input, actual, expected, ctx) + } +} + +// F1 and F2 are always the same type, this is just to please generics +fn binop_common( + input: (F1, F1), + actual: F2, + expected: F2, + ctx: &CheckCtx, +) -> CheckAction { + // MPFR only has one NaN bitpattern; allow the default `.is_nan()` checks to validate. 
Skip if + // the first input (magnitude source) is NaN and the output is also a NaN, or if the second + // input (sign source) is NaN. + if ctx.basis == CheckBasis::Mpfr + && ((input.0.is_nan() && actual.is_nan() && expected.is_nan()) || input.1.is_nan()) + { + return SKIP; + } + + /* FIXME(#439): our fmin and fmax do not compare signed zeros */ + + if ctx.base_name == BaseName::Fmin + && input.0.biteq(F1::NEG_ZERO) + && input.1.biteq(F1::ZERO) + && expected.biteq(F2::NEG_ZERO) + && actual.biteq(F2::ZERO) + { + return XFAIL("fmin signed zeroes"); + } + + if ctx.base_name == BaseName::Fmax + && input.0.biteq(F1::NEG_ZERO) + && input.1.biteq(F1::ZERO) + && expected.biteq(F2::ZERO) + && actual.biteq(F2::NEG_ZERO) + { + return XFAIL("fmax signed zeroes"); + } + + // Musl propagates NaNs if one is provided as the input, but we return the other input. + if (ctx.base_name == BaseName::Fmax || ctx.base_name == BaseName::Fmin) + && ctx.basis == Musl + && (input.0.is_nan() ^ input.1.is_nan()) + && expected.is_nan() + { + return XFAIL("fmax/fmin musl NaN"); + } + + DEFAULT +} + +impl MaybeOverride<(i32, f32)> for SpecialCase { + fn check_float( + input: (i32, f32), + actual: F, + expected: F, + ctx: &CheckCtx, + ) -> CheckAction { + // `ynf(213, 109.15641) = -inf` with our library, should be finite. + if ctx.basis == Mpfr + && ctx.base_name == BaseName::Yn + && input.0 > 200 + && !expected.is_infinite() + && actual.is_infinite() + { + return XFAIL("ynf infinity mismatch"); + } + + int_float_common(input, actual, expected, ctx) + } +} + +impl MaybeOverride<(i32, f64)> for SpecialCase { + fn check_float( + input: (i32, f64), + actual: F, + expected: F, + ctx: &CheckCtx, + ) -> CheckAction { + int_float_common(input, actual, expected, ctx) + } +} + +fn int_float_common( + input: (i32, F1), + actual: F2, + expected: F2, + ctx: &CheckCtx, +) -> CheckAction { + if ctx.basis == Mpfr + && (ctx.base_name == BaseName::Jn || ctx.base_name == BaseName::Yn) + && input.1 == F1::NEG_INFINITY + && actual == F2::ZERO + && expected == F2::ZERO + { + return XFAIL("we disagree with MPFR on the sign of zero"); + } + + // Values near infinity sometimes get cut off for us. `ynf(681, 509.90924) = -inf` but should + // be -3.2161271e38. + if ctx.basis == Musl + && ctx.fn_ident == Identifier::Ynf + && !expected.is_infinite() + && actual.is_infinite() + && (expected.abs().to_bits().abs_diff(actual.abs().to_bits()) + < F2::Int::cast_from(10_000_000u32)) + { + return XFAIL_NOCHECK; + } + + // Our bessel functions blow up with large N values + if ctx.basis == Musl && (ctx.base_name == BaseName::Jn || ctx.base_name == BaseName::Yn) { + if cfg!(x86_no_sse) { + // Precision is especially bad on i586, not worth checking. 
+ return XFAIL_NOCHECK; + } + + if input.0 > 4000 { + return XFAIL_NOCHECK; + } else if input.0 > 100 { + return CheckAction::AssertWithUlp(1_000_000); + } + } + DEFAULT +} + +#[cfg(f16_enabled)] +impl MaybeOverride<(f16, i32)> for SpecialCase {} +impl MaybeOverride<(f32, i32)> for SpecialCase {} +impl MaybeOverride<(f64, i32)> for SpecialCase {} +#[cfg(f128_enabled)] +impl MaybeOverride<(f128, i32)> for SpecialCase {} + +impl MaybeOverride<(f32, f32, f32)> for SpecialCase {} +impl MaybeOverride<(f64, f64, f64)> for SpecialCase {} +#[cfg(f128_enabled)] +impl MaybeOverride<(f128, f128, f128)> for SpecialCase {} diff --git a/library/compiler-builtins/libm-test/src/run_cfg.rs b/library/compiler-builtins/libm-test/src/run_cfg.rs new file mode 100644 index 0000000000000..3345a01d2de79 --- /dev/null +++ b/library/compiler-builtins/libm-test/src/run_cfg.rs @@ -0,0 +1,385 @@ +//! Configuration for how tests get run. + +use std::ops::RangeInclusive; +use std::sync::LazyLock; +use std::{env, str}; + +use crate::generate::random::{SEED, SEED_ENV}; +use crate::{BaseName, FloatTy, Identifier, test_log}; + +/// The environment variable indicating which extensive tests should be run. +pub const EXTENSIVE_ENV: &str = "LIBM_EXTENSIVE_TESTS"; + +/// Specify the number of iterations via this environment variable, rather than using the default. +pub const EXTENSIVE_ITER_ENV: &str = "LIBM_EXTENSIVE_ITERATIONS"; + +/// The override value, if set by the above environment. +static EXTENSIVE_ITER_OVERRIDE: LazyLock> = LazyLock::new(|| { + env::var(EXTENSIVE_ITER_ENV) + .map(|v| v.parse().expect("failed to parse iteration count")) + .ok() +}); + +/// Specific tests that need to have a reduced amount of iterations to complete in a reasonable +/// amount of time. +/// +/// Contains the itentifier+generator combo to match on, plus the factor to reduce by. +const EXTEMELY_SLOW_TESTS: &[(Identifier, GeneratorKind, u64)] = &[ + (Identifier::Fmodf128, GeneratorKind::QuickSpaced, 50), + (Identifier::Fmodf128, GeneratorKind::Extensive, 50), +]; + +/// Maximum number of iterations to run for a single routine. +/// +/// The default value of one greater than `u32::MAX` allows testing single-argument `f32` routines +/// and single- or double-argument `f16` routines exhaustively. `f64` and `f128` can't feasibly +/// be tested exhaustively; however, [`EXTENSIVE_ITER_ENV`] can be set to run tests for multiple +/// hours. +pub fn extensive_max_iterations() -> u64 { + let default = 1 << 32; // default value + EXTENSIVE_ITER_OVERRIDE.unwrap_or(default) +} + +/// Context passed to [`CheckOutput`]. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct CheckCtx { + /// Allowed ULP deviation + pub ulp: u32, + pub fn_ident: Identifier, + pub base_name: BaseName, + /// Function name. + pub fn_name: &'static str, + /// Return the unsuffixed version of the function name. + pub base_name_str: &'static str, + /// Source of truth for tests. + pub basis: CheckBasis, + pub gen_kind: GeneratorKind, + /// If specified, this value will override the value returned by [`iteration_count`]. + pub override_iterations: Option, +} + +impl CheckCtx { + /// Create a new check context, using the default ULP for the function. 
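A short editorial sketch (not taken from the patch) of how a context is typically built; `new` looks up the allowed ULP via `default_ulp`, so for `sin`-family routines checked against MPFR it comes out as 1:

    use libm_test::{CheckBasis, CheckCtx, GeneratorKind, Identifier};

    fn main() {
        // Check libm's `sinf` against MPFR using randomly generated inputs.
        let ctx = CheckCtx::new(Identifier::Sinf, CheckBasis::Mpfr, GeneratorKind::Random);
        assert_eq!(ctx.ulp, 1);
        assert_eq!(ctx.base_name_str, "sin");
    }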
+ pub fn new(fn_ident: Identifier, basis: CheckBasis, gen_kind: GeneratorKind) -> Self { + let mut ret = Self { + ulp: 0, + fn_ident, + fn_name: fn_ident.as_str(), + base_name: fn_ident.base_name(), + base_name_str: fn_ident.base_name().as_str(), + basis, + gen_kind, + override_iterations: None, + }; + ret.ulp = crate::default_ulp(&ret); + ret + } + + /// The number of input arguments for this function. + pub fn input_count(&self) -> usize { + self.fn_ident.math_op().rust_sig.args.len() + } + + pub fn override_iterations(&mut self, count: u64) { + self.override_iterations = Some(count) + } +} + +/// Possible items to test against +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum CheckBasis { + /// Check against Musl's math sources. + Musl, + /// Check against infinite precision (MPFR). + Mpfr, + /// Benchmarks or other times when this is not relevant. + None, +} + +/// The different kinds of generators that provide test input, which account for input pattern +/// and quantity. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum GeneratorKind { + EdgeCases, + Extensive, + QuickSpaced, + Random, + List, +} + +/// A list of all functions that should get extensive tests. +/// +/// This also supports the special test name `all` to run all tests, as well as `all_f16`, +/// `all_f32`, `all_f64`, and `all_f128` to run all tests for a specific float type. +static EXTENSIVE: LazyLock> = LazyLock::new(|| { + let var = env::var(EXTENSIVE_ENV).unwrap_or_default(); + let list = var.split(",").filter(|s| !s.is_empty()).collect::>(); + let mut ret = Vec::new(); + + let append_ty_ops = |ret: &mut Vec<_>, fty: FloatTy| { + let iter = Identifier::ALL + .iter() + .filter(move |id| id.math_op().float_ty == fty) + .copied(); + ret.extend(iter); + }; + + for item in list { + match item { + "all" => ret = Identifier::ALL.to_owned(), + "all_f16" => append_ty_ops(&mut ret, FloatTy::F16), + "all_f32" => append_ty_ops(&mut ret, FloatTy::F32), + "all_f64" => append_ty_ops(&mut ret, FloatTy::F64), + "all_f128" => append_ty_ops(&mut ret, FloatTy::F128), + s => { + let id = Identifier::from_str(s) + .unwrap_or_else(|| panic!("unrecognized test name `{s}`")); + ret.push(id); + } + } + } + + ret +}); + +/// Information about the function to be tested. +#[derive(Debug)] +struct TestEnv { + /// Tests should be reduced because the platform is slow. E.g. 32-bit or emulated. + slow_platform: bool, + /// The float cannot be tested exhaustively, `f64` or `f128`. + large_float_ty: bool, + /// Env indicates that an extensive test should be run. + should_run_extensive: bool, + /// Multiprecision tests will be run. + mp_tests_enabled: bool, + /// The number of inputs to the function. + input_count: usize, +} + +impl TestEnv { + fn from_env(ctx: &CheckCtx) -> Self { + let id = ctx.fn_ident; + let op = id.math_op(); + + let will_run_mp = cfg!(feature = "build-mpfr"); + let large_float_ty = match op.float_ty { + FloatTy::F16 | FloatTy::F32 => false, + FloatTy::F64 | FloatTy::F128 => true, + }; + + let will_run_extensive = EXTENSIVE.contains(&id); + + let input_count = op.rust_sig.args.len(); + + Self { + slow_platform: slow_platform(), + large_float_ty, + should_run_extensive: will_run_extensive, + mp_tests_enabled: will_run_mp, + input_count, + } + } +} + +/// Tests are pretty slow on non-64-bit targets, x86 MacOS, and targets that run in QEMU. Start +/// with a reduced number on these platforms. 
+fn slow_platform() -> bool { + let slow_on_ci = crate::emulated() + || usize::BITS < 64 + || cfg!(all(target_arch = "x86_64", target_vendor = "apple")); + + // If not running in CI, there is no need to reduce iteration count. + slow_on_ci && crate::ci() +} + +/// The number of iterations to run for a given test. +pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 { + let t_env = TestEnv::from_env(ctx); + + // Ideally run 5M tests + let mut domain_iter_count: u64 = 4_000_000; + + // Start with a reduced number of tests on slow platforms. + if t_env.slow_platform { + domain_iter_count = 100_000; + } + + // If we will be running tests against MPFR, we don't need to test as much against musl. + // However, there are some platforms where we have to test against musl since MPFR can't be + // built. + if t_env.mp_tests_enabled && ctx.basis == CheckBasis::Musl { + domain_iter_count /= 100; + } + + // Run fewer random tests than domain tests. + let random_iter_count = domain_iter_count / 100; + + let mut total_iterations = match ctx.gen_kind { + GeneratorKind::QuickSpaced => domain_iter_count, + GeneratorKind::Random => random_iter_count, + GeneratorKind::Extensive => extensive_max_iterations(), + GeneratorKind::EdgeCases | GeneratorKind::List => { + unimplemented!("shoudn't need `iteration_count` for {:?}", ctx.gen_kind) + } + }; + + // Larger float types get more iterations. + if t_env.large_float_ty && ctx.gen_kind != GeneratorKind::Extensive { + if ctx.gen_kind == GeneratorKind::Extensive { + // Extensive already has a pretty high test count. + total_iterations *= 2; + } else { + total_iterations *= 4; + } + } + + // Functions with more arguments get more iterations. + let arg_multiplier = 1 << (t_env.input_count - 1); + total_iterations *= arg_multiplier; + + // FMA has a huge domain but is reasonably fast to run, so increase another 1.5x. + if ctx.base_name == BaseName::Fma { + total_iterations = 3 * total_iterations / 2; + } + + // Some tests are significantly slower than others and need to be further reduced. + if let Some((_id, _gen, scale)) = EXTEMELY_SLOW_TESTS + .iter() + .find(|(id, generator, _scale)| *id == ctx.fn_ident && *generator == ctx.gen_kind) + { + // However, do not override if the extensive iteration count has been manually set. + if !(ctx.gen_kind == GeneratorKind::Extensive && EXTENSIVE_ITER_OVERRIDE.is_some()) { + total_iterations /= scale; + } + } + + if cfg!(optimizations_enabled) { + // Always run at least 10,000 tests. + total_iterations = total_iterations.max(10_000); + } else { + // Without optimizations, just run a quick check regardless of other parameters. 
+ total_iterations = 800; + } + + let mut overridden = false; + if let Some(count) = ctx.override_iterations { + total_iterations = count; + overridden = true; + } + + // Adjust for the number of inputs + let ntests = match t_env.input_count { + 1 => total_iterations, + 2 => (total_iterations as f64).sqrt().ceil() as u64, + 3 => (total_iterations as f64).cbrt().ceil() as u64, + _ => panic!("test has more than three arguments"), + }; + + let total = ntests.pow(t_env.input_count.try_into().unwrap()); + + let seed_msg = match ctx.gen_kind { + GeneratorKind::QuickSpaced | GeneratorKind::Extensive => String::new(), + GeneratorKind::Random => { + format!( + " using `{SEED_ENV}={}`", + str::from_utf8(SEED.as_slice()).unwrap() + ) + } + GeneratorKind::EdgeCases | GeneratorKind::List => unimplemented!(), + }; + + test_log(&format!( + "{gen_kind:?} {basis:?} {fn_ident} arg {arg}/{args}: {ntests} iterations \ + ({total} total){seed_msg}{omsg}", + gen_kind = ctx.gen_kind, + basis = ctx.basis, + fn_ident = ctx.fn_ident, + arg = argnum + 1, + args = t_env.input_count, + omsg = if overridden { " (overridden)" } else { "" } + )); + + ntests +} + +/// Some tests require that an integer be kept within reasonable limits; generate that here. +pub fn int_range(ctx: &CheckCtx, argnum: usize) -> RangeInclusive { + let t_env = TestEnv::from_env(ctx); + + if !matches!(ctx.base_name, BaseName::Jn | BaseName::Yn) { + return i32::MIN..=i32::MAX; + } + + assert_eq!( + argnum, 0, + "For `jn`/`yn`, only the first argument takes an integer" + ); + + // The integer argument to `jn` is an iteration count. Limit this to ensure tests can be + // completed in a reasonable amount of time. + let non_extensive_range = if t_env.slow_platform || !cfg!(optimizations_enabled) { + (-0xf)..=0xff + } else { + (-0xff)..=0xffff + }; + + let extensive_range = (-0xfff)..=0xfffff; + + match ctx.gen_kind { + GeneratorKind::Extensive => extensive_range, + GeneratorKind::QuickSpaced | GeneratorKind::Random => non_extensive_range, + GeneratorKind::EdgeCases => extensive_range, + GeneratorKind::List => unimplemented!("shoudn't need range for {:?}", ctx.gen_kind), + } +} + +/// For domain tests, limit how many asymptotes or specified check points we test. +pub fn check_point_count(ctx: &CheckCtx) -> usize { + assert_eq!( + ctx.gen_kind, + GeneratorKind::EdgeCases, + "check_point_count is intended for edge case tests" + ); + let t_env = TestEnv::from_env(ctx); + if t_env.slow_platform || !cfg!(optimizations_enabled) { + 4 + } else { + 10 + } +} + +/// When validating points of interest (e.g. asymptotes, inflection points, extremes), also check +/// this many surrounding values. +pub fn check_near_count(ctx: &CheckCtx) -> u64 { + assert_eq!( + ctx.gen_kind, + GeneratorKind::EdgeCases, + "check_near_count is intended for edge case tests" + ); + if cfg!(optimizations_enabled) { + // Taper based on the number of inputs. + match ctx.input_count() { + 1 | 2 => 100, + 3 => 50, + x => panic!("unexpected argument count {x}"), + } + } else { + 8 + } +} + +/// Check whether extensive actions should be run or skipped. +pub fn skip_extensive_test(ctx: &CheckCtx) -> bool { + let t_env = TestEnv::from_env(ctx); + !t_env.should_run_extensive +} + +/// The number of iterations to run for `u256` fuzz tests. 
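+///
+/// The count is reduced when optimizations are disabled or when running on a slow platform.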
+pub fn bigint_fuzz_iteration_count() -> u64 { + if !cfg!(optimizations_enabled) { + return 1000; + } + + if slow_platform() { 100_000 } else { 5_000_000 } +} diff --git a/library/compiler-builtins/libm-test/src/test_traits.rs b/library/compiler-builtins/libm-test/src/test_traits.rs new file mode 100644 index 0000000000000..dbb97016153c7 --- /dev/null +++ b/library/compiler-builtins/libm-test/src/test_traits.rs @@ -0,0 +1,453 @@ +//! Traits related to testing. +//! +//! There are two main traits in this module: +//! +//! - `TupleCall`: implemented on tuples to allow calling them as function arguments. +//! - `CheckOutput`: implemented on anything that is an output type for validation against an +//! expected value. + +use std::panic::{RefUnwindSafe, UnwindSafe}; +use std::{fmt, panic}; + +use anyhow::{Context, anyhow, bail, ensure}; +use libm::support::Hexf; + +use crate::precision::CheckAction; +use crate::{ + CheckBasis, CheckCtx, Float, GeneratorKind, Int, MaybeOverride, SpecialCase, TestResult, +}; + +/// Trait for calling a function with a tuple as arguments. +/// +/// Implemented on the tuple with the function signature as the generic (so we can use the same +/// tuple for multiple signatures). +pub trait TupleCall: fmt::Debug { + type Output; + fn call(self, f: Func) -> Self::Output; + + /// Intercept panics and print the input to stderr before continuing. + fn call_intercept_panics(self, f: Func) -> Self::Output + where + Self: RefUnwindSafe + Copy, + Func: UnwindSafe, + { + let res = panic::catch_unwind(|| self.call(f)); + match res { + Ok(v) => v, + Err(e) => { + eprintln!("panic with the following input: {self:?}"); + panic::resume_unwind(e) + } + } + } +} + +/// A trait to implement on any output type so we can verify it in a generic way. +pub trait CheckOutput: Sized { + /// Validate `self` (actual) and `expected` are the same. + /// + /// `input` is only used here for error messages. + fn validate(self, expected: Self, input: Input, ctx: &CheckCtx) -> TestResult; +} + +/// A helper trait to print something as hex with the correct number of nibbles, e.g. a `u32` +/// will always print with `0x` followed by 8 digits. +/// +/// This is only used for printing errors so allocating is okay. +pub trait Hex: Copy { + /// Hex integer syntax. + fn hex(self) -> String; + /// Hex float syntax. 
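+    ///
+    /// Integer implementations return an empty string, since hex float syntax does not apply to them.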
+ fn hexf(self) -> String; +} + +/* implement `TupleCall` */ + +impl TupleCall R> for (T1,) +where + T1: fmt::Debug, +{ + type Output = R; + + fn call(self, f: fn(T1) -> R) -> Self::Output { + f(self.0) + } +} + +impl TupleCall R> for (T1, T2) +where + T1: fmt::Debug, + T2: fmt::Debug, +{ + type Output = R; + + fn call(self, f: fn(T1, T2) -> R) -> Self::Output { + f(self.0, self.1) + } +} + +impl TupleCall R> for (T1,) +where + T1: fmt::Debug, + T2: fmt::Debug + Default, +{ + type Output = (R, T2); + + fn call(self, f: fn(T1, &mut T2) -> R) -> Self::Output { + let mut t2 = T2::default(); + (f(self.0, &mut t2), t2) + } +} + +impl TupleCall R> for (T1, T2, T3) +where + T1: fmt::Debug, + T2: fmt::Debug, + T3: fmt::Debug, +{ + type Output = R; + + fn call(self, f: fn(T1, T2, T3) -> R) -> Self::Output { + f(self.0, self.1, self.2) + } +} + +impl TupleCall R> for (T1, T2) +where + T1: fmt::Debug, + T2: fmt::Debug, + T3: fmt::Debug + Default, +{ + type Output = (R, T3); + + fn call(self, f: fn(T1, T2, &mut T3) -> R) -> Self::Output { + let mut t3 = T3::default(); + (f(self.0, self.1, &mut t3), t3) + } +} + +impl TupleCall fn(T1, &'a mut T2, &'a mut T3)> for (T1,) +where + T1: fmt::Debug, + T2: fmt::Debug + Default, + T3: fmt::Debug + Default, +{ + type Output = (T2, T3); + + fn call(self, f: for<'a> fn(T1, &'a mut T2, &'a mut T3)) -> Self::Output { + let mut t2 = T2::default(); + let mut t3 = T3::default(); + f(self.0, &mut t2, &mut t3); + (t2, t3) + } +} + +/* implement `Hex` */ + +impl Hex for (T1,) +where + T1: Hex, +{ + fn hex(self) -> String { + format!("({},)", self.0.hex()) + } + + fn hexf(self) -> String { + format!("({},)", self.0.hexf()) + } +} + +impl Hex for (T1, T2) +where + T1: Hex, + T2: Hex, +{ + fn hex(self) -> String { + format!("({}, {})", self.0.hex(), self.1.hex()) + } + + fn hexf(self) -> String { + format!("({}, {})", self.0.hexf(), self.1.hexf()) + } +} + +impl Hex for (T1, T2, T3) +where + T1: Hex, + T2: Hex, + T3: Hex, +{ + fn hex(self) -> String { + format!("({}, {}, {})", self.0.hex(), self.1.hex(), self.2.hex()) + } + + fn hexf(self) -> String { + format!("({}, {}, {})", self.0.hexf(), self.1.hexf(), self.2.hexf()) + } +} + +/* trait implementations for ints */ + +macro_rules! impl_int { + ($($ty:ty),*) => { + $( + impl Hex for $ty { + fn hex(self) -> String { + format!("{self:#0width$x}", width = ((Self::BITS / 4) + 2) as usize) + } + + fn hexf(self) -> String { + String::new() + } + } + + impl $crate::CheckOutput for $ty + where + Input: Hex + fmt::Debug, + SpecialCase: MaybeOverride, + { + fn validate<'a>( + self, + expected: Self, + input: Input, + ctx: &$crate::CheckCtx, + ) -> TestResult { + validate_int(self, expected, input, ctx) + } + } + )* + }; +} + +fn validate_int(actual: I, expected: I, input: Input, ctx: &CheckCtx) -> TestResult +where + I: Int + Hex, + Input: Hex + fmt::Debug, + SpecialCase: MaybeOverride, +{ + let (result, xfail_msg) = match SpecialCase::check_int(input, actual, expected, ctx) { + // `require_biteq` forbids overrides. + _ if ctx.gen_kind == GeneratorKind::List => (actual == expected, None), + CheckAction::AssertSuccess => (actual == expected, None), + CheckAction::AssertFailure(msg) => (actual != expected, Some(msg)), + CheckAction::Custom(res) => return res, + CheckAction::Skip => return Ok(()), + CheckAction::AssertWithUlp(_) => panic!("ulp has no meaning for integer checks"), + }; + + let make_xfail_msg = || match xfail_msg { + Some(m) => format!( + "expected failure but test passed. 
Does an XFAIL need to be updated?\n\ + failed at: {m}", + ), + None => String::new(), + }; + + anyhow::ensure!( + result, + "\ + \n input: {input:?} {ibits}\ + \n expected: {expected:<22?} {expbits}\ + \n actual: {actual:<22?} {actbits}\ + \n {msg}\ + ", + actbits = actual.hex(), + expbits = expected.hex(), + ibits = input.hex(), + msg = make_xfail_msg() + ); + + Ok(()) +} + +impl_int!(u32, i32, u64, i64); + +/* trait implementations for floats */ + +macro_rules! impl_float { + ($($ty:ty),*) => { + $( + impl Hex for $ty { + fn hex(self) -> String { + format!( + "{:#0width$x}", + self.to_bits(), + width = ((Self::BITS / 4) + 2) as usize + ) + } + + fn hexf(self) -> String { + format!("{}", Hexf(self)) + } + } + + impl $crate::CheckOutput for $ty + where + Input: Hex + fmt::Debug, + SpecialCase: MaybeOverride, + { + fn validate<'a>( + self, + expected: Self, + input: Input, + ctx: &$crate::CheckCtx, + ) -> TestResult { + validate_float(self, expected, input, ctx) + } + } + )* + }; +} + +fn validate_float(actual: F, expected: F, input: Input, ctx: &CheckCtx) -> TestResult +where + F: Float + Hex, + Input: Hex + fmt::Debug, + u32: TryFrom, + SpecialCase: MaybeOverride, +{ + let mut assert_failure_msg = None; + + // Create a wrapper function so we only need to `.with_context` once. + let mut inner = || -> TestResult { + let mut allowed_ulp = ctx.ulp; + + // Forbid overrides if the items came from an explicit list, as long as we are checking + // against either MPFR or the result itself. + let require_biteq = ctx.gen_kind == GeneratorKind::List && ctx.basis != CheckBasis::Musl; + + match SpecialCase::check_float(input, actual, expected, ctx) { + _ if require_biteq => (), + CheckAction::AssertSuccess => (), + CheckAction::AssertFailure(msg) => assert_failure_msg = Some(msg), + CheckAction::Custom(res) => return res, + CheckAction::Skip => return Ok(()), + CheckAction::AssertWithUlp(ulp_override) => allowed_ulp = ulp_override, + }; + + // Check when both are NaNs + if actual.is_nan() && expected.is_nan() { + if require_biteq && ctx.basis == CheckBasis::None { + ensure!( + actual.to_bits() == expected.to_bits(), + "mismatched NaN bitpatterns" + ); + } + // By default, NaNs have nothing special to check. + return Ok(()); + } else if actual.is_nan() || expected.is_nan() { + // Check when only one is a NaN + bail!("real value != NaN") + } + + // Make sure that the signs are the same before checing ULP to avoid wraparound + let act_sig = actual.signum(); + let exp_sig = expected.signum(); + ensure!( + act_sig == exp_sig, + "mismatched signs {act_sig:?} {exp_sig:?}" + ); + + if actual.is_infinite() ^ expected.is_infinite() { + bail!("mismatched infinities"); + } + + let act_bits = actual.to_bits().signed(); + let exp_bits = expected.to_bits().signed(); + + let ulp_diff = act_bits.checked_sub(exp_bits).unwrap().abs(); + + let ulp_u32 = u32::try_from(ulp_diff) + .map_err(|e| anyhow!("{e:?}: ulp of {ulp_diff} exceeds u32::MAX"))?; + + ensure!(ulp_u32 <= allowed_ulp, "ulp {ulp_diff} > {allowed_ulp}",); + + Ok(()) + }; + + let mut res = inner(); + + if let Some(msg) = assert_failure_msg { + // Invert `Ok` and `Err` if the test is an xfail. + if res.is_ok() { + let e = anyhow!( + "expected failure but test passed. 
Does an XFAIL need to be updated?\n\ + failed at: {msg}", + ); + res = Err(e) + } else { + res = Ok(()) + } + } + + res.with_context(|| { + format!( + "\ + \n input: {input:?}\ + \n as hex: {ihex}\ + \n as bits: {ibits}\ + \n expected: {expected:<22?} {exphex} {expbits}\ + \n actual: {actual:<22?} {acthex} {actbits}\ + ", + ihex = input.hexf(), + ibits = input.hex(), + exphex = expected.hexf(), + expbits = expected.hex(), + actbits = actual.hex(), + acthex = actual.hexf(), + ) + }) +} + +impl_float!(f32, f64); + +#[cfg(f16_enabled)] +impl_float!(f16); + +#[cfg(f128_enabled)] +impl_float!(f128); + +/* trait implementations for compound types */ + +/// Implement `CheckOutput` for combinations of types. +macro_rules! impl_tuples { + ($(($a:ty, $b:ty);)*) => { + $( + impl CheckOutput for ($a, $b) + where + Input: Hex + fmt::Debug, + SpecialCase: MaybeOverride, + { + fn validate<'a>( + self, + expected: Self, + input: Input, + ctx: &CheckCtx, + ) -> TestResult { + self.0.validate(expected.0, input, ctx) + .and_then(|()| self.1.validate(expected.1, input, ctx)) + .with_context(|| format!( + "full context:\ + \n input: {input:?} {ibits}\ + \n as hex: {ihex}\ + \n as bits: {ibits}\ + \n expected: {expected:?} {expbits}\ + \n actual: {self:?} {actbits}\ + ", + ihex = input.hexf(), + ibits = input.hex(), + expbits = expected.hex(), + actbits = self.hex(), + )) + } + } + )* + }; +} + +impl_tuples!( + (f32, i32); + (f64, i32); + (f32, f32); + (f64, f64); +); diff --git a/library/compiler-builtins/libm-test/tests/check_coverage.rs b/library/compiler-builtins/libm-test/tests/check_coverage.rs new file mode 100644 index 0000000000000..3b445a3de9da1 --- /dev/null +++ b/library/compiler-builtins/libm-test/tests/check_coverage.rs @@ -0,0 +1,61 @@ +//! Ensure that `for_each_function!` isn't missing any symbols. + +use std::collections::HashSet; +use std::env; +use std::path::Path; +use std::process::Command; + +macro_rules! callback { + ( + fn_name: $name:ident, + attrs: [$($attr:meta),*], + extra: [$set:ident], + ) => { + let name = stringify!($name); + let new = $set.insert(name); + assert!(new, "duplicate function `{name}` in `ALL_OPERATIONS`"); + }; +} + +#[test] +fn test_for_each_function_all_included() { + let all_functions: HashSet<_> = include_str!("../../etc/function-list.txt") + .lines() + .filter(|line| !line.starts_with("#")) + .collect(); + + let mut tested = HashSet::new(); + + libm_macros::for_each_function! { + callback: callback, + extra: [tested], + }; + + let untested = all_functions.difference(&tested); + if untested.clone().next().is_some() { + panic!( + "missing tests for the following: {untested:#?} \ + \nmake sure any new functions are entered in \ + `ALL_OPERATIONS` (in `libm-macros`)." + ); + } + assert_eq!(all_functions, tested); +} + +#[test] +fn ensure_list_updated() { + if libm_test::ci() { + // Most CI tests run in Docker where we don't have Python or Rustdoc, so it's easiest + // to just run the python file directly when it is available. 
+ eprintln!("skipping test; CI runs the python file directly"); + return; + } + + let res = Command::new("python3") + .arg(Path::new(env!("CARGO_MANIFEST_DIR")).join("../etc/update-api-list.py")) + .arg("--check") + .status() + .unwrap(); + + assert!(res.success(), "May need to run `./etc/update-api-list.py`"); +} diff --git a/library/compiler-builtins/libm-test/tests/compare_built_musl.rs b/library/compiler-builtins/libm-test/tests/compare_built_musl.rs new file mode 100644 index 0000000000000..6ccbb6f4c51d5 --- /dev/null +++ b/library/compiler-builtins/libm-test/tests/compare_built_musl.rs @@ -0,0 +1,106 @@ +//! Compare our implementations with the result of musl functions, as provided by `musl-math-sys`. +//! +//! Currently this only tests randomized inputs. In the future this may be improved to test edge +//! cases or run exhaustive tests. +//! +//! Note that musl functions do not always provide 0.5ULP rounding, so our functions can do better +//! than these results. + +// There are some targets we can't build musl for +#![cfg(feature = "build-musl")] + +use libm_test::generate::{case_list, edge_cases, random, spaced}; +use libm_test::{CheckBasis, CheckCtx, CheckOutput, GeneratorKind, MathOp, TupleCall}; + +const BASIS: CheckBasis = CheckBasis::Musl; + +fn musl_runner( + ctx: &CheckCtx, + cases: impl Iterator, + musl_fn: Op::CFn, +) { + for input in cases { + let musl_res = input.call(musl_fn); + let crate_res = input.call_intercept_panics(Op::ROUTINE); + + crate_res.validate(musl_res, input, ctx).unwrap(); + } +} + +/// Test against musl with generators from a domain. +macro_rules! musl_tests { + ( + fn_name: $fn_name:ident, + attrs: [$($attr:meta),*], + ) => { + paste::paste! { + #[test] + $(#[$attr])* + fn [< musl_case_list_ $fn_name >]() { + type Op = libm_test::op::$fn_name::Routine; + let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::List); + let cases = case_list::get_test_cases_basis::(&ctx).0; + musl_runner::(&ctx, cases, musl_math_sys::$fn_name); + } + + #[test] + $(#[$attr])* + fn [< musl_random_ $fn_name >]() { + type Op = libm_test::op::$fn_name::Routine; + let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::Random); + let cases = random::get_test_cases::<::RustArgs>(&ctx).0; + musl_runner::(&ctx, cases, musl_math_sys::$fn_name); + } + + #[test] + $(#[$attr])* + fn [< musl_edge_case_ $fn_name >]() { + type Op = libm_test::op::$fn_name::Routine; + let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::EdgeCases); + let cases = edge_cases::get_test_cases::(&ctx).0; + musl_runner::(&ctx, cases, musl_math_sys::$fn_name); + } + + #[test] + $(#[$attr])* + fn [< musl_quickspace_ $fn_name >]() { + type Op = libm_test::op::$fn_name::Routine; + let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::QuickSpaced); + let cases = spaced::get_test_cases::(&ctx).0; + musl_runner::(&ctx, cases, musl_math_sys::$fn_name); + } + } + }; +} + +libm_macros::for_each_function! 
{ + callback: musl_tests, + attributes: [], + // Not provided by musl + skip_f16_f128: true, + skip: [ + // TODO integer inputs + jn, + jnf, + ldexp, + ldexpf, + scalbn, + scalbnf, + yn, + ynf, + + // Not provided by musl + // verify-sorted-start + fmaximum, + fmaximum_num, + fmaximum_numf, + fmaximumf, + fminimum, + fminimum_num, + fminimum_numf, + fminimumf, + roundeven, + roundevenf, + // // verify-sorted-end + ], +} diff --git a/library/compiler-builtins/libm-test/tests/multiprecision.rs b/library/compiler-builtins/libm-test/tests/multiprecision.rs new file mode 100644 index 0000000000000..80b2c78688ea7 --- /dev/null +++ b/library/compiler-builtins/libm-test/tests/multiprecision.rs @@ -0,0 +1,79 @@ +//! Test with "infinite precision" + +#![cfg(feature = "build-mpfr")] + +use libm_test::generate::{case_list, edge_cases, random, spaced}; +use libm_test::mpfloat::MpOp; +use libm_test::{CheckBasis, CheckCtx, CheckOutput, GeneratorKind, MathOp, TupleCall}; + +const BASIS: CheckBasis = CheckBasis::Mpfr; + +fn mp_runner(ctx: &CheckCtx, cases: impl Iterator) { + let mut mp_vals = Op::new_mp(); + for input in cases { + let mp_res = Op::run(&mut mp_vals, input); + let crate_res = input.call_intercept_panics(Op::ROUTINE); + + crate_res.validate(mp_res, input, ctx).unwrap(); + } +} + +macro_rules! mp_tests { + ( + fn_name: $fn_name:ident, + attrs: [$($attr:meta),*], + ) => { + paste::paste! { + #[test] + $(#[$attr])* + fn [< mp_case_list_ $fn_name >]() { + type Op = libm_test::op::$fn_name::Routine; + let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::List); + let cases = case_list::get_test_cases_basis::(&ctx).0; + mp_runner::(&ctx, cases); + } + + #[test] + $(#[$attr])* + fn [< mp_random_ $fn_name >]() { + type Op = libm_test::op::$fn_name::Routine; + let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::Random); + let cases = random::get_test_cases::<::RustArgs>(&ctx).0; + mp_runner::(&ctx, cases); + } + + #[test] + $(#[$attr])* + fn [< mp_edge_case_ $fn_name >]() { + type Op = libm_test::op::$fn_name::Routine; + let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::EdgeCases); + let cases = edge_cases::get_test_cases::(&ctx).0; + mp_runner::(&ctx, cases); + } + + #[test] + $(#[$attr])* + fn [< mp_quickspace_ $fn_name >]() { + type Op = libm_test::op::$fn_name::Routine; + let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::QuickSpaced); + let cases = spaced::get_test_cases::(&ctx).0; + mp_runner::(&ctx, cases); + } + } + }; +} + +libm_macros::for_each_function! { + callback: mp_tests, + attributes: [ + // Also an assertion failure on i686: at `MPFR_ASSERTN (! mpfr_erangeflag_p ())` + #[ignore = "large values are infeasible in MPFR"] + [jn, jnf, yn, ynf], + ], + skip: [ + // FIXME: test needed, see + // https://github.com/rust-lang/libm/pull/311#discussion_r1818273392 + nextafter, + nextafterf, + ], +} diff --git a/library/compiler-builtins/libm-test/tests/standalone.rs b/library/compiler-builtins/libm-test/tests/standalone.rs new file mode 100644 index 0000000000000..7b30a3b48d7f5 --- /dev/null +++ b/library/compiler-builtins/libm-test/tests/standalone.rs @@ -0,0 +1,38 @@ +//! Test cases that have both an input and an output, so do not require a basis. 
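+//!
+//! These cases come from the explicit case list and are validated directly, with no musl or MPFR basis.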
+ +use libm_test::generate::case_list; +use libm_test::{CheckBasis, CheckCtx, CheckOutput, GeneratorKind, MathOp, TupleCall}; + +const BASIS: CheckBasis = CheckBasis::None; + +fn standalone_runner( + ctx: &CheckCtx, + cases: impl Iterator, +) { + for (input, expected) in cases { + let crate_res = input.call_intercept_panics(Op::ROUTINE); + crate_res.validate(expected, input, ctx).unwrap(); + } +} + +macro_rules! mp_tests { + ( + fn_name: $fn_name:ident, + attrs: [$($attr:meta),*], + ) => { + paste::paste! { + #[test] + $(#[$attr])* + fn [< standalone_ $fn_name >]() { + type Op = libm_test::op::$fn_name::Routine; + let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::List); + let cases = case_list::get_test_cases_standalone::(&ctx); + standalone_runner::(&ctx, cases); + } + } + }; +} + +libm_macros::for_each_function! { + callback: mp_tests, +} diff --git a/library/compiler-builtins/libm-test/tests/u256.rs b/library/compiler-builtins/libm-test/tests/u256.rs new file mode 100644 index 0000000000000..8cbb3ad226f67 --- /dev/null +++ b/library/compiler-builtins/libm-test/tests/u256.rs @@ -0,0 +1,155 @@ +//! Test the u256 implementation. the ops already get exercised reasonably well through the `f128` +//! routines, so this only does a few million fuzz iterations against GMP. + +#![cfg(feature = "build-mpfr")] + +use std::sync::LazyLock; + +use libm::support::{HInt, u256}; +type BigInt = rug::Integer; + +use libm_test::bigint_fuzz_iteration_count; +use libm_test::generate::random::SEED; +use rand::{Rng, SeedableRng}; +use rand_chacha::ChaCha8Rng; +use rug::Assign; +use rug::integer::Order; +use rug::ops::NotAssign; + +static BIGINT_U256_MAX: LazyLock = + LazyLock::new(|| BigInt::from_digits(&[u128::MAX, u128::MAX], Order::Lsf)); + +/// Copied from the test module. +fn hexu(v: u256) -> String { + format!("0x{:032x}{:032x}", v.hi, v.lo) +} + +fn random_u256(rng: &mut ChaCha8Rng) -> u256 { + let lo: u128 = rng.random(); + let hi: u128 = rng.random(); + u256 { lo, hi } +} + +fn assign_bigint(bx: &mut BigInt, x: u256) { + bx.assign_digits(&[x.lo, x.hi], Order::Lsf); +} + +fn from_bigint(bx: &mut BigInt) -> u256 { + // Truncate so the result fits into `[u128; 2]`. This makes all ops overflowing. 
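+    // Masking with `BIGINT_U256_MAX` reduces the value modulo 2^256 so it fits the fixed-width result.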
+ *bx &= &*BIGINT_U256_MAX; + let mut bres = [0u128, 0]; + bx.write_digits(&mut bres, Order::Lsf); + bx.assign(0); + u256 { + lo: bres[0], + hi: bres[1], + } +} + +fn check_one( + x: impl FnOnce() -> String, + y: impl FnOnce() -> Option, + actual: u256, + expected: &mut BigInt, +) { + let expected = from_bigint(expected); + if actual != expected { + let xmsg = x(); + let ymsg = y().map(|y| format!("y: {y}\n")).unwrap_or_default(); + panic!( + "Results do not match\n\ + input: {xmsg}\n\ + {ymsg}\ + actual: {}\n\ + expected: {}\ + ", + hexu(actual), + hexu(expected), + ) + } +} + +#[test] +fn mp_u256_bitor() { + let mut rng = ChaCha8Rng::from_seed(*SEED); + let mut bx = BigInt::new(); + let mut by = BigInt::new(); + + for _ in 0..bigint_fuzz_iteration_count() { + let x = random_u256(&mut rng); + let y = random_u256(&mut rng); + assign_bigint(&mut bx, x); + assign_bigint(&mut by, y); + let actual = x | y; + bx |= &by; + check_one(|| hexu(x), || Some(hexu(y)), actual, &mut bx); + } +} + +#[test] +fn mp_u256_not() { + let mut rng = ChaCha8Rng::from_seed(*SEED); + let mut bx = BigInt::new(); + + for _ in 0..bigint_fuzz_iteration_count() { + let x = random_u256(&mut rng); + assign_bigint(&mut bx, x); + let actual = !x; + bx.not_assign(); + check_one(|| hexu(x), || None, actual, &mut bx); + } +} + +#[test] +fn mp_u256_add() { + let mut rng = ChaCha8Rng::from_seed(*SEED); + let mut bx = BigInt::new(); + let mut by = BigInt::new(); + + for _ in 0..bigint_fuzz_iteration_count() { + let x = random_u256(&mut rng); + let y = random_u256(&mut rng); + assign_bigint(&mut bx, x); + assign_bigint(&mut by, y); + let actual = x + y; + bx += &by; + check_one(|| hexu(x), || Some(hexu(y)), actual, &mut bx); + } +} + +#[test] +fn mp_u256_shr() { + let mut rng = ChaCha8Rng::from_seed(*SEED); + let mut bx = BigInt::new(); + + for _ in 0..bigint_fuzz_iteration_count() { + let x = random_u256(&mut rng); + let shift: u32 = rng.random_range(0..255); + assign_bigint(&mut bx, x); + let actual = x >> shift; + bx >>= shift; + check_one(|| hexu(x), || Some(shift.to_string()), actual, &mut bx); + } +} + +#[test] +fn mp_u256_widen_mul() { + let mut rng = ChaCha8Rng::from_seed(*SEED); + let mut bx = BigInt::new(); + let mut by = BigInt::new(); + + for _ in 0..bigint_fuzz_iteration_count() { + let x: u128 = rng.random(); + let y: u128 = rng.random(); + bx.assign(x); + by.assign(y); + let actual = x.widen_mul(y); + bx *= &by; + check_one( + || format!("{x:#034x}"), + || Some(format!("{y:#034x}")), + actual, + &mut bx, + ); + } +} diff --git a/library/compiler-builtins/libm-test/tests/z_extensive/main.rs b/library/compiler-builtins/libm-test/tests/z_extensive/main.rs new file mode 100644 index 0000000000000..5448cb6eaa5bd --- /dev/null +++ b/library/compiler-builtins/libm-test/tests/z_extensive/main.rs @@ -0,0 +1,14 @@ +//! `main` is just a wrapper to handle configuration. + +#[cfg(not(feature = "build-mpfr"))] +fn main() { + eprintln!("multiprecision not enabled; skipping extensive tests"); +} + +#[cfg(feature = "build-mpfr")] +mod run; + +#[cfg(feature = "build-mpfr")] +fn main() { + run::run(); +} diff --git a/library/compiler-builtins/libm-test/tests/z_extensive/run.rs b/library/compiler-builtins/libm-test/tests/z_extensive/run.rs new file mode 100644 index 0000000000000..59c806ce73e24 --- /dev/null +++ b/library/compiler-builtins/libm-test/tests/z_extensive/run.rs @@ -0,0 +1,247 @@ +//! Exhaustive tests for `f16` and `f32`, high-iteration for `f64` and `f128`. 
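+//!
+//! Each routine gets a registered test that is marked ignored unless it was selected for extensive
+//! testing (see `skip_extensive_test`).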
+ +use std::fmt; +use std::io::{self, IsTerminal}; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::Duration; + +use indicatif::{ProgressBar, ProgressStyle}; +use libm_test::generate::spaced; +use libm_test::mpfloat::MpOp; +use libm_test::{ + CheckBasis, CheckCtx, CheckOutput, GeneratorKind, MathOp, TestResult, TupleCall, + skip_extensive_test, +}; +use libtest_mimic::{Arguments, Trial}; +use rayon::prelude::*; +use spaced::SpacedInput; + +const BASIS: CheckBasis = CheckBasis::Mpfr; +const GEN_KIND: GeneratorKind = GeneratorKind::Extensive; + +/// Run the extensive test suite. +pub fn run() { + let mut args = Arguments::from_args(); + // Prevent multiple tests from running in parallel, each test gets parallized internally. + args.test_threads = Some(1); + let tests = register_all_tests(); + + // With default parallelism, the CPU doesn't saturate. We don't need to be nice to + // other processes, so do 1.5x to make sure we use all available resources. + let threads = std::thread::available_parallelism() + .map(Into::into) + .unwrap_or(0) + * 3 + / 2; + rayon::ThreadPoolBuilder::new() + .num_threads(threads) + .build_global() + .unwrap(); + + libtest_mimic::run(&args, tests).exit(); +} + +macro_rules! mp_extensive_tests { + ( + fn_name: $fn_name:ident, + attrs: [$($attr:meta),*], + extra: [$push_to:ident], + ) => { + $(#[$attr])* + register_single_test::(&mut $push_to); + }; +} + +/// Create a list of tests for consumption by `libtest_mimic`. +fn register_all_tests() -> Vec { + let mut all_tests = Vec::new(); + + libm_macros::for_each_function! { + callback: mp_extensive_tests, + extra: [all_tests], + skip: [ + // FIXME: test needed, see + // https://github.com/rust-lang/libm/pull/311#discussion_r1818273392 + nextafter, + nextafterf, + ], + } + + all_tests +} + +/// Add a single test to the list. +fn register_single_test(all: &mut Vec) +where + Op: MathOp + MpOp, + Op::RustArgs: SpacedInput + Send, +{ + let test_name = format!("mp_extensive_{}", Op::NAME); + let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GEN_KIND); + let skip = skip_extensive_test(&ctx); + + let runner = move || { + if !cfg!(optimizations_enabled) { + panic!("extensive tests should be run with --release"); + } + + let res = run_single_test::(&ctx); + let e = match res { + Ok(()) => return Ok(()), + Err(e) => e, + }; + + // Format with the `Debug` implementation so we get the error cause chain, and print it + // here so we see the result immediately (rather than waiting for all tests to conclude). + let e = format!("{e:?}"); + eprintln!("failure testing {}:{e}\n", Op::IDENTIFIER); + + Err(e.into()) + }; + + all.push(Trial::test(test_name, runner).with_ignored_flag(skip)); +} + +/// Test runner for a signle routine. +fn run_single_test(ctx: &CheckCtx) -> TestResult +where + Op: MathOp + MpOp, + Op::RustArgs: SpacedInput + Send, +{ + // Small delay before printing anything so other output from the runner has a chance to flush. + std::thread::sleep(Duration::from_millis(500)); + eprintln!(); + + let completed = AtomicU64::new(0); + let (ref mut cases, total) = spaced::get_test_cases::(ctx); + let pb = Progress::new(Op::NAME, total); + + let test_single_chunk = |mp_vals: &mut Op::MpTy, input_vec: Vec| -> TestResult { + for input in input_vec { + // Test the input. 
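+            // Any mismatch propagates out of the chunk via `?` and fails the test.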
+ let mp_res = Op::run(mp_vals, input); + let crate_res = input.call_intercept_panics(Op::ROUTINE); + crate_res.validate(mp_res, input, ctx)?; + + let completed = completed.fetch_add(1, Ordering::Relaxed) + 1; + pb.update(completed, input); + } + + Ok(()) + }; + + // Chunk the cases so Rayon doesn't switch threads between each iterator item. 50k seems near + // a performance sweet spot. Ideally we would reuse these allocations rather than discarding, + // but that is difficult with Rayon's API. + let chunk_size = 50_000; + let chunks = std::iter::from_fn(move || { + let mut v = Vec::with_capacity(chunk_size); + v.extend(cases.take(chunk_size)); + (!v.is_empty()).then_some(v) + }); + + // Run the actual tests + let res = chunks + .par_bridge() + .try_for_each_init(Op::new_mp, test_single_chunk); + + let real_total = completed.load(Ordering::Relaxed); + pb.complete(real_total); + + if res.is_ok() && real_total != total { + // Provide a warning if our estimate needs to be updated. + panic!("total run {real_total} does not match expected {total}"); + } + + res +} + +/// Wrapper around a `ProgressBar` that handles styles and non-TTY messages. +struct Progress { + pb: ProgressBar, + name_padded: String, + final_style: ProgressStyle, + is_tty: bool, +} + +impl Progress { + const PB_TEMPLATE: &str = "[{elapsed:3} {percent:3}%] {bar:20.cyan/blue} NAME \ + {human_pos:>13}/{human_len:13} {per_sec:18} eta {eta:8} {msg}"; + const PB_TEMPLATE_FINAL: &str = "[{elapsed:3} {percent:3}%] {bar:20.cyan/blue} NAME \ + {human_pos:>13}/{human_len:13} {per_sec:18} done in {elapsed_precise}"; + + fn new(name: &str, total: u64) -> Self { + eprintln!("starting extensive tests for `{name}`"); + let name_padded = format!("{name:9}"); + let is_tty = io::stderr().is_terminal(); + + let initial_style = + ProgressStyle::with_template(&Self::PB_TEMPLATE.replace("NAME", &name_padded)) + .unwrap() + .progress_chars("##-"); + + let final_style = + ProgressStyle::with_template(&Self::PB_TEMPLATE_FINAL.replace("NAME", &name_padded)) + .unwrap() + .progress_chars("##-"); + + let pb = ProgressBar::new(total); + pb.set_style(initial_style); + + Self { + pb, + final_style, + name_padded, + is_tty, + } + } + + fn update(&self, completed: u64, input: impl fmt::Debug) { + // Infrequently update the progress bar. 
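+        // The position is refreshed more often than the message; see the thresholds below.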
+ if completed % 20_000 == 0 { + self.pb.set_position(completed); + } + + if completed % 500_000 == 0 { + self.pb.set_message(format!("input: {input:<24?}")); + } + + if !self.is_tty && completed % 5_000_000 == 0 { + let len = self.pb.length().unwrap_or_default(); + eprintln!( + "[{elapsed:3?}s {percent:3.0}%] {name} \ + {human_pos:>10}/{human_len:<10} {per_sec:14.2}/s eta {eta:4}s {input:<24?}", + elapsed = self.pb.elapsed().as_secs(), + percent = completed as f32 * 100.0 / len as f32, + name = self.name_padded, + human_pos = completed, + human_len = len, + per_sec = self.pb.per_sec(), + eta = self.pb.eta().as_secs() + ); + } + } + + fn complete(self, real_total: u64) { + self.pb.set_style(self.final_style); + self.pb.set_position(real_total); + self.pb.abandon(); + + if !self.is_tty { + let len = self.pb.length().unwrap_or_default(); + eprintln!( + "[{elapsed:3}s {percent:3.0}%] {name} \ + {human_pos:>10}/{human_len:<10} {per_sec:14.2}/s done in {elapsed_precise}", + elapsed = self.pb.elapsed().as_secs(), + percent = real_total as f32 * 100.0 / len as f32, + name = self.name_padded, + human_pos = real_total, + human_len = len, + per_sec = self.pb.per_sec(), + elapsed_precise = self.pb.elapsed().as_secs(), + ); + } + + eprintln!(); + } +} diff --git a/library/compiler-builtins/libm/CHANGELOG.md b/library/compiler-builtins/libm/CHANGELOG.md new file mode 100644 index 0000000000000..33fec06aa2379 --- /dev/null +++ b/library/compiler-builtins/libm/CHANGELOG.md @@ -0,0 +1,229 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to +[Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +## [0.2.15](https://github.com/rust-lang/compiler-builtins/compare/libm-v0.2.14...libm-v0.2.15) - 2025-05-06 + +### Other + +- Require `target_has_atomic = "ptr"` for runtime feature detection + +## [0.2.14](https://github.com/rust-lang/compiler-builtins/compare/libm-v0.2.13...libm-v0.2.14) - 2025-05-03 + +### Other + +- Use runtime feature detection for fma routines on x86 + +## [0.2.13](https://github.com/rust-lang/compiler-builtins/compare/libm-v0.2.12...libm-v0.2.13) - 2025-04-21 + +### Fixed + +- Switch back to workspace resolver v2 to unbreak builds without the 2024 edition + +## [0.2.12](https://github.com/rust-lang/compiler-builtins/compare/libm-v0.2.11...libm-v0.2.12) - 2025-04-21 + +- Mark generic functions `#[inline]` +- Combine the source files for `fmod` +- Ensure all public functions are marked `no_panic` +- Add assembly version of simple operations on aarch64 +- Add `roundeven{,f,f16,f128}` +- Add `fminimum`, `fmaximum`, `fminimum_num`, and `fmaximum_num` +- Eliminate the use of `force_eval!` in `ceil`, `floor`, and `trunc` +- Port the CORE-MATH version of `cbrt` +- Add `fmaf128` +- fma: Ensure zero has the correct sign +- Add `scalbnf16`, `scalbnf128`, `ldexpf16`, and `ldexpf128` +- Specify license as just MIT +- Add `fmodf128` +- Add `fmodf16` using the generic implementation +- Add `fminf16`, `fmaxf16`, `fminf128`, and `fmaxf128` +- Add `roundf16` and `roundf128` +- Add `rintf16` and `rintf128` +- Add `floorf16` and `floorf128` +- Add `ceilf16` and `ceilf128` +- Add `sqrtf16` and `sqrtf128` +- Simplify and optimize `fdim` ([#442](https://github.com/rust-lang/libm/pull/442)) +- Add `fdimf16` and `fdimf128` +- Add `truncf16` and 
`truncf128` +- Add `fabsf16`, `fabsf128`, `copysignf16`, and `copysignf128` +- Move some numeric trait logic to default implementations +- Add some more basic docstrings ([#352](https://github.com/rust-lang/libm/pull/352)) +- Add support for loongarch64-unknown-linux-gnu +- Add an "arch" Cargo feature that is on by default +- Rename the `special_case` module to `precision` and move default ULP +- Move the existing "unstable" feature to "unstable-intrinsics" + +There are a number of things that changed internally, see the git log for a full +list of changes. + +## [0.2.11](https://github.com/rust-lang/libm/compare/libm-v0.2.10...libm-v0.2.11) - 2024-10-28 + +### Fixed + +- fix type of constants in ported sincosf ([#331](https://github.com/rust-lang/libm/pull/331)) + +### Other + +- Disable a unit test that is failing on i586 +- Add a procedural macro for expanding all function signatures +- Introduce `musl-math-sys` for bindings to musl math symbols +- Add basic docstrings to some functions ([#337](https://github.com/rust-lang/libm/pull/337)) + +## [0.2.10](https://github.com/rust-lang/libm/compare/libm-v0.2.9...libm-v0.2.10) - 2024-10-28 + +### Other + +- Set the MSRV to 1.63 and test this in CI + +## [0.2.9](https://github.com/rust-lang/libm/compare/libm-v0.2.8...libm-v0.2.9) - 2024-10-26 + +### Fixed + +- Update exponent calculations in nextafter to match musl + +### Changed + +- Update licensing to MIT AND (MIT OR Apache-2.0), as this is derivative from + MIT-licensed musl. +- Set edition to 2021 for all crates +- Upgrade all dependencies + +### Other + +- Don't deny warnings in lib.rs +- Rename the `musl-bitwise-tests` feature to `test-musl-serialized` +- Rename the `musl-reference-tests` feature to `musl-bitwise-tests` +- Move `musl-reference-tests` to a new `libm-test` crate +- Add a `force-soft-floats` feature to prevent using any intrinsics or + arch-specific code +- Deny warnings in CI +- Fix `clippy::deprecated_cfg_attr` on compiler_builtins +- Corrected English typos +- Remove unneeded `extern core` in `tgamma` +- Allow internal_features lint when building with "unstable" + +## [v0.2.1] - 2019-11-22 + +### Fixed + +- sincosf + +## [v0.2.0] - 2019-10-18 + +### Added + +- Benchmarks +- signum +- remainder +- remainderf +- nextafter +- nextafterf + +### Fixed + +- Rounding to negative zero +- Overflows in rem_pio2 and remquo +- Overflows in fma +- sincosf + +### Removed + +- F32Ext and F64Ext traits + +## [v0.1.4] - 2019-06-12 + +### Fixed + +- Restored compatibility with Rust 1.31.0 + +## [v0.1.3] - 2019-05-14 + +### Added + +- minf +- fmin +- fmaxf +- fmax + +## [v0.1.2] - 2018-07-18 + +### Added + +- acosf +- asin +- asinf +- atan +- atan2 +- atan2f +- atanf +- cos +- cosf +- cosh +- coshf +- exp2 +- expm1 +- expm1f +- expo2 +- fmaf +- pow +- sin +- sinf +- sinh +- sinhf +- tan +- tanf +- tanh +- tanhf + +## [v0.1.1] - 2018-07-14 + +### Added + +- acos +- acosf +- asin +- asinf +- atanf +- cbrt +- cbrtf +- ceil +- ceilf +- cosf +- exp +- exp2 +- exp2f +- expm1 +- expm1f +- fdim +- fdimf +- floorf +- fma +- fmod +- log +- log2 +- log10 +- log10f +- log1p +- log1pf +- log2f +- roundf +- sinf +- tanf + +## v0.1.0 - 2018-07-13 + +- Initial release + +[Unreleased]: https://github.com/japaric/libm/compare/v0.2.1...HEAD +[v0.2.1]: https://github.com/japaric/libm/compare/0.2.0...v0.2.1 +[v0.2.0]: 
https://github.com/japaric/libm/compare/0.1.4...v0.2.0 +[v0.1.4]: https://github.com/japaric/libm/compare/0.1.3...v0.1.4 +[v0.1.3]: https://github.com/japaric/libm/compare/v0.1.2...0.1.3 +[v0.1.2]: https://github.com/japaric/libm/compare/v0.1.1...v0.1.2 +[v0.1.1]: https://github.com/japaric/libm/compare/v0.1.0...v0.1.1 diff --git a/library/compiler-builtins/libm/Cargo.toml b/library/compiler-builtins/libm/Cargo.toml new file mode 100644 index 0000000000000..b6fb5efcf76e5 --- /dev/null +++ b/library/compiler-builtins/libm/Cargo.toml @@ -0,0 +1,49 @@ +[package] +authors = ["Jorge Aparicio "] +categories = ["no-std"] +description = "libm in pure Rust" +documentation = "https://docs.rs/libm" +keywords = ["libm", "math"] +license = "MIT" +name = "libm" +readme = "README.md" +repository = "https://github.com/rust-lang/compiler-builtins" +version = "0.2.15" +edition = "2021" +rust-version = "1.63" + +[features] +default = ["arch"] + +# Enable architecture-specific features such as SIMD or assembly routines. +arch = [] + +# This tells the compiler to assume that a Nightly toolchain is being used and +# that it should activate any useful Nightly things accordingly. +unstable = ["unstable-intrinsics", "unstable-float"] + +# Enable calls to functions in `core::intrinsics` +unstable-intrinsics = [] + +# Make some internal things public for testing. +unstable-public-internals = [] + +# Enable the nightly-only `f16` and `f128`. +unstable-float = [] + +# Used to prevent using any intrinsics or arch-specific code. +# +# HACK: this is a negative feature which is generally a bad idea in Cargo, but +# we need it to be able to forbid other features when this crate is used in +# Rust dependencies. Setting this overrides all features that may enable +# hard float operations. +force-soft-floats = [] + +[dev-dependencies] +no-panic = "0.1.35" + +[lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = [ + # compiler-builtins sets this feature, but we use it in `libm` + 'cfg(feature, values("compiler-builtins"))', +] } diff --git a/library/compiler-builtins/libm/LICENSE.txt b/library/compiler-builtins/libm/LICENSE.txt new file mode 100644 index 0000000000000..2f8e41f147472 --- /dev/null +++ b/library/compiler-builtins/libm/LICENSE.txt @@ -0,0 +1,258 @@ +rust-lang/libm as a whole is available for use under the MIT license: + +------------------------------------------------------------------------------ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ + +As a contributor, you agree that your code can be used under either the MIT +license or the Apache-2.0 license: + +------------------------------------------------------------------------------ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +------------------------------------------------------------------------------ + +This Rust library contains the following copyrights: + + Copyright (c) 2018 Jorge Aparicio + +Portions of this software are derived from third-party works licensed under +terms compatible with the above MIT license: + +* musl libc https://www.musl-libc.org/. This library contains the following + copyright: + + Copyright © 2005-2020 Rich Felker, et al. + +* The CORE-MATH project https://core-math.gitlabpages.inria.fr/. CORE-MATH + routines are available under the MIT license on a per-file basis. + +The musl libc COPYRIGHT file also includes the following notice relevant to +math portions of the library: + +------------------------------------------------------------------------------ +Much of the math library code (src/math/* and src/complex/*) is +Copyright © 1993,2004 Sun Microsystems or +Copyright © 2003-2011 David Schultz or +Copyright © 2003-2009 Steven G. Kargl or +Copyright © 2003-2009 Bruce D. Evans or +Copyright © 2008 Stephen L. Moshier or +Copyright © 2017-2018 Arm Limited +and labelled as such in comments in the individual source files. All +have been licensed under extremely permissive terms. +------------------------------------------------------------------------------ + +Copyright notices are retained in src/* files where relevant. diff --git a/library/compiler-builtins/libm/README.md b/library/compiler-builtins/libm/README.md new file mode 100644 index 0000000000000..349e892dfcf9c --- /dev/null +++ b/library/compiler-builtins/libm/README.md @@ -0,0 +1,42 @@ +# `libm` + +A Rust implementations of the C math library. + +## Usage + +`libm` provides fallback implementations for Rust's [float math functions] in +`core`, and the [`core_float_math`] feature. If what is available suits your +needs, there is no need to add `libm` as a dependency. + +If more functionality is needed, this crate can also be used directly: + +```toml +[dependencies] +libm = "0.2.11" +``` + +[float math functions]: https://doc.rust-lang.org/std/primitive.f32.html +[`core_float_math`]: https://github.com/rust-lang/rust/issues/137578 + +## Contributing + +Please check [CONTRIBUTING.md](../CONTRIBUTING.md) + +## Minimum Rust version policy + +This crate supports rustc 1.63 and newer. + +## License + +Usage is under the MIT license, available at +. 
+ +### Contribution + +Contributions are licensed under both the MIT license and the Apache License, +Version 2.0, available at . Unless +you explicitly state otherwise, any contribution intentionally submitted for +inclusion in the work by you, as defined in the Apache-2.0 license, shall be +dual licensed as mentioned, without any additional terms or conditions. + +See [LICENSE.txt](LICENSE.txt) for full details. diff --git a/library/compiler-builtins/libm/build.rs b/library/compiler-builtins/libm/build.rs new file mode 100644 index 0000000000000..07d08ed4364db --- /dev/null +++ b/library/compiler-builtins/libm/build.rs @@ -0,0 +1,18 @@ +use std::env; + +mod configure; + +fn main() { + let cfg = configure::Config::from_env(); + + println!("cargo:rerun-if-changed=build.rs"); + println!("cargo:rerun-if-changed=configure.rs"); + println!("cargo:rustc-check-cfg=cfg(assert_no_panic)"); + + // If set, enable `no-panic`. Requires LTO (`release-opt` profile). + if env::var("ENSURE_NO_PANIC").is_ok() { + println!("cargo:rustc-cfg=assert_no_panic"); + } + + configure::emit_libm_config(&cfg); +} diff --git a/library/compiler-builtins/libm/configure.rs b/library/compiler-builtins/libm/configure.rs new file mode 100644 index 0000000000000..2a497c7b11790 --- /dev/null +++ b/library/compiler-builtins/libm/configure.rs @@ -0,0 +1,189 @@ +// Configuration shared with both libm and libm-test + +use std::env; +use std::path::PathBuf; + +#[allow(dead_code)] +pub struct Config { + pub manifest_dir: PathBuf, + pub out_dir: PathBuf, + pub opt_level: String, + pub cargo_features: Vec, + pub target_arch: String, + pub target_env: String, + pub target_family: Option, + pub target_os: String, + pub target_string: String, + pub target_vendor: String, + pub target_features: Vec, +} + +impl Config { + pub fn from_env() -> Self { + let target_features = env::var("CARGO_CFG_TARGET_FEATURE") + .map(|feats| feats.split(',').map(ToOwned::to_owned).collect()) + .unwrap_or_default(); + let cargo_features = env::vars() + .filter_map(|(name, _value)| name.strip_prefix("CARGO_FEATURE_").map(ToOwned::to_owned)) + .map(|s| s.to_lowercase().replace("_", "-")) + .collect(); + + Self { + manifest_dir: PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()), + out_dir: PathBuf::from(env::var("OUT_DIR").unwrap()), + opt_level: env::var("OPT_LEVEL").unwrap(), + cargo_features, + target_arch: env::var("CARGO_CFG_TARGET_ARCH").unwrap(), + target_env: env::var("CARGO_CFG_TARGET_ENV").unwrap(), + target_family: env::var("CARGO_CFG_TARGET_FAMILY").ok(), + target_os: env::var("CARGO_CFG_TARGET_OS").unwrap(), + target_string: env::var("TARGET").unwrap(), + target_vendor: env::var("CARGO_CFG_TARGET_VENDOR").unwrap(), + target_features, + } + } +} + +/// Libm gets most config options made available. +#[allow(dead_code)] +pub fn emit_libm_config(cfg: &Config) { + emit_intrinsics_cfg(); + emit_arch_cfg(); + emit_optimization_cfg(cfg); + emit_cfg_shorthands(cfg); + emit_cfg_env(cfg); + emit_f16_f128_cfg(cfg); +} + +/// Tests don't need most feature-related config. +#[allow(dead_code)] +pub fn emit_test_config(cfg: &Config) { + emit_optimization_cfg(cfg); + emit_cfg_shorthands(cfg); + emit_cfg_env(cfg); + emit_f16_f128_cfg(cfg); +} + +/// Simplify the feature logic for enabling intrinsics so code only needs to use +/// `cfg(intrinsics_enabled)`. +fn emit_intrinsics_cfg() { + println!("cargo:rustc-check-cfg=cfg(intrinsics_enabled)"); + + // Disabled by default; `unstable-intrinsics` enables again; `force-soft-floats` overrides + // to disable. 
+ if cfg!(feature = "unstable-intrinsics") && !cfg!(feature = "force-soft-floats") { + println!("cargo:rustc-cfg=intrinsics_enabled"); + } +} + +/// Simplify the feature logic for enabling arch-specific features so code only needs to use +/// `cfg(arch_enabled)`. +fn emit_arch_cfg() { + println!("cargo:rustc-check-cfg=cfg(arch_enabled)"); + + // Enabled by default via the "arch" feature, `force-soft-floats` overrides to disable. + if cfg!(feature = "arch") && !cfg!(feature = "force-soft-floats") { + println!("cargo:rustc-cfg=arch_enabled"); + } +} + +/// Some tests are extremely slow. Emit a config option based on optimization level. +fn emit_optimization_cfg(cfg: &Config) { + println!("cargo:rustc-check-cfg=cfg(optimizations_enabled)"); + + if !matches!(cfg.opt_level.as_str(), "0" | "1") { + println!("cargo:rustc-cfg=optimizations_enabled"); + } +} + +/// Provide an alias for common longer config combinations. +fn emit_cfg_shorthands(cfg: &Config) { + println!("cargo:rustc-check-cfg=cfg(x86_no_sse)"); + if cfg.target_arch == "x86" && !cfg.target_features.iter().any(|f| f == "sse") { + // Shorthand to detect i586 targets + println!("cargo:rustc-cfg=x86_no_sse"); + } +} + +/// Reemit config that we make use of for test logging. +fn emit_cfg_env(cfg: &Config) { + println!( + "cargo:rustc-env=CFG_CARGO_FEATURES={:?}", + cfg.cargo_features + ); + println!("cargo:rustc-env=CFG_OPT_LEVEL={}", cfg.opt_level); + println!( + "cargo:rustc-env=CFG_TARGET_FEATURES={:?}", + cfg.target_features + ); +} + +/// Configure whether or not `f16` and `f128` support should be enabled. +fn emit_f16_f128_cfg(cfg: &Config) { + println!("cargo:rustc-check-cfg=cfg(f16_enabled)"); + println!("cargo:rustc-check-cfg=cfg(f128_enabled)"); + + // `unstable-float` enables these features. + if !cfg!(feature = "unstable-float") { + return; + } + + // Set whether or not `f16` and `f128` are supported at a basic level by LLVM. This only means + // that the backend will not crash when using these types and generates code that can be called + // without crashing (no infinite recursion). This does not mean that the platform doesn't have + // ABI or other bugs. + // + // We do this here rather than in `rust-lang/rust` because configuring via cargo features is + // not straightforward. + // + // Original source of this list: + // + let f16_enabled = match cfg.target_arch.as_str() { + // Unsupported + "arm64ec" => false, + // Selection failure + "s390x" => false, + // Infinite recursion + // FIXME(llvm): loongarch fixed by + "csky" => false, + "hexagon" => false, + "loongarch64" => false, + "mips" | "mips64" | "mips32r6" | "mips64r6" => false, + "powerpc" | "powerpc64" => false, + "sparc" | "sparc64" => false, + "wasm32" | "wasm64" => false, + // Most everything else works as of LLVM 19 + _ => true, + }; + + let f128_enabled = match cfg.target_arch.as_str() { + // Unsupported (libcall is not supported) + "amdgpu" => false, + // Unsupported + "arm64ec" => false, + // Selection failure + "mips64" | "mips64r6" => false, + // Selection failure + "nvptx64" => false, + // Selection failure + "powerpc64" if &cfg.target_os == "aix" => false, + // Selection failure + "sparc" => false, + // Most everything else works as of LLVM 19 + _ => true, + }; + + // If the feature is set, disable these types. 
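The `cargo:rustc-check-cfg`/`rustc-cfg` pairs emitted above are consumed with plain `#[cfg(...)]` attributes elsewhere in the crate (`x86_no_sse` in the `atan2` tests, `f16_enabled` in `lib.rs`). A hedged sketch of the consuming side; only the cfg names are real, the items are invented for illustration:

```rust
// Illustrative consumers of the cfgs emitted by `configure.rs`.
#[cfg(optimizations_enabled)]
fn extensive_test_iterations() -> u64 {
    10_000_000 // opt-level >= 2: run the slow, thorough variant
}

#[cfg(not(optimizations_enabled))]
fn extensive_test_iterations() -> u64 {
    10_000 // debug builds keep the suite fast
}

#[cfg(x86_no_sse)]
const NEEDS_X87_WORKAROUNDS: bool = true;
#[cfg(not(x86_no_sse))]
const NEEDS_X87_WORKAROUNDS: bool = false;

fn main() {
    println!(
        "{} iterations, x87 workarounds: {}",
        extensive_test_iterations(),
        NEEDS_X87_WORKAROUNDS
    );
}
```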
+ let disable_both = env::var_os("CARGO_FEATURE_NO_F16_F128").is_some(); + + println!("cargo:rustc-check-cfg=cfg(f16_enabled)"); + println!("cargo:rustc-check-cfg=cfg(f128_enabled)"); + + if f16_enabled && !disable_both { + println!("cargo:rustc-cfg=f16_enabled"); + } + + if f128_enabled && !disable_both { + println!("cargo:rustc-cfg=f128_enabled"); + } +} diff --git a/library/compiler-builtins/libm/src/lib.rs b/library/compiler-builtins/libm/src/lib.rs new file mode 100644 index 0000000000000..31b12235314cd --- /dev/null +++ b/library/compiler-builtins/libm/src/lib.rs @@ -0,0 +1,33 @@ +//! libm in pure Rust +#![no_std] +#![cfg_attr(intrinsics_enabled, allow(internal_features))] +#![cfg_attr(intrinsics_enabled, feature(core_intrinsics))] +#![cfg_attr( + all(intrinsics_enabled, target_family = "wasm"), + feature(wasm_numeric_instr) +)] +#![cfg_attr(f128_enabled, feature(f128))] +#![cfg_attr(f16_enabled, feature(f16))] +#![allow(clippy::assign_op_pattern)] +#![allow(clippy::deprecated_cfg_attr)] +#![allow(clippy::eq_op)] +#![allow(clippy::excessive_precision)] +#![allow(clippy::float_cmp)] +#![allow(clippy::int_plus_one)] +#![allow(clippy::just_underscores_and_digits)] +#![allow(clippy::many_single_char_names)] +#![allow(clippy::mixed_case_hex_literals)] +#![allow(clippy::needless_late_init)] +#![allow(clippy::needless_return)] +#![allow(clippy::unreadable_literal)] +#![allow(clippy::zero_divided_by_zero)] +#![forbid(unsafe_op_in_unsafe_fn)] + +mod libm_helper; +mod math; + +use core::{f32, f64}; + +pub use libm_helper::*; + +pub use self::math::*; diff --git a/library/compiler-builtins/libm/src/libm_helper.rs b/library/compiler-builtins/libm/src/libm_helper.rs new file mode 100644 index 0000000000000..dfa1ff77bf2e4 --- /dev/null +++ b/library/compiler-builtins/libm/src/libm_helper.rs @@ -0,0 +1,244 @@ +use core::marker::PhantomData; + +use crate::*; + +/// Generic helper for libm functions, abstracting over f32 and f64.
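The helper introduced here takes the float type as an explicit generic parameter; spelled out with the turbofish, typical calls look like this (sketch, using functions from the tables below):

```rust
use libm::Libm;

fn main() {
    // The generic parameter selects the precision; both forms call the same code.
    let a = Libm::<f32>::hypot(3.0, 4.0);
    assert_eq!(a, libm::hypotf(3.0, 4.0));

    let b = Libm::<f64>::sqrt(2.0);
    assert_eq!(b, libm::sqrt(2.0));
}
```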
+/// # Type Parameter: +/// - `T`: Either `f32` or `f64` +/// +/// # Examples +/// ```rust +/// use libm::{self, Libm}; +/// +/// const PI_F32: f32 = 3.1415927410e+00; +/// const PI_F64: f64 = 3.1415926535897931160e+00; +/// +/// assert!(Libm::::cos(0.0f32) == libm::cosf(0.0)); +/// assert!(Libm::::sin(PI_F32) == libm::sinf(PI_F32)); +/// +/// assert!(Libm::::cos(0.0f64) == libm::cos(0.0)); +/// assert!(Libm::::sin(PI_F64) == libm::sin(PI_F64)); +/// ``` +pub struct Libm(PhantomData); + +macro_rules! libm_helper { + ($t:ident, funcs: $funcs:tt) => { + impl Libm<$t> { + #![allow(unused_parens)] + + libm_helper! { $funcs } + } + }; + + ({$($func:tt;)*}) => { + $( + libm_helper! { $func } + )* + }; + + ((fn $func:ident($($arg:ident: $arg_typ:ty),*) -> ($($ret_typ:ty),*); => $libm_fn:ident)) => { + #[inline(always)] + pub fn $func($($arg: $arg_typ),*) -> ($($ret_typ),*) { + $libm_fn($($arg),*) + } + }; +} + +// verify-apilist-start +libm_helper! { + f32, + funcs: { + // verify-sorted-start + (fn acos(x: f32) -> (f32); => acosf); + (fn acosh(x: f32) -> (f32); => acoshf); + (fn asin(x: f32) -> (f32); => asinf); + (fn asinh(x: f32) -> (f32); => asinhf); + (fn atan(x: f32) -> (f32); => atanf); + (fn atan2(y: f32, x: f32) -> (f32); => atan2f); + (fn atanh(x: f32) -> (f32); => atanhf); + (fn cbrt(x: f32) -> (f32); => cbrtf); + (fn ceil(x: f32) -> (f32); => ceilf); + (fn copysign(x: f32, y: f32) -> (f32); => copysignf); + (fn cos(x: f32) -> (f32); => cosf); + (fn cosh(x: f32) -> (f32); => coshf); + (fn erf(x: f32) -> (f32); => erff); + (fn erfc(x: f32) -> (f32); => erfcf); + (fn exp(x: f32) -> (f32); => expf); + (fn exp10(x: f32) -> (f32); => exp10f); + (fn exp2(x: f32) -> (f32); => exp2f); + (fn expm1(x: f32) -> (f32); => expm1f); + (fn fabs(x: f32) -> (f32); => fabsf); + (fn fdim(x: f32, y: f32) -> (f32); => fdimf); + (fn floor(x: f32) -> (f32); => floorf); + (fn fma(x: f32, y: f32, z: f32) -> (f32); => fmaf); + (fn fmax(x: f32, y: f32) -> (f32); => fmaxf); + (fn fmin(x: f32, y: f32) -> (f32); => fminf); + (fn fmod(x: f32, y: f32) -> (f32); => fmodf); + (fn frexp(x: f32) -> (f32, i32); => frexpf); + (fn hypot(x: f32, y: f32) -> (f32); => hypotf); + (fn ilogb(x: f32) -> (i32); => ilogbf); + (fn j0(x: f32) -> (f32); => j0f); + (fn j1(x: f32) -> (f32); => j1f); + (fn jn(n: i32, x: f32) -> (f32); => jnf); + (fn ldexp(x: f32, n: i32) -> (f32); => ldexpf); + (fn lgamma(x: f32) -> (f32); => lgammaf); + (fn lgamma_r(x: f32) -> (f32, i32); => lgammaf_r); + (fn log(x: f32) -> (f32); => logf); + (fn log10(x: f32) -> (f32); => log10f); + (fn log1p(x: f32) -> (f32); => log1pf); + (fn log2(x: f32) -> (f32); => log2f); + (fn modf(x: f32) -> (f32, f32); => modff); + (fn nextafter(x: f32, y: f32) -> (f32); => nextafterf); + (fn pow(x: f32, y: f32) -> (f32); => powf); + (fn remainder(x: f32, y: f32) -> (f32); => remainderf); + (fn remquo(x: f32, y: f32) -> (f32, i32); => remquof); + (fn rint(x: f32) -> (f32); => rintf); + (fn round(x: f32) -> (f32); => roundf); + (fn roundeven(x: f32) -> (f32); => roundevenf); + (fn scalbn(x: f32, n: i32) -> (f32); => scalbnf); + (fn sin(x: f32) -> (f32); => sinf); + (fn sincos(x: f32) -> (f32, f32); => sincosf); + (fn sinh(x: f32) -> (f32); => sinhf); + (fn sqrt(x: f32) -> (f32); => sqrtf); + (fn tan(x: f32) -> (f32); => tanf); + (fn tanh(x: f32) -> (f32); => tanhf); + (fn tgamma(x: f32) -> (f32); => tgammaf); + (fn trunc(x: f32) -> (f32); => truncf); + (fn y0(x: f32) -> (f32); => y0f); + (fn y1(x: f32) -> (f32); => y1f); + (fn yn(n: i32, x: f32) -> (f32); => ynf); + // 
verify-sorted-end + } +} + +libm_helper! { + f64, + funcs: { + // verify-sorted-start + (fn acos(x: f64) -> (f64); => acos); + (fn acosh(x: f64) -> (f64); => acosh); + (fn asin(x: f64) -> (f64); => asin); + (fn asinh(x: f64) -> (f64); => asinh); + (fn atan(x: f64) -> (f64); => atan); + (fn atan2(y: f64, x: f64) -> (f64); => atan2); + (fn atanh(x: f64) -> (f64); => atanh); + (fn cbrt(x: f64) -> (f64); => cbrt); + (fn ceil(x: f64) -> (f64); => ceil); + (fn copysign(x: f64, y: f64) -> (f64); => copysign); + (fn cos(x: f64) -> (f64); => cos); + (fn cosh(x: f64) -> (f64); => cosh); + (fn erf(x: f64) -> (f64); => erf); + (fn erfc(x: f64) -> (f64); => erfc); + (fn exp(x: f64) -> (f64); => exp); + (fn exp10(x: f64) -> (f64); => exp10); + (fn exp2(x: f64) -> (f64); => exp2); + (fn expm1(x: f64) -> (f64); => expm1); + (fn fabs(x: f64) -> (f64); => fabs); + (fn fdim(x: f64, y: f64) -> (f64); => fdim); + (fn floor(x: f64) -> (f64); => floor); + (fn fma(x: f64, y: f64, z: f64) -> (f64); => fma); + (fn fmax(x: f64, y: f64) -> (f64); => fmax); + (fn fmaximum(x: f64, y: f64) -> (f64); => fmaximum); + (fn fmaximum_num(x: f64, y: f64) -> (f64); => fmaximum_num); + (fn fmaximum_numf(x: f32, y: f32) -> (f32); => fmaximum_numf); + (fn fmaximumf(x: f32, y: f32) -> (f32); => fmaximumf); + (fn fmin(x: f64, y: f64) -> (f64); => fmin); + (fn fminimum(x: f64, y: f64) -> (f64); => fminimum); + (fn fminimum_num(x: f64, y: f64) -> (f64); => fminimum_num); + (fn fminimum_numf(x: f32, y: f32) -> (f32); => fminimum_numf); + (fn fminimumf(x: f32, y: f32) -> (f32); => fminimumf); + (fn fmod(x: f64, y: f64) -> (f64); => fmod); + (fn frexp(x: f64) -> (f64, i32); => frexp); + (fn hypot(x: f64, y: f64) -> (f64); => hypot); + (fn ilogb(x: f64) -> (i32); => ilogb); + (fn j0(x: f64) -> (f64); => j0); + (fn j1(x: f64) -> (f64); => j1); + (fn jn(n: i32, x: f64) -> (f64); => jn); + (fn ldexp(x: f64, n: i32) -> (f64); => ldexp); + (fn lgamma(x: f64) -> (f64); => lgamma); + (fn lgamma_r(x: f64) -> (f64, i32); => lgamma_r); + (fn log(x: f64) -> (f64); => log); + (fn log10(x: f64) -> (f64); => log10); + (fn log1p(x: f64) -> (f64); => log1p); + (fn log2(x: f64) -> (f64); => log2); + (fn modf(x: f64) -> (f64, f64); => modf); + (fn nextafter(x: f64, y: f64) -> (f64); => nextafter); + (fn pow(x: f64, y: f64) -> (f64); => pow); + (fn remainder(x: f64, y: f64) -> (f64); => remainder); + (fn remquo(x: f64, y: f64) -> (f64, i32); => remquo); + (fn rint(x: f64) -> (f64); => rint); + (fn round(x: f64) -> (f64); => round); + (fn roundevem(x: f64) -> (f64); => roundeven); + (fn scalbn(x: f64, n: i32) -> (f64); => scalbn); + (fn sin(x: f64) -> (f64); => sin); + (fn sincos(x: f64) -> (f64, f64); => sincos); + (fn sinh(x: f64) -> (f64); => sinh); + (fn sqrt(x: f64) -> (f64); => sqrt); + (fn tan(x: f64) -> (f64); => tan); + (fn tanh(x: f64) -> (f64); => tanh); + (fn tgamma(x: f64) -> (f64); => tgamma); + (fn trunc(x: f64) -> (f64); => trunc); + (fn y0(x: f64) -> (f64); => y0); + (fn y1(x: f64) -> (f64); => y1); + (fn yn(n: i32, x: f64) -> (f64); => yn); + // verify-sorted-end + } +} + +#[cfg(f16_enabled)] +libm_helper! 
{ + f16, + funcs: { + // verify-sorted-start + (fn ceil(x: f16) -> (f16); => ceilf16); + (fn copysign(x: f16, y: f16) -> (f16); => copysignf16); + (fn fabs(x: f16) -> (f16); => fabsf16); + (fn fdim(x: f16, y: f16) -> (f16); => fdimf16); + (fn floor(x: f16) -> (f16); => floorf16); + (fn fmax(x: f16, y: f16) -> (f16); => fmaxf16); + (fn fmaximum_num(x: f16, y: f16) -> (f16); => fmaximum_numf16); + (fn fmaximumf16(x: f16, y: f16) -> (f16); => fmaximumf16); + (fn fmin(x: f16, y: f16) -> (f16); => fminf16); + (fn fminimum(x: f16, y: f16) -> (f16); => fminimumf16); + (fn fminimum_num(x: f16, y: f16) -> (f16); => fminimum_numf16); + (fn fmod(x: f16, y: f16) -> (f16); => fmodf16); + (fn ldexp(x: f16, n: i32) -> (f16); => ldexpf16); + (fn rint(x: f16) -> (f16); => rintf16); + (fn round(x: f16) -> (f16); => roundf16); + (fn roundeven(x: f16) -> (f16); => roundevenf16); + (fn scalbn(x: f16, n: i32) -> (f16); => scalbnf16); + (fn sqrtf(x: f16) -> (f16); => sqrtf16); + (fn truncf(x: f16) -> (f16); => truncf16); + // verify-sorted-end + } +} + +#[cfg(f128_enabled)] +libm_helper! { + f128, + funcs: { + // verify-sorted-start + (fn ceil(x: f128) -> (f128); => ceilf128); + (fn copysign(x: f128, y: f128) -> (f128); => copysignf128); + (fn fabs(x: f128) -> (f128); => fabsf128); + (fn fdim(x: f128, y: f128) -> (f128); => fdimf128); + (fn floor(x: f128) -> (f128); => floorf128); + (fn fma(x: f128, y: f128, z: f128) -> (f128); => fmaf128); + (fn fmax(x: f128, y: f128) -> (f128); => fmaxf128); + (fn fmaximum(x: f128, y: f128) -> (f128); => fmaximumf128); + (fn fmaximum_num(x: f128, y: f128) -> (f128); => fmaximum_numf128); + (fn fmin(x: f128, y: f128) -> (f128); => fminf128); + (fn fminimum(x: f128, y: f128) -> (f128); => fminimumf128); + (fn fminimum_num(x: f128, y: f128) -> (f128); => fminimum_numf128); + (fn fmod(x: f128, y: f128) -> (f128); => fmodf128); + (fn ldexp(x: f128, n: i32) -> (f128); => ldexpf128); + (fn rint(x: f128) -> (f128); => rintf128); + (fn round(x: f128) -> (f128); => roundf128); + (fn roundeven(x: f128) -> (f128); => roundevenf128); + (fn scalbn(x: f128, n: i32) -> (f128); => scalbnf128); + (fn sqrt(x: f128) -> (f128); => sqrtf128); + (fn trunc(x: f128) -> (f128); => truncf128); + // verify-sorted-end + } +} +// verify-apilist-end diff --git a/library/compiler-builtins/libm/src/math/acos.rs b/library/compiler-builtins/libm/src/math/acos.rs new file mode 100644 index 0000000000000..23b13251ee231 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/acos.rs @@ -0,0 +1,112 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_acos.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* acos(x) + * Method : + * acos(x) = pi/2 - asin(x) + * acos(-x) = pi/2 + asin(x) + * For |x|<=0.5 + * acos(x) = pi/2 - (x + x*x^2*R(x^2)) (see asin.c) + * For x>0.5 + * acos(x) = pi/2 - (pi/2 - 2asin(sqrt((1-x)/2))) + * = 2asin(sqrt((1-x)/2)) + * = 2s + 2s*z*R(z) ...z=(1-x)/2, s=sqrt(z) + * = 2f + (2c + 2s*z*R(z)) + * where f=hi part of s, and c = (z-f*f)/(s+f) is the correction term + * for f so that f+c ~ sqrt(z). 
+ * For x<-0.5 + * acos(x) = pi - 2asin(sqrt((1-|x|)/2)) + * = pi - 0.5*(s+s*z*R(z)), where z=(1-|x|)/2,s=sqrt(z) + * + * Special cases: + * if x is NaN, return x itself; + * if |x|>1, return NaN with invalid signal. + * + * Function needed: sqrt + */ + +use super::sqrt; + +const PIO2_HI: f64 = 1.57079632679489655800e+00; /* 0x3FF921FB, 0x54442D18 */ +const PIO2_LO: f64 = 6.12323399573676603587e-17; /* 0x3C91A626, 0x33145C07 */ +const PS0: f64 = 1.66666666666666657415e-01; /* 0x3FC55555, 0x55555555 */ +const PS1: f64 = -3.25565818622400915405e-01; /* 0xBFD4D612, 0x03EB6F7D */ +const PS2: f64 = 2.01212532134862925881e-01; /* 0x3FC9C155, 0x0E884455 */ +const PS3: f64 = -4.00555345006794114027e-02; /* 0xBFA48228, 0xB5688F3B */ +const PS4: f64 = 7.91534994289814532176e-04; /* 0x3F49EFE0, 0x7501B288 */ +const PS5: f64 = 3.47933107596021167570e-05; /* 0x3F023DE1, 0x0DFDF709 */ +const QS1: f64 = -2.40339491173441421878e+00; /* 0xC0033A27, 0x1C8A2D4B */ +const QS2: f64 = 2.02094576023350569471e+00; /* 0x40002AE5, 0x9C598AC8 */ +const QS3: f64 = -6.88283971605453293030e-01; /* 0xBFE6066C, 0x1B8D0159 */ +const QS4: f64 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */ + +fn r(z: f64) -> f64 { + let p: f64 = z * (PS0 + z * (PS1 + z * (PS2 + z * (PS3 + z * (PS4 + z * PS5))))); + let q: f64 = 1.0 + z * (QS1 + z * (QS2 + z * (QS3 + z * QS4))); + p / q +} + +/// Arccosine (f64) +/// +/// Computes the inverse cosine (arc cosine) of the input value. +/// Arguments must be in the range -1 to 1. +/// Returns values in radians, in the range of 0 to pi. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn acos(x: f64) -> f64 { + let x1p_120f = f64::from_bits(0x3870000000000000); // 0x1p-120 === 2 ^ -120 + let z: f64; + let w: f64; + let s: f64; + let c: f64; + let df: f64; + let hx: u32; + let ix: u32; + + hx = (x.to_bits() >> 32) as u32; + ix = hx & 0x7fffffff; + /* |x| >= 1 or nan */ + if ix >= 0x3ff00000 { + let lx: u32 = x.to_bits() as u32; + + if ((ix - 0x3ff00000) | lx) == 0 { + /* acos(1)=0, acos(-1)=pi */ + if (hx >> 31) != 0 { + return 2. * PIO2_HI + x1p_120f; + } + return 0.; + } + return 0. / (x - x); + } + /* |x| < 0.5 */ + if ix < 0x3fe00000 { + if ix <= 0x3c600000 { + /* |x| < 2**-57 */ + return PIO2_HI + x1p_120f; + } + return PIO2_HI - (x - (PIO2_LO - x * r(x * x))); + } + /* x < -0.5 */ + if (hx >> 31) != 0 { + z = (1.0 + x) * 0.5; + s = sqrt(z); + w = r(z) * s - PIO2_LO; + return 2. * (PIO2_HI - (s + w)); + } + /* x > 0.5 */ + z = (1.0 - x) * 0.5; + s = sqrt(z); + // Set the low 4 bytes to zero + df = f64::from_bits(s.to_bits() & 0xff_ff_ff_ff_00_00_00_00); + + c = (z - df * df) / (s + df); + w = r(z) * s + c; + 2. * (df + w) +} diff --git a/library/compiler-builtins/libm/src/math/acosf.rs b/library/compiler-builtins/libm/src/math/acosf.rs new file mode 100644 index 0000000000000..dd88eea5b13a8 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/acosf.rs @@ -0,0 +1,79 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_acosf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. 
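The `df`/`c` computation at the end of `acos` above is the head/correction trick from the method comment: clearing the low 32 bits of `s = sqrt(z)` gives a short head `f`, and `c = (z - f*f)/(s + f)` recovers what was dropped so that `f + c ~ sqrt(z)`. A standalone sketch of the same split:

```rust
fn main() {
    let z = 0.5_f64;
    let s = z.sqrt();
    // Head: s with its low 32 bits cleared (same mask as in acos).
    let f = f64::from_bits(s.to_bits() & 0xff_ff_ff_ff_00_00_00_00);
    // Correction so that f + c approximates sqrt(z) again.
    let c = (z - f * f) / (s + f);
    assert!((f + c - s).abs() <= f64::EPSILON);
}
```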
+ * ==================================================== + */ + +use super::sqrt::sqrtf; + +const PIO2_HI: f32 = 1.5707962513e+00; /* 0x3fc90fda */ +const PIO2_LO: f32 = 7.5497894159e-08; /* 0x33a22168 */ +const P_S0: f32 = 1.6666586697e-01; +const P_S1: f32 = -4.2743422091e-02; +const P_S2: f32 = -8.6563630030e-03; +const Q_S1: f32 = -7.0662963390e-01; + +fn r(z: f32) -> f32 { + let p = z * (P_S0 + z * (P_S1 + z * P_S2)); + let q = 1. + z * Q_S1; + p / q +} + +/// Arccosine (f32) +/// +/// Computes the inverse cosine (arc cosine) of the input value. +/// Arguments must be in the range -1 to 1. +/// Returns values in radians, in the range of 0 to pi. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn acosf(x: f32) -> f32 { + let x1p_120 = f32::from_bits(0x03800000); // 0x1p-120 === 2 ^ (-120) + + let z: f32; + let w: f32; + let s: f32; + + let mut hx = x.to_bits(); + let ix = hx & 0x7fffffff; + /* |x| >= 1 or nan */ + if ix >= 0x3f800000 { + if ix == 0x3f800000 { + if (hx >> 31) != 0 { + return 2. * PIO2_HI + x1p_120; + } + return 0.; + } + return 0. / (x - x); + } + /* |x| < 0.5 */ + if ix < 0x3f000000 { + if ix <= 0x32800000 { + /* |x| < 2**-26 */ + return PIO2_HI + x1p_120; + } + return PIO2_HI - (x - (PIO2_LO - x * r(x * x))); + } + /* x < -0.5 */ + if (hx >> 31) != 0 { + z = (1. + x) * 0.5; + s = sqrtf(z); + w = r(z) * s - PIO2_LO; + return 2. * (PIO2_HI - (s + w)); + } + /* x > 0.5 */ + z = (1. - x) * 0.5; + s = sqrtf(z); + hx = s.to_bits(); + let df = f32::from_bits(hx & 0xfffff000); + let c = (z - df * df) / (s + df); + w = r(z) * s + c; + 2. * (df + w) +} diff --git a/library/compiler-builtins/libm/src/math/acosh.rs b/library/compiler-builtins/libm/src/math/acosh.rs new file mode 100644 index 0000000000000..d1f5b9fa9372c --- /dev/null +++ b/library/compiler-builtins/libm/src/math/acosh.rs @@ -0,0 +1,27 @@ +use super::{log, log1p, sqrt}; + +const LN2: f64 = 0.693147180559945309417232121458176568; /* 0x3fe62e42, 0xfefa39ef*/ + +/// Inverse hyperbolic cosine (f64) +/// +/// Calculates the inverse hyperbolic cosine of `x`. +/// Is defined as `log(x + sqrt(x*x-1))`. +/// `x` must be a number greater than or equal to 1. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn acosh(x: f64) -> f64 { + let u = x.to_bits(); + let e = ((u >> 52) as usize) & 0x7ff; + + /* x < 1 domain error is handled in the called functions */ + + if e < 0x3ff + 1 { + /* |x| < 2, up to 2ulp error in [1,1.125] */ + return log1p(x - 1.0 + sqrt((x - 1.0) * (x - 1.0) + 2.0 * (x - 1.0))); + } + if e < 0x3ff + 26 { + /* |x| < 0x1p26 */ + return log(2.0 * x - 1.0 / (x + sqrt(x * x - 1.0))); + } + /* |x| >= 0x1p26 or nan */ + return log(x) + LN2; +} diff --git a/library/compiler-builtins/libm/src/math/acoshf.rs b/library/compiler-builtins/libm/src/math/acoshf.rs new file mode 100644 index 0000000000000..ad3455fdd48cf --- /dev/null +++ b/library/compiler-builtins/libm/src/math/acoshf.rs @@ -0,0 +1,26 @@ +use super::{log1pf, logf, sqrtf}; + +const LN2: f32 = 0.693147180559945309417232121458176568; + +/// Inverse hyperbolic cosine (f32) +/// +/// Calculates the inverse hyperbolic cosine of `x`. +/// Is defined as `log(x + sqrt(x*x-1))`. +/// `x` must be a number greater than or equal to 1. 
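Both `acosh` variants branch on magnitude but compute the same function, `log(x + sqrt(x*x - 1))`. A quick round-trip sketch against the defining identity `cosh(acosh(x)) = x`, using only functions this crate exports:

```rust
fn main() {
    for &x in &[1.0_f64, 1.0625, 1.5, 10.0, 1e10, 1e300] {
        let y = libm::acosh(x);
        let back = (libm::exp(y) + libm::exp(-y)) / 2.0; // cosh(y)
        assert!((back - x).abs() <= 1e-12 * x);
    }
}
```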
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn acoshf(x: f32) -> f32 { + let u = x.to_bits(); + let a = u & 0x7fffffff; + + if a < 0x3f800000 + (1 << 23) { + /* |x| < 2, invalid if x < 1 or nan */ + /* up to 2ulp error in [1,1.125] */ + return log1pf(x - 1.0 + sqrtf((x - 1.0) * (x - 1.0) + 2.0 * (x - 1.0))); + } + if a < 0x3f800000 + (12 << 23) { + /* |x| < 0x1p12 */ + return logf(2.0 * x - 1.0 / (x + sqrtf(x * x - 1.0))); + } + /* x >= 0x1p12 */ + return logf(x) + LN2; +} diff --git a/library/compiler-builtins/libm/src/math/arch/aarch64.rs b/library/compiler-builtins/libm/src/math/arch/aarch64.rs new file mode 100644 index 0000000000000..020bb731cdc7a --- /dev/null +++ b/library/compiler-builtins/libm/src/math/arch/aarch64.rs @@ -0,0 +1,115 @@ +//! Architecture-specific support for aarch64 with neon. + +use core::arch::asm; + +pub fn fma(mut x: f64, y: f64, z: f64) -> f64 { + // SAFETY: `fmadd` is available with neon and has no side effects. + unsafe { + asm!( + "fmadd {x:d}, {x:d}, {y:d}, {z:d}", + x = inout(vreg) x, + y = in(vreg) y, + z = in(vreg) z, + options(nomem, nostack, pure) + ); + } + x +} + +pub fn fmaf(mut x: f32, y: f32, z: f32) -> f32 { + // SAFETY: `fmadd` is available with neon and has no side effects. + unsafe { + asm!( + "fmadd {x:s}, {x:s}, {y:s}, {z:s}", + x = inout(vreg) x, + y = in(vreg) y, + z = in(vreg) z, + options(nomem, nostack, pure) + ); + } + x +} + +pub fn rint(mut x: f64) -> f64 { + // SAFETY: `frintn` is available with neon and has no side effects. + // + // `frintn` is always round-to-nearest which does not match the C specification, but Rust does + // not support rounding modes. + unsafe { + asm!( + "frintn {x:d}, {x:d}", + x = inout(vreg) x, + options(nomem, nostack, pure) + ); + } + x +} + +pub fn rintf(mut x: f32) -> f32 { + // SAFETY: `frintn` is available with neon and has no side effects. + // + // `frintn` is always round-to-nearest which does not match the C specification, but Rust does + // not support rounding modes. + unsafe { + asm!( + "frintn {x:s}, {x:s}", + x = inout(vreg) x, + options(nomem, nostack, pure) + ); + } + x +} + +#[cfg(all(f16_enabled, target_feature = "fp16"))] +pub fn rintf16(mut x: f16) -> f16 { + // SAFETY: `frintn` is available for `f16` with `fp16` (implies `neon`) and has no side effects. + // + // `frintn` is always round-to-nearest which does not match the C specification, but Rust does + // not support rounding modes. + unsafe { + asm!( + "frintn {x:h}, {x:h}", + x = inout(vreg) x, + options(nomem, nostack, pure) + ); + } + x +} + +pub fn sqrt(mut x: f64) -> f64 { + // SAFETY: `fsqrt` is available with neon and has no side effects. + unsafe { + asm!( + "fsqrt {x:d}, {x:d}", + x = inout(vreg) x, + options(nomem, nostack, pure) + ); + } + x +} + +pub fn sqrtf(mut x: f32) -> f32 { + // SAFETY: `fsqrt` is available with neon and has no side effects. + unsafe { + asm!( + "fsqrt {x:s}, {x:s}", + x = inout(vreg) x, + options(nomem, nostack, pure) + ); + } + x +} + +#[cfg(all(f16_enabled, target_feature = "fp16"))] +pub fn sqrtf16(mut x: f16) -> f16 { + // SAFETY: `fsqrt` is available for `f16` with `fp16` (implies `neon`) and has no + // side effects. 
+ unsafe { + asm!( + "fsqrt {x:h}, {x:h}", + x = inout(vreg) x, + options(nomem, nostack, pure) + ); + } + x +} diff --git a/library/compiler-builtins/libm/src/math/arch/i586.rs b/library/compiler-builtins/libm/src/math/arch/i586.rs new file mode 100644 index 0000000000000..f92b9a2af711f --- /dev/null +++ b/library/compiler-builtins/libm/src/math/arch/i586.rs @@ -0,0 +1,37 @@ +//! Architecture-specific support for x86-32 without SSE2 + +use super::super::fabs; + +/// Use an alternative implementation on x86, because the +/// main implementation fails with the x87 FPU used by +/// debian i386, probably due to excess precision issues. +/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219. +pub fn ceil(x: f64) -> f64 { + if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { + let truncated = x as i64 as f64; + if truncated < x { + return truncated + 1.0; + } else { + return truncated; + } + } else { + return x; + } +} + +/// Use an alternative implementation on x86, because the +/// main implementation fails with the x87 FPU used by +/// debian i386, probably due to excess precision issues. +/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219. +pub fn floor(x: f64) -> f64 { + if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { + let truncated = x as i64 as f64; + if truncated > x { + return truncated - 1.0; + } else { + return truncated; + } + } else { + return x; + } +} diff --git a/library/compiler-builtins/libm/src/math/arch/mod.rs b/library/compiler-builtins/libm/src/math/arch/mod.rs new file mode 100644 index 0000000000000..984ae7f3129f9 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/arch/mod.rs @@ -0,0 +1,50 @@ +//! Architecture-specific routines and operations. +//! +//! LLVM will already optimize calls to some of these in cases that there are hardware +//! instructions. Providing an implementation here just ensures that the faster implementation +//! is used when calling the function directly. This helps anyone who uses `libm` directly, as +//! well as improving things when these routines are called as part of other implementations. + +// Most implementations should be defined here, to ensure they are not made available when +// soft floats are required. +#[cfg(arch_enabled)] +cfg_if! { + if #[cfg(all(target_arch = "wasm32", intrinsics_enabled))] { + mod wasm32; + pub use wasm32::{ + ceil, ceilf, fabs, fabsf, floor, floorf, rint, rintf, sqrt, sqrtf, trunc, truncf, + }; + } else if #[cfg(target_feature = "sse2")] { + mod x86; + pub use x86::{sqrt, sqrtf, fma, fmaf}; + } else if #[cfg(all( + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_feature = "neon" + ))] { + mod aarch64; + + pub use aarch64::{ + fma, + fmaf, + rint, + rintf, + sqrt, + sqrtf, + }; + + #[cfg(all(f16_enabled, target_feature = "fp16"))] + pub use aarch64::{ + rintf16, + sqrtf16, + }; + } +} + +// There are certain architecture-specific implementations that are needed for correctness +// even with `force-soft-float`. These are configured here. +cfg_if! { + if #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] { + mod i586; + pub use i586::{ceil, floor}; + } +} diff --git a/library/compiler-builtins/libm/src/math/arch/wasm32.rs b/library/compiler-builtins/libm/src/math/arch/wasm32.rs new file mode 100644 index 0000000000000..de80c8a581726 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/arch/wasm32.rs @@ -0,0 +1,50 @@ +//! 
Wasm has builtins for simple float operations. Use the unstable `core::arch` intrinsics which +//! are significantly faster than soft float operations. + +pub fn ceil(x: f64) -> f64 { + core::arch::wasm32::f64_ceil(x) +} + +pub fn ceilf(x: f32) -> f32 { + core::arch::wasm32::f32_ceil(x) +} + +pub fn fabs(x: f64) -> f64 { + x.abs() +} + +pub fn fabsf(x: f32) -> f32 { + x.abs() +} + +pub fn floor(x: f64) -> f64 { + core::arch::wasm32::f64_floor(x) +} + +pub fn floorf(x: f32) -> f32 { + core::arch::wasm32::f32_floor(x) +} + +pub fn rint(x: f64) -> f64 { + core::arch::wasm32::f64_nearest(x) +} + +pub fn rintf(x: f32) -> f32 { + core::arch::wasm32::f32_nearest(x) +} + +pub fn sqrt(x: f64) -> f64 { + core::arch::wasm32::f64_sqrt(x) +} + +pub fn sqrtf(x: f32) -> f32 { + core::arch::wasm32::f32_sqrt(x) +} + +pub fn trunc(x: f64) -> f64 { + core::arch::wasm32::f64_trunc(x) +} + +pub fn truncf(x: f32) -> f32 { + core::arch::wasm32::f32_trunc(x) +} diff --git a/library/compiler-builtins/libm/src/math/arch/x86.rs b/library/compiler-builtins/libm/src/math/arch/x86.rs new file mode 100644 index 0000000000000..454aa285074d6 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/arch/x86.rs @@ -0,0 +1,32 @@ +//! Architecture-specific support for x86-32 and x86-64 with SSE2 + +mod detect; +mod fma; + +pub use fma::{fma, fmaf}; + +pub fn sqrtf(mut x: f32) -> f32 { + // SAFETY: `sqrtss` is part of `sse2`, which this module is gated behind. It has no memory + // access or side effects. + unsafe { + core::arch::asm!( + "sqrtss {x}, {x}", + x = inout(xmm_reg) x, + options(nostack, nomem, pure), + ) + }; + x +} + +pub fn sqrt(mut x: f64) -> f64 { + // SAFETY: `sqrtsd` is part of `sse2`, which this module is gated behind. It has no memory + // access or side effects. + unsafe { + core::arch::asm!( + "sqrtsd {x}, {x}", + x = inout(xmm_reg) x, + options(nostack, nomem, pure), + ) + }; + x +} diff --git a/library/compiler-builtins/libm/src/math/arch/x86/detect.rs b/library/compiler-builtins/libm/src/math/arch/x86/detect.rs new file mode 100644 index 0000000000000..e6d9b040bfaff --- /dev/null +++ b/library/compiler-builtins/libm/src/math/arch/x86/detect.rs @@ -0,0 +1,232 @@ +// Using runtime feature detection requires atomics. Currently there are no x86 targets +// that support sse but not `AtomicPtr`. + +#[cfg(target_arch = "x86")] +use core::arch::x86::{__cpuid, __cpuid_count, _xgetbv, CpuidResult}; +#[cfg(target_arch = "x86_64")] +use core::arch::x86_64::{__cpuid, __cpuid_count, _xgetbv, CpuidResult}; + +use crate::support::feature_detect::{Flags, get_or_init_flags_cache, unique_masks}; + +/// CPU features that get cached (doesn't correlate to anything on the CPU). +pub mod cpu_flags { + use super::unique_masks; + + unique_masks! { + u32, + SSE3, + F16C, + SSE, + SSE2, + ERMSB, + MOVRS, + FMA, + FMA4, + AVX512FP16, + AVX512BF16, + } +} + +/// Get CPU features, loading from a cache if available. +pub fn get_cpu_features() -> Flags { + use core::sync::atomic::AtomicU32; + static CACHE: AtomicU32 = AtomicU32::new(0); + get_or_init_flags_cache(&CACHE, load_x86_features) +} + +/// Read from cpuid and translate to a `Flags` instance, using `cpu_flags`. +/// +/// Implementation is taken from [std-detect][std-detect]. 
+/// +/// [std-detect]: https://github.com/rust-lang/stdarch/blob/690b3a6334d482874163bd6fcef408e0518febe9/crates/std_detect/src/detect/os/x86.rs#L142 +fn load_x86_features() -> Flags { + let mut value = Flags::empty(); + + if cfg!(target_env = "sgx") { + // doesn't support this because it is untrusted data + return Flags::empty(); + } + + // Calling `__cpuid`/`__cpuid_count` from here on is safe because the CPU + // has `cpuid` support. + + // 0. EAX = 0: Basic Information: + // - EAX returns the "Highest Function Parameter", that is, the maximum leaf + // value for subsequent calls of `cpuinfo` in range [0, 0x8000_0000]. + // - The vendor ID is stored in 12 u8 ascii chars, returned in EBX, EDX, and ECX + // (in that order) + let mut vendor_id = [0u8; 12]; + let max_basic_leaf; + unsafe { + let CpuidResult { eax, ebx, ecx, edx } = __cpuid(0); + max_basic_leaf = eax; + vendor_id[0..4].copy_from_slice(&ebx.to_ne_bytes()); + vendor_id[4..8].copy_from_slice(&edx.to_ne_bytes()); + vendor_id[8..12].copy_from_slice(&ecx.to_ne_bytes()); + } + + if max_basic_leaf < 1 { + // Earlier Intel 486, CPUID not implemented + return value; + } + + // EAX = 1, ECX = 0: Queries "Processor Info and Feature Bits"; + // Contains information about most x86 features. + let CpuidResult { ecx, edx, .. } = unsafe { __cpuid(0x0000_0001_u32) }; + let proc_info_ecx = Flags::from_bits(ecx); + let proc_info_edx = Flags::from_bits(edx); + + // EAX = 7: Queries "Extended Features"; + // Contains information about bmi,bmi2, and avx2 support. + let mut extended_features_ebx = Flags::empty(); + let mut extended_features_edx = Flags::empty(); + let mut extended_features_eax_leaf_1 = Flags::empty(); + if max_basic_leaf >= 7 { + let CpuidResult { ebx, edx, .. } = unsafe { __cpuid(0x0000_0007_u32) }; + extended_features_ebx = Flags::from_bits(ebx); + extended_features_edx = Flags::from_bits(edx); + + let CpuidResult { eax, .. } = unsafe { __cpuid_count(0x0000_0007_u32, 0x0000_0001_u32) }; + extended_features_eax_leaf_1 = Flags::from_bits(eax) + } + + // EAX = 0x8000_0000, ECX = 0: Get Highest Extended Function Supported + // - EAX returns the max leaf value for extended information, that is, + // `cpuid` calls in range [0x8000_0000; u32::MAX]: + let extended_max_basic_leaf = unsafe { __cpuid(0x8000_0000_u32) }.eax; + + // EAX = 0x8000_0001, ECX=0: Queries "Extended Processor Info and Feature Bits" + let mut extended_proc_info_ecx = Flags::empty(); + if extended_max_basic_leaf >= 1 { + let CpuidResult { ecx, .. } = unsafe { __cpuid(0x8000_0001_u32) }; + extended_proc_info_ecx = Flags::from_bits(ecx); + } + + let mut enable = |regflags: Flags, regbit, flag| { + if regflags.test_nth(regbit) { + value.insert(flag); + } + }; + + enable(proc_info_ecx, 0, cpu_flags::SSE3); + enable(proc_info_ecx, 29, cpu_flags::F16C); + enable(proc_info_edx, 25, cpu_flags::SSE); + enable(proc_info_edx, 26, cpu_flags::SSE2); + enable(extended_features_ebx, 9, cpu_flags::ERMSB); + enable(extended_features_eax_leaf_1, 31, cpu_flags::MOVRS); + + // `XSAVE` and `AVX` support: + let cpu_xsave = proc_info_ecx.test_nth(26); + if cpu_xsave { + // 0. Here the CPU supports `XSAVE`. + + // 1. Detect `OSXSAVE`, that is, whether the OS is AVX enabled and + // supports saving the state of the AVX/AVX2 vector registers on + // context-switches, see: + // + // - [intel: is avx enabled?][is_avx_enabled], + // - [mozilla: sse.cpp][mozilla_sse_cpp]. 
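The vendor-ID handling above is the usual cpuid dance: leaf 0 returns the 12-byte vendor string split across EBX, EDX and ECX, in that order. A host-independent sketch with hard-coded register values (the real code uses native byte order because it only ever runs on x86):

```rust
fn main() {
    // Register values cpuid leaf 0 returns on an Intel part: "Genu", "ineI", "ntel".
    let (ebx, edx, ecx): (u32, u32, u32) = (0x756e_6547, 0x4965_6e69, 0x6c65_746e);

    let mut vendor_id = [0u8; 12];
    vendor_id[0..4].copy_from_slice(&ebx.to_le_bytes());
    vendor_id[4..8].copy_from_slice(&edx.to_le_bytes());
    vendor_id[8..12].copy_from_slice(&ecx.to_le_bytes());

    assert_eq!(&vendor_id, b"GenuineIntel");
}
```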
+ // + // [is_avx_enabled]: https://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled + // [mozilla_sse_cpp]: https://hg.mozilla.org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190 + let cpu_osxsave = proc_info_ecx.test_nth(27); + + if cpu_osxsave { + // 2. The OS must have signaled the CPU that it supports saving and + // restoring the: + // + // * SSE -> `XCR0.SSE[1]` + // * AVX -> `XCR0.AVX[2]` + // * AVX-512 -> `XCR0.AVX-512[7:5]`. + // * AMX -> `XCR0.AMX[18:17]` + // + // by setting the corresponding bits of `XCR0` to `1`. + // + // This is safe because the CPU supports `xsave` and the OS has set `osxsave`. + let xcr0 = unsafe { _xgetbv(0) }; + // Test `XCR0.SSE[1]` and `XCR0.AVX[2]` with the mask `0b110 == 6`: + let os_avx_support = xcr0 & 6 == 6; + // Test `XCR0.AVX-512[7:5]` with the mask `0b1110_0000 == 0xe0`: + let os_avx512_support = xcr0 & 0xe0 == 0xe0; + + // Only if the OS and the CPU support saving/restoring the AVX + // registers we enable `xsave` support: + if os_avx_support { + // See "13.3 ENABLING THE XSAVE FEATURE SET AND XSAVE-ENABLED + // FEATURES" in the "Intel® 64 and IA-32 Architectures Software + // Developer’s Manual, Volume 1: Basic Architecture": + // + // "Software enables the XSAVE feature set by setting + // CR4.OSXSAVE[bit 18] to 1 (e.g., with the MOV to CR4 + // instruction). If this bit is 0, execution of any of XGETBV, + // XRSTOR, XRSTORS, XSAVE, XSAVEC, XSAVEOPT, XSAVES, and XSETBV + // causes an invalid-opcode exception (#UD)" + + // FMA (uses 256-bit wide registers): + enable(proc_info_ecx, 12, cpu_flags::FMA); + + // For AVX-512 the OS also needs to support saving/restoring + // the extended state, only then we enable AVX-512 support: + if os_avx512_support { + enable(extended_features_edx, 23, cpu_flags::AVX512FP16); + enable(extended_features_eax_leaf_1, 5, cpu_flags::AVX512BF16); + } + } + } + } + + // As Hygon Dhyana originates from AMD technology and shares most of the architecture with + // AMD's family 17h, but with different CPU Vendor ID("HygonGenuine")/Family series number + // (Family 18h). + // + // For CPUID feature bits, Hygon Dhyana(family 18h) share the same definition with AMD + // family 17h. + // + // Related AMD CPUID specification is https://www.amd.com/system/files/TechDocs/25481.pdf + // (AMD64 Architecture Programmer's Manual, Appendix E). 
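The two XCR0 masks above encode the layout spelled out in the comment: bit 1 is SSE state, bit 2 is AVX state, and bits 5–7 are the AVX-512 state components. The same tests, run on a made-up XCR0 value:

```rust
fn main() {
    // Pretend the OS enabled x87 (bit 0), SSE (1), AVX (2) and AVX-512 (5..=7) state.
    let xcr0: u64 = 0b1110_0111;

    let os_avx_support = xcr0 & 6 == 6;          // XCR0.SSE[1] and XCR0.AVX[2]
    let os_avx512_support = xcr0 & 0xe0 == 0xe0; // XCR0.AVX-512[7:5]

    assert!(os_avx_support);
    assert!(os_avx512_support);

    // Without the AVX bit, the AVX check fails.
    let legacy: u64 = 0b0000_0011; // x87 + SSE only
    assert!(legacy & 6 != 6);
}
```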
+ // Related Hygon kernel patch can be found on + // http://lkml.kernel.org/r/5ce86123a7b9dad925ac583d88d2f921040e859b.1538583282.git.puwen@hygon.cn + if vendor_id == *b"AuthenticAMD" || vendor_id == *b"HygonGenuine" { + // These features are available on AMD arch CPUs: + enable(extended_proc_info_ecx, 16, cpu_flags::FMA4); + } + + value +} + +#[cfg(test)] +mod tests { + extern crate std; + use std::is_x86_feature_detected; + + use super::*; + + #[test] + fn check_matches_std() { + let features = get_cpu_features(); + for i in 0..cpu_flags::ALL.len() { + let flag = cpu_flags::ALL[i]; + let name = cpu_flags::NAMES[i]; + + let std_detected = match flag { + cpu_flags::SSE3 => is_x86_feature_detected!("sse3"), + cpu_flags::F16C => is_x86_feature_detected!("f16c"), + cpu_flags::SSE => is_x86_feature_detected!("sse"), + cpu_flags::SSE2 => is_x86_feature_detected!("sse2"), + cpu_flags::ERMSB => is_x86_feature_detected!("ermsb"), + cpu_flags::MOVRS => continue, // only very recent support in std + cpu_flags::FMA => is_x86_feature_detected!("fma"), + cpu_flags::FMA4 => continue, // not yet supported in std + cpu_flags::AVX512FP16 => is_x86_feature_detected!("avx512fp16"), + cpu_flags::AVX512BF16 => is_x86_feature_detected!("avx512bf16"), + _ => panic!("untested CPU flag {name}"), + }; + + assert_eq!( + std_detected, + features.contains(flag), + "different flag {name}. flags: {features:?}" + ); + } + } +} diff --git a/library/compiler-builtins/libm/src/math/arch/x86/fma.rs b/library/compiler-builtins/libm/src/math/arch/x86/fma.rs new file mode 100644 index 0000000000000..43ac187792d84 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/arch/x86/fma.rs @@ -0,0 +1,135 @@ +//! Use assembly fma if the `fma` or `fma4` feature is detected at runtime. + +use core::arch::asm; + +use super::super::super::generic; +use super::detect::{cpu_flags, get_cpu_features}; +use crate::support::Round; +use crate::support::feature_detect::select_once; + +pub fn fma(x: f64, y: f64, z: f64) -> f64 { + select_once! { + sig: fn(x: f64, y: f64, z: f64) -> f64, + init: || { + let features = get_cpu_features(); + if features.contains(cpu_flags::FMA) { + fma_with_fma + } else if features.contains(cpu_flags::FMA4) { + fma_with_fma4 + } else { + fma_fallback as Func + } + }, + // SAFETY: `fn_ptr` is the result of `init`, preconditions have been checked. + call: |fn_ptr: Func| unsafe { fn_ptr(x, y, z) }, + } +} + +pub fn fmaf(x: f32, y: f32, z: f32) -> f32 { + select_once! { + sig: fn(x: f32, y: f32, z: f32) -> f32, + init: || { + let features = get_cpu_features(); + if features.contains(cpu_flags::FMA) { + fmaf_with_fma + } else if features.contains(cpu_flags::FMA4) { + fmaf_with_fma4 + } else { + fmaf_fallback as Func + } + }, + // SAFETY: `fn_ptr` is the result of `init`, preconditions have been checked. + call: |fn_ptr: Func| unsafe { fn_ptr(x, y, z) }, + } +} + +/// # Safety +/// +/// Must have +fma available. +unsafe fn fma_with_fma(mut x: f64, y: f64, z: f64) -> f64 { + debug_assert!(get_cpu_features().contains(cpu_flags::FMA)); + + // SAFETY: fma is asserted available by precondition, which provides the instruction. No + // memory access or side effects. + unsafe { + asm!( + "vfmadd213sd {x}, {y}, {z}", + x = inout(xmm_reg) x, + y = in(xmm_reg) y, + z = in(xmm_reg) z, + options(nostack, nomem, pure), + ); + } + x +} + +/// # Safety +/// +/// Must have +fma available. 
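`fma`/`fmaf` above resolve the best implementation once and then reuse it; `select_once!` hides the caching. A rough plain-`std` analogue of that dispatch for an x86-64 host (the macro and `generic::fma_round` are internal to this crate, so `OnceLock` and `mul_add` below are stand-ins rather than the real mechanism):

```rust
use std::is_x86_feature_detected;
use std::sync::OnceLock;

type FmaFn = fn(f64, f64, f64) -> f64;

fn fma_hw(x: f64, y: f64, z: f64) -> f64 {
    x.mul_add(y, z) // stand-in for the `vfmadd213sd` path
}

fn fma_soft(x: f64, y: f64, z: f64) -> f64 {
    x.mul_add(y, z) // stand-in for the generic soft-float fallback
}

pub fn fma(x: f64, y: f64, z: f64) -> f64 {
    static IMPL: OnceLock<FmaFn> = OnceLock::new();
    // Runs the detection once; later calls reuse the cached function pointer.
    let chosen: FmaFn = *IMPL.get_or_init(|| {
        if is_x86_feature_detected!("fma") { fma_hw } else { fma_soft }
    });
    chosen(x, y, z)
}

fn main() {
    assert_eq!(fma(2.0, 3.0, 1.0), 7.0);
}
```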
+unsafe fn fmaf_with_fma(mut x: f32, y: f32, z: f32) -> f32 { + debug_assert!(get_cpu_features().contains(cpu_flags::FMA)); + + // SAFETY: fma is asserted available by precondition, which provides the instruction. No + // memory access or side effects. + unsafe { + asm!( + "vfmadd213ss {x}, {y}, {z}", + x = inout(xmm_reg) x, + y = in(xmm_reg) y, + z = in(xmm_reg) z, + options(nostack, nomem, pure), + ); + } + x +} + +/// # Safety +/// +/// Must have +fma4 available. +unsafe fn fma_with_fma4(mut x: f64, y: f64, z: f64) -> f64 { + debug_assert!(get_cpu_features().contains(cpu_flags::FMA4)); + + // SAFETY: fma4 is asserted available by precondition, which provides the instruction. No + // memory access or side effects. + unsafe { + asm!( + "vfmaddsd {x}, {x}, {y}, {z}", + x = inout(xmm_reg) x, + y = in(xmm_reg) y, + z = in(xmm_reg) z, + options(nostack, nomem, pure), + ); + } + x +} + +/// # Safety +/// +/// Must have +fma4 available. +unsafe fn fmaf_with_fma4(mut x: f32, y: f32, z: f32) -> f32 { + debug_assert!(get_cpu_features().contains(cpu_flags::FMA4)); + + // SAFETY: fma4 is asserted available by precondition, which provides the instruction. No + // memory access or side effects. + unsafe { + asm!( + "vfmaddss {x}, {x}, {y}, {z}", + x = inout(xmm_reg) x, + y = in(xmm_reg) y, + z = in(xmm_reg) z, + options(nostack, nomem, pure), + ); + } + x +} + +// FIXME: the `select_implementation` macro should handle arch implementations that want +// to use the fallback, so we don't need to recreate the body. + +fn fma_fallback(x: f64, y: f64, z: f64) -> f64 { + generic::fma_round(x, y, z, Round::Nearest).val +} + +fn fmaf_fallback(x: f32, y: f32, z: f32) -> f32 { + generic::fma_wide_round(x, y, z, Round::Nearest).val +} diff --git a/library/compiler-builtins/libm/src/math/asin.rs b/library/compiler-builtins/libm/src/math/asin.rs new file mode 100644 index 0000000000000..12d0cd35fa58a --- /dev/null +++ b/library/compiler-builtins/libm/src/math/asin.rs @@ -0,0 +1,115 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_asin.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* asin(x) + * Method : + * Since asin(x) = x + x^3/6 + x^5*3/40 + x^7*15/336 + ... + * we approximate asin(x) on [0,0.5] by + * asin(x) = x + x*x^2*R(x^2) + * where + * R(x^2) is a rational approximation of (asin(x)-x)/x^3 + * and its remez error is bounded by + * |(asin(x)-x)/x^3 - R(x^2)| < 2^(-58.75) + * + * For x in [0.5,1] + * asin(x) = pi/2-2*asin(sqrt((1-x)/2)) + * Let y = (1-x), z = y/2, s := sqrt(z), and pio2_hi+pio2_lo=pi/2; + * then for x>0.98 + * asin(x) = pi/2 - 2*(s+s*z*R(z)) + * = pio2_hi - (2*(s+s*z*R(z)) - pio2_lo) + * For x<=0.98, let pio4_hi = pio2_hi/2, then + * f = hi part of s; + * c = sqrt(z) - f = (z-f*f)/(s+f) ...f+c=sqrt(z) + * and + * asin(x) = pi/2 - 2*(s+s*z*R(z)) + * = pio4_hi+(pio4-2s)-(2s*z*R(z)-pio2_lo) + * = pio4_hi+(pio4-2f)-(2s*z*R(z)-(pio2_lo+2c)) + * + * Special cases: + * if x is NaN, return x itself; + * if |x|>1, return NaN with invalid signal. 
+ * + */ + +use super::{fabs, get_high_word, get_low_word, sqrt, with_set_low_word}; + +const PIO2_HI: f64 = 1.57079632679489655800e+00; /* 0x3FF921FB, 0x54442D18 */ +const PIO2_LO: f64 = 6.12323399573676603587e-17; /* 0x3C91A626, 0x33145C07 */ +/* coefficients for R(x^2) */ +const P_S0: f64 = 1.66666666666666657415e-01; /* 0x3FC55555, 0x55555555 */ +const P_S1: f64 = -3.25565818622400915405e-01; /* 0xBFD4D612, 0x03EB6F7D */ +const P_S2: f64 = 2.01212532134862925881e-01; /* 0x3FC9C155, 0x0E884455 */ +const P_S3: f64 = -4.00555345006794114027e-02; /* 0xBFA48228, 0xB5688F3B */ +const P_S4: f64 = 7.91534994289814532176e-04; /* 0x3F49EFE0, 0x7501B288 */ +const P_S5: f64 = 3.47933107596021167570e-05; /* 0x3F023DE1, 0x0DFDF709 */ +const Q_S1: f64 = -2.40339491173441421878e+00; /* 0xC0033A27, 0x1C8A2D4B */ +const Q_S2: f64 = 2.02094576023350569471e+00; /* 0x40002AE5, 0x9C598AC8 */ +const Q_S3: f64 = -6.88283971605453293030e-01; /* 0xBFE6066C, 0x1B8D0159 */ +const Q_S4: f64 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */ + +fn comp_r(z: f64) -> f64 { + let p = z * (P_S0 + z * (P_S1 + z * (P_S2 + z * (P_S3 + z * (P_S4 + z * P_S5))))); + let q = 1.0 + z * (Q_S1 + z * (Q_S2 + z * (Q_S3 + z * Q_S4))); + p / q +} + +/// Arcsine (f64) +/// +/// Computes the inverse sine (arc sine) of the argument `x`. +/// Arguments to asin must be in the range -1 to 1. +/// Returns values in radians, in the range of -pi/2 to pi/2. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn asin(mut x: f64) -> f64 { + let z: f64; + let r: f64; + let s: f64; + let hx: u32; + let ix: u32; + + hx = get_high_word(x); + ix = hx & 0x7fffffff; + /* |x| >= 1 or nan */ + if ix >= 0x3ff00000 { + let lx: u32; + lx = get_low_word(x); + if ((ix - 0x3ff00000) | lx) == 0 { + /* asin(1) = +-pi/2 with inexact */ + return x * PIO2_HI + f64::from_bits(0x3870000000000000); + } else { + return 0.0 / (x - x); + } + } + /* |x| < 0.5 */ + if ix < 0x3fe00000 { + /* if 0x1p-1022 <= |x| < 0x1p-26, avoid raising underflow */ + if (0x00100000..0x3e500000).contains(&ix) { + return x; + } else { + return x + x * comp_r(x * x); + } + } + /* 1 > |x| >= 0.5 */ + z = (1.0 - fabs(x)) * 0.5; + s = sqrt(z); + r = comp_r(z); + if ix >= 0x3fef3333 { + /* if |x| > 0.975 */ + x = PIO2_HI - (2. * (s + s * r) - PIO2_LO); + } else { + let f: f64; + let c: f64; + /* f+c = sqrt(z) */ + f = with_set_low_word(s, 0); + c = (z - f * f) / (s + f); + x = 0.5 * PIO2_HI - (2.0 * s * r - (PIO2_LO - 2.0 * c) - (0.5 * PIO2_HI - 2.0 * f)); + } + if hx >> 31 != 0 { -x } else { x } +} diff --git a/library/compiler-builtins/libm/src/math/asinf.rs b/library/compiler-builtins/libm/src/math/asinf.rs new file mode 100644 index 0000000000000..ed685556730e5 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/asinf.rs @@ -0,0 +1,68 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_asinf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. 
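The reduction above switches strategy at `|x| = 0.5` and returns the argument unchanged below roughly `2^-26`. A few closed-form checks that touch each path:

```rust
fn main() {
    use core::f64::consts::FRAC_PI_2;

    // Tiny arguments short-circuit: asin(x) == x well below 2^-26.
    assert_eq!(libm::asin(1.0e-9), 1.0e-9);
    // Polynomial path (|x| < 0.5), checked via the sin round trip.
    assert!((libm::sin(libm::asin(0.25)) - 0.25).abs() < 1e-15);
    // sqrt-based reduction (|x| >= 0.5).
    assert!((libm::asin(1.0) - FRAC_PI_2).abs() < 4e-16);
    // Out of domain.
    assert!(libm::asin(1.5).is_nan());
}
```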
+ * ==================================================== + */ + +use super::sqrt::sqrt; +use super::support::Float; + +const PIO2: f64 = 1.570796326794896558e+00; + +/* coefficients for R(x^2) */ +const P_S0: f32 = 1.6666586697e-01; +const P_S1: f32 = -4.2743422091e-02; +const P_S2: f32 = -8.6563630030e-03; +const Q_S1: f32 = -7.0662963390e-01; + +fn r(z: f32) -> f32 { + let p = z * (P_S0 + z * (P_S1 + z * P_S2)); + let q = 1. + z * Q_S1; + p / q +} + +/// Arcsine (f32) +/// +/// Computes the inverse sine (arc sine) of the argument `x`. +/// Arguments to asin must be in the range -1 to 1. +/// Returns values in radians, in the range of -pi/2 to pi/2. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn asinf(mut x: f32) -> f32 { + let x1p_120 = f64::from_bits(0x3870000000000000); // 0x1p-120 === 2 ^ (-120) + + let hx = x.to_bits(); + let ix = hx & 0x7fffffff; + + if ix >= 0x3f800000 { + /* |x| >= 1 */ + if ix == 0x3f800000 { + /* |x| == 1 */ + return ((x as f64) * PIO2 + x1p_120) as f32; /* asin(+-1) = +-pi/2 with inexact */ + } + return 0. / (x - x); /* asin(|x|>1) is NaN */ + } + + if ix < 0x3f000000 { + /* |x| < 0.5 */ + /* if 0x1p-126 <= |x| < 0x1p-12, avoid raising underflow */ + if (0x00800000..0x39800000).contains(&ix) { + return x; + } + return x + x * r(x * x); + } + + /* 1 > |x| >= 0.5 */ + let z = (1. - Float::abs(x)) * 0.5; + let s = sqrt(z as f64); + x = (PIO2 - 2. * (s + s * (r(z) as f64))) as f32; + if (hx >> 31) != 0 { -x } else { x } +} diff --git a/library/compiler-builtins/libm/src/math/asinh.rs b/library/compiler-builtins/libm/src/math/asinh.rs new file mode 100644 index 0000000000000..75d3c3ad462ac --- /dev/null +++ b/library/compiler-builtins/libm/src/math/asinh.rs @@ -0,0 +1,36 @@ +use super::{log, log1p, sqrt}; + +const LN2: f64 = 0.693147180559945309417232121458176568; /* 0x3fe62e42, 0xfefa39ef*/ + +/* asinh(x) = sign(x)*log(|x|+sqrt(x*x+1)) ~= x - x^3/6 + o(x^5) */ +/// Inverse hyperbolic sine (f64) +/// +/// Calculates the inverse hyperbolic sine of `x`. +/// Is defined as `sgn(x)*log(|x|+sqrt(x*x+1))`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn asinh(mut x: f64) -> f64 { + let mut u = x.to_bits(); + let e = ((u >> 52) as usize) & 0x7ff; + let sign = (u >> 63) != 0; + + /* |x| */ + u &= (!0) >> 1; + x = f64::from_bits(u); + + if e >= 0x3ff + 26 { + /* |x| >= 0x1p26 or inf or nan */ + x = log(x) + LN2; + } else if e >= 0x3ff + 1 { + /* |x| >= 2 */ + x = log(2.0 * x + 1.0 / (sqrt(x * x + 1.0) + x)); + } else if e >= 0x3ff - 26 { + /* |x| >= 0x1p-26, up to 1.6ulp error in [0.125,0.5] */ + x = log1p(x + x * x / (sqrt(x * x + 1.0) + 1.0)); + } else { + /* |x| < 0x1p-26, raise inexact if x != 0 */ + let x1p120 = f64::from_bits(0x4770000000000000); + force_eval!(x + x1p120); + } + + if sign { -x } else { x } +} diff --git a/library/compiler-builtins/libm/src/math/asinhf.rs b/library/compiler-builtins/libm/src/math/asinhf.rs new file mode 100644 index 0000000000000..27ed9dd372daa --- /dev/null +++ b/library/compiler-builtins/libm/src/math/asinhf.rs @@ -0,0 +1,35 @@ +use super::{log1pf, logf, sqrtf}; + +const LN2: f32 = 0.693147180559945309417232121458176568; + +/* asinh(x) = sign(x)*log(|x|+sqrt(x*x+1)) ~= x - x^3/6 + o(x^5) */ +/// Inverse hyperbolic sine (f32) +/// +/// Calculates the inverse hyperbolic sine of `x`. +/// Is defined as `sgn(x)*log(|x|+sqrt(x*x+1))`. 
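As with `acosh`, both `asinh` variants split on magnitude but implement `sign(x)*log(|x| + sqrt(x*x + 1))`; a round-trip check against `sinh(asinh(x)) = x`:

```rust
fn main() {
    for &x in &[-1.0e10_f64, -2.5, -0.25, 0.25, 2.5, 1.0e10] {
        let y = libm::asinh(x);
        let back = libm::sinh(y);
        assert!((back - x).abs() <= 1e-12 * x.abs());
    }
}
```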
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn asinhf(mut x: f32) -> f32 { + let u = x.to_bits(); + let i = u & 0x7fffffff; + let sign = (u >> 31) != 0; + + /* |x| */ + x = f32::from_bits(i); + + if i >= 0x3f800000 + (12 << 23) { + /* |x| >= 0x1p12 or inf or nan */ + x = logf(x) + LN2; + } else if i >= 0x3f800000 + (1 << 23) { + /* |x| >= 2 */ + x = logf(2.0 * x + 1.0 / (sqrtf(x * x + 1.0) + x)); + } else if i >= 0x3f800000 - (12 << 23) { + /* |x| >= 0x1p-12, up to 1.6ulp error in [0.125,0.5] */ + x = log1pf(x + x * x / (sqrtf(x * x + 1.0) + 1.0)); + } else { + /* |x| < 0x1p-12, raise inexact if x!=0 */ + let x1p120 = f32::from_bits(0x7b800000); + force_eval!(x + x1p120); + } + + if sign { -x } else { x } +} diff --git a/library/compiler-builtins/libm/src/math/atan.rs b/library/compiler-builtins/libm/src/math/atan.rs new file mode 100644 index 0000000000000..4ca5cc91a1e48 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/atan.rs @@ -0,0 +1,182 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_atan.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* atan(x) + * Method + * 1. Reduce x to positive by atan(x) = -atan(-x). + * 2. According to the integer k=4t+0.25 chopped, t=x, the argument + * is further reduced to one of the following intervals and the + * arctangent of t is evaluated by the corresponding formula: + * + * [0,7/16] atan(x) = t-t^3*(a1+t^2*(a2+...(a10+t^2*a11)...) + * [7/16,11/16] atan(x) = atan(1/2) + atan( (t-0.5)/(1+t/2) ) + * [11/16.19/16] atan(x) = atan( 1 ) + atan( (t-1)/(1+t) ) + * [19/16,39/16] atan(x) = atan(3/2) + atan( (t-1.5)/(1+1.5t) ) + * [39/16,INF] atan(x) = atan(INF) + atan( -1/t ) + * + * Constants: + * The hexadecimal values are the intended ones for the following + * constants. The decimal values may be used, provided that the + * compiler will convert from decimal to binary accurately enough + * to produce the hexadecimal values shown. 
+ */ + +use core::f64; + +use super::fabs; + +const ATANHI: [f64; 4] = [ + 4.63647609000806093515e-01, /* atan(0.5)hi 0x3FDDAC67, 0x0561BB4F */ + 7.85398163397448278999e-01, /* atan(1.0)hi 0x3FE921FB, 0x54442D18 */ + 9.82793723247329054082e-01, /* atan(1.5)hi 0x3FEF730B, 0xD281F69B */ + 1.57079632679489655800e+00, /* atan(inf)hi 0x3FF921FB, 0x54442D18 */ +]; + +const ATANLO: [f64; 4] = [ + 2.26987774529616870924e-17, /* atan(0.5)lo 0x3C7A2B7F, 0x222F65E2 */ + 3.06161699786838301793e-17, /* atan(1.0)lo 0x3C81A626, 0x33145C07 */ + 1.39033110312309984516e-17, /* atan(1.5)lo 0x3C700788, 0x7AF0CBBD */ + 6.12323399573676603587e-17, /* atan(inf)lo 0x3C91A626, 0x33145C07 */ +]; + +const AT: [f64; 11] = [ + 3.33333333333329318027e-01, /* 0x3FD55555, 0x5555550D */ + -1.99999999998764832476e-01, /* 0xBFC99999, 0x9998EBC4 */ + 1.42857142725034663711e-01, /* 0x3FC24924, 0x920083FF */ + -1.11111104054623557880e-01, /* 0xBFBC71C6, 0xFE231671 */ + 9.09088713343650656196e-02, /* 0x3FB745CD, 0xC54C206E */ + -7.69187620504482999495e-02, /* 0xBFB3B0F2, 0xAF749A6D */ + 6.66107313738753120669e-02, /* 0x3FB10D66, 0xA0D03D51 */ + -5.83357013379057348645e-02, /* 0xBFADDE2D, 0x52DEFD9A */ + 4.97687799461593236017e-02, /* 0x3FA97B4B, 0x24760DEB */ + -3.65315727442169155270e-02, /* 0xBFA2B444, 0x2C6A6C2F */ + 1.62858201153657823623e-02, /* 0x3F90AD3A, 0xE322DA11 */ +]; + +/// Arctangent (f64) +/// +/// Computes the inverse tangent (arc tangent) of the input value. +/// Returns a value in radians, in the range of -pi/2 to pi/2. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn atan(x: f64) -> f64 { + let mut x = x; + let mut ix = (x.to_bits() >> 32) as u32; + let sign = ix >> 31; + ix &= 0x7fff_ffff; + if ix >= 0x4410_0000 { + if x.is_nan() { + return x; + } + + let z = ATANHI[3] + f64::from_bits(0x0380_0000); // 0x1p-120f + return if sign != 0 { -z } else { z }; + } + + let id = if ix < 0x3fdc_0000 { + /* |x| < 0.4375 */ + if ix < 0x3e40_0000 { + /* |x| < 2^-27 */ + if ix < 0x0010_0000 { + /* raise underflow for subnormal x */ + force_eval!(x as f32); + } + + return x; + } + + -1 + } else { + x = fabs(x); + if ix < 0x3ff30000 { + /* |x| < 1.1875 */ + if ix < 0x3fe60000 { + /* 7/16 <= |x| < 11/16 */ + x = (2. * x - 1.) / (2. + x); + 0 + } else { + /* 11/16 <= |x| < 19/16 */ + x = (x - 1.) / (x + 1.); + 1 + } + } else if ix < 0x40038000 { + /* |x| < 2.4375 */ + x = (x - 1.5) / (1. + 1.5 * x); + 2 + } else { + /* 2.4375 <= |x| < 2^66 */ + x = -1. 
/ x; + 3 + } + }; + + let z = x * x; + let w = z * z; + /* break sum from i=0 to 10 AT[i]z**(i+1) into odd and even poly */ + let s1 = z * (AT[0] + w * (AT[2] + w * (AT[4] + w * (AT[6] + w * (AT[8] + w * AT[10]))))); + let s2 = w * (AT[1] + w * (AT[3] + w * (AT[5] + w * (AT[7] + w * AT[9])))); + + if id < 0 { + return x - x * (s1 + s2); + } + + let z = i!(ATANHI, id as usize) - (x * (s1 + s2) - i!(ATANLO, id as usize) - x); + + if sign != 0 { -z } else { z } +} + +#[cfg(test)] +mod tests { + use core::f64; + + use super::atan; + + #[test] + fn sanity_check() { + for (input, answer) in [ + (3.0_f64.sqrt() / 3.0, f64::consts::FRAC_PI_6), + (1.0, f64::consts::FRAC_PI_4), + (3.0_f64.sqrt(), f64::consts::FRAC_PI_3), + (-3.0_f64.sqrt() / 3.0, -f64::consts::FRAC_PI_6), + (-1.0, -f64::consts::FRAC_PI_4), + (-3.0_f64.sqrt(), -f64::consts::FRAC_PI_3), + ] + .iter() + { + assert!( + (atan(*input) - answer) / answer < 1e-5, + "\natan({:.4}/16) = {:.4}, actual: {}", + input * 16.0, + answer, + atan(*input) + ); + } + } + + #[test] + fn zero() { + assert_eq!(atan(0.0), 0.0); + } + + #[test] + fn infinity() { + assert_eq!(atan(f64::INFINITY), f64::consts::FRAC_PI_2); + } + + #[test] + fn minus_infinity() { + assert_eq!(atan(f64::NEG_INFINITY), -f64::consts::FRAC_PI_2); + } + + #[test] + fn nan() { + assert!(atan(f64::NAN).is_nan()); + } +} diff --git a/library/compiler-builtins/libm/src/math/atan2.rs b/library/compiler-builtins/libm/src/math/atan2.rs new file mode 100644 index 0000000000000..c668731cf3760 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/atan2.rs @@ -0,0 +1,131 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_atan2.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + * + */ +/* atan2(y,x) + * Method : + * 1. Reduce y to positive by atan2(y,x)=-atan2(-y,x). + * 2. Reduce x to positive by (if x and y are unexceptional): + * ARG (x+iy) = arctan(y/x) ... if x > 0, + * ARG (x+iy) = pi - arctan[y/(-x)] ... if x < 0, + * + * Special cases: + * + * ATAN2((anything), NaN ) is NaN; + * ATAN2(NAN , (anything) ) is NaN; + * ATAN2(+-0, +(anything but NaN)) is +-0 ; + * ATAN2(+-0, -(anything but NaN)) is +-pi ; + * ATAN2(+-(anything but 0 and NaN), 0) is +-pi/2; + * ATAN2(+-(anything but INF and NaN), +INF) is +-0 ; + * ATAN2(+-(anything but INF and NaN), -INF) is +-pi; + * ATAN2(+-INF,+INF ) is +-pi/4 ; + * ATAN2(+-INF,-INF ) is +-3pi/4; + * ATAN2(+-INF, (anything but,0,NaN, and INF)) is +-pi/2; + * + * Constants: + * The hexadecimal values are the intended ones for the following + * constants. The decimal values may be used, provided that the + * compiler will convert from decimal to binary accurately enough + * to produce the hexadecimal values shown. + */ + +use super::{atan, fabs}; + +const PI: f64 = 3.1415926535897931160E+00; /* 0x400921FB, 0x54442D18 */ +const PI_LO: f64 = 1.2246467991473531772E-16; /* 0x3CA1A626, 0x33145C07 */ + +/// Arctangent of y/x (f64) +/// +/// Computes the inverse tangent (arc tangent) of `y/x`. +/// Produces the correct result even for angles near pi/2 or -pi/2 (that is, when `x` is near 0). +/// Returns a value in radians, in the range of -pi to pi. 
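The `m = 2*sign(x) + sign(y)` selector above packs the two sign bits into a two-bit quadrant index, which is what the special-case table in the header keys off. The same extraction, done on whole `f64` bit patterns:

```rust
fn quadrant(y: f64, x: f64) -> u32 {
    let sign_y = (y.to_bits() >> 63) as u32;       // bit 0: sign of y
    let sign_x = ((x.to_bits() >> 62) & 2) as u32; // bit 1: sign of x
    sign_x | sign_y
}

fn main() {
    assert_eq!(quadrant(1.0, 1.0), 0);   // atan(+, +)
    assert_eq!(quadrant(-1.0, 1.0), 1);  // atan(-, +)
    assert_eq!(quadrant(1.0, -1.0), 2);  // atan(+, -)
    assert_eq!(quadrant(-1.0, -1.0), 3); // atan(-, -)
    // Signed zeros participate too, which is how atan2(-0.0, -1.0) lands on -pi.
    assert_eq!(quadrant(-0.0, -1.0), 3);
}
```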
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn atan2(y: f64, x: f64) -> f64 { + if x.is_nan() || y.is_nan() { + return x + y; + } + let mut ix = (x.to_bits() >> 32) as u32; + let lx = x.to_bits() as u32; + let mut iy = (y.to_bits() >> 32) as u32; + let ly = y.to_bits() as u32; + if ((ix.wrapping_sub(0x3ff00000)) | lx) == 0 { + /* x = 1.0 */ + return atan(y); + } + let m = ((iy >> 31) & 1) | ((ix >> 30) & 2); /* 2*sign(x)+sign(y) */ + ix &= 0x7fffffff; + iy &= 0x7fffffff; + + /* when y = 0 */ + if (iy | ly) == 0 { + return match m { + 0 | 1 => y, /* atan(+-0,+anything)=+-0 */ + 2 => PI, /* atan(+0,-anything) = PI */ + _ => -PI, /* atan(-0,-anything) =-PI */ + }; + } + /* when x = 0 */ + if (ix | lx) == 0 { + return if m & 1 != 0 { -PI / 2.0 } else { PI / 2.0 }; + } + /* when x is INF */ + if ix == 0x7ff00000 { + if iy == 0x7ff00000 { + return match m { + 0 => PI / 4.0, /* atan(+INF,+INF) */ + 1 => -PI / 4.0, /* atan(-INF,+INF) */ + 2 => 3.0 * PI / 4.0, /* atan(+INF,-INF) */ + _ => -3.0 * PI / 4.0, /* atan(-INF,-INF) */ + }; + } else { + return match m { + 0 => 0.0, /* atan(+...,+INF) */ + 1 => -0.0, /* atan(-...,+INF) */ + 2 => PI, /* atan(+...,-INF) */ + _ => -PI, /* atan(-...,-INF) */ + }; + } + } + /* |y/x| > 0x1p64 */ + if ix.wrapping_add(64 << 20) < iy || iy == 0x7ff00000 { + return if m & 1 != 0 { -PI / 2.0 } else { PI / 2.0 }; + } + + /* z = atan(|y/x|) without spurious underflow */ + let z = if (m & 2 != 0) && iy.wrapping_add(64 << 20) < ix { + /* |y/x| < 0x1p-64, x<0 */ + 0.0 + } else { + atan(fabs(y / x)) + }; + match m { + 0 => z, /* atan(+,+) */ + 1 => -z, /* atan(-,+) */ + 2 => PI - (z - PI_LO), /* atan(+,-) */ + _ => (z - PI_LO) - PI, /* atan(-,-) */ + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + #[cfg_attr(x86_no_sse, ignore = "FIXME(i586): possible incorrect rounding")] + fn sanity_check() { + assert_eq!(atan2(0.0, 1.0), 0.0); + assert_eq!(atan2(0.0, -1.0), PI); + assert_eq!(atan2(-0.0, -1.0), -PI); + assert_eq!(atan2(3.0, 2.0), atan(3.0 / 2.0)); + assert_eq!(atan2(2.0, -1.0), atan(2.0 / -1.0) + PI); + assert_eq!(atan2(-2.0, -1.0), atan(-2.0 / -1.0) - PI); + } +} diff --git a/library/compiler-builtins/libm/src/math/atan2f.rs b/library/compiler-builtins/libm/src/math/atan2f.rs new file mode 100644 index 0000000000000..95b466fff4e42 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/atan2f.rs @@ -0,0 +1,90 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_atan2f.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::{atanf, fabsf}; + +const PI: f32 = 3.1415927410e+00; /* 0x40490fdb */ +const PI_LO: f32 = -8.7422776573e-08; /* 0xb3bbbd2e */ + +/// Arctangent of y/x (f32) +/// +/// Computes the inverse tangent (arc tangent) of `y/x`. +/// Produces the correct result even for angles near pi/2 or -pi/2 (that is, when `x` is near 0). +/// Returns a value in radians, in the range of -pi to pi. 
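Both atan2 and atan2f pack the two sign bits into `m = 2*sign(x) + sign(y)` so that a single `match` can pick the quadrant fix-up. A standalone sketch of that encoding (illustrative only, with a hypothetical `quadrant` helper, not the crate's code):

fn quadrant(y: f64, x: f64) -> u32 {
    // High 32 bits hold the sign and exponent; bit 31 is the sign bit.
    let ix = (x.to_bits() >> 32) as u32;
    let iy = (y.to_bits() >> 32) as u32;
    ((iy >> 31) & 1) | ((ix >> 30) & 2)
}

fn main() {
    assert_eq!(quadrant(1.0, 1.0), 0);   // +y, +x
    assert_eq!(quadrant(-1.0, 1.0), 1);  // -y, +x
    assert_eq!(quadrant(1.0, -1.0), 2);  // +y, -x
    assert_eq!(quadrant(-1.0, -1.0), 3); // -y, -x
}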
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn atan2f(y: f32, x: f32) -> f32 { + if x.is_nan() || y.is_nan() { + return x + y; + } + let mut ix = x.to_bits(); + let mut iy = y.to_bits(); + + if ix == 0x3f800000 { + /* x=1.0 */ + return atanf(y); + } + let m = ((iy >> 31) & 1) | ((ix >> 30) & 2); /* 2*sign(x)+sign(y) */ + ix &= 0x7fffffff; + iy &= 0x7fffffff; + + /* when y = 0 */ + if iy == 0 { + return match m { + 0 | 1 => y, /* atan(+-0,+anything)=+-0 */ + 2 => PI, /* atan(+0,-anything) = pi */ + _ => -PI, /* atan(-0,-anything) =-pi */ + }; + } + /* when x = 0 */ + if ix == 0 { + return if m & 1 != 0 { -PI / 2. } else { PI / 2. }; + } + /* when x is INF */ + if ix == 0x7f800000 { + return if iy == 0x7f800000 { + match m { + 0 => PI / 4., /* atan(+INF,+INF) */ + 1 => -PI / 4., /* atan(-INF,+INF) */ + 2 => 3. * PI / 4., /* atan(+INF,-INF)*/ + _ => -3. * PI / 4., /* atan(-INF,-INF)*/ + } + } else { + match m { + 0 => 0., /* atan(+...,+INF) */ + 1 => -0., /* atan(-...,+INF) */ + 2 => PI, /* atan(+...,-INF) */ + _ => -PI, /* atan(-...,-INF) */ + } + }; + } + /* |y/x| > 0x1p26 */ + if (ix + (26 << 23) < iy) || (iy == 0x7f800000) { + return if m & 1 != 0 { -PI / 2. } else { PI / 2. }; + } + + /* z = atan(|y/x|) with correct underflow */ + let z = if (m & 2 != 0) && (iy + (26 << 23) < ix) { + /*|y/x| < 0x1p-26, x < 0 */ + 0. + } else { + atanf(fabsf(y / x)) + }; + match m { + 0 => z, /* atan(+,+) */ + 1 => -z, /* atan(-,+) */ + 2 => PI - (z - PI_LO), /* atan(+,-) */ + _ => (z - PI_LO) - PI, /* case 3 */ /* atan(-,-) */ + } +} diff --git a/library/compiler-builtins/libm/src/math/atanf.rs b/library/compiler-builtins/libm/src/math/atanf.rs new file mode 100644 index 0000000000000..da8daa41a0106 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/atanf.rs @@ -0,0 +1,108 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_atanf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::fabsf; + +const ATAN_HI: [f32; 4] = [ + 4.6364760399e-01, /* atan(0.5)hi 0x3eed6338 */ + 7.8539812565e-01, /* atan(1.0)hi 0x3f490fda */ + 9.8279368877e-01, /* atan(1.5)hi 0x3f7b985e */ + 1.5707962513e+00, /* atan(inf)hi 0x3fc90fda */ +]; + +const ATAN_LO: [f32; 4] = [ + 5.0121582440e-09, /* atan(0.5)lo 0x31ac3769 */ + 3.7748947079e-08, /* atan(1.0)lo 0x33222168 */ + 3.4473217170e-08, /* atan(1.5)lo 0x33140fb4 */ + 7.5497894159e-08, /* atan(inf)lo 0x33a22168 */ +]; + +const A_T: [f32; 5] = [ + 3.3333328366e-01, + -1.9999158382e-01, + 1.4253635705e-01, + -1.0648017377e-01, + 6.1687607318e-02, +]; + +/// Arctangent (f32) +/// +/// Computes the inverse tangent (arc tangent) of the input value. +/// Returns a value in radians, in the range of -pi/2 to pi/2. 
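The hex thresholds in the f32 argument reduction below are simply the bit patterns of the interval bounds named in the comments; a small check makes that correspondence explicit (illustrative only):

fn main() {
    assert_eq!(f32::from_bits(0x3ee00000), 0.4375);        // |x| < 0.4375 cutoff
    assert_eq!(f32::from_bits(0x3f980000), 1.1875);        // |x| < 1.1875 cutoff
    assert_eq!(f32::from_bits(0x401c0000), 2.4375);        // |x| < 2.4375 cutoff
    assert_eq!(f32::from_bits(0x4c800000), 67_108_864.0);  // 2^26
}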
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn atanf(mut x: f32) -> f32 { + let x1p_120 = f32::from_bits(0x03800000); // 0x1p-120 === 2 ^ (-120) + + let z: f32; + + let mut ix = x.to_bits(); + let sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + + if ix >= 0x4c800000 { + /* if |x| >= 2**26 */ + if x.is_nan() { + return x; + } + z = i!(ATAN_HI, 3) + x1p_120; + return if sign { -z } else { z }; + } + let id = if ix < 0x3ee00000 { + /* |x| < 0.4375 */ + if ix < 0x39800000 { + /* |x| < 2**-12 */ + if ix < 0x00800000 { + /* raise underflow for subnormal x */ + force_eval!(x * x); + } + return x; + } + -1 + } else { + x = fabsf(x); + if ix < 0x3f980000 { + /* |x| < 1.1875 */ + if ix < 0x3f300000 { + /* 7/16 <= |x| < 11/16 */ + x = (2. * x - 1.) / (2. + x); + 0 + } else { + /* 11/16 <= |x| < 19/16 */ + x = (x - 1.) / (x + 1.); + 1 + } + } else if ix < 0x401c0000 { + /* |x| < 2.4375 */ + x = (x - 1.5) / (1. + 1.5 * x); + 2 + } else { + /* 2.4375 <= |x| < 2**26 */ + x = -1. / x; + 3 + } + }; + /* end of argument reduction */ + z = x * x; + let w = z * z; + /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */ + let s1 = z * (i!(A_T, 0) + w * (i!(A_T, 2) + w * i!(A_T, 4))); + let s2 = w * (i!(A_T, 1) + w * i!(A_T, 3)); + if id < 0 { + return x - x * (s1 + s2); + } + let id = id as usize; + let z = i!(ATAN_HI, id) - ((x * (s1 + s2) - i!(ATAN_LO, id)) - x); + if sign { -z } else { z } +} diff --git a/library/compiler-builtins/libm/src/math/atanh.rs b/library/compiler-builtins/libm/src/math/atanh.rs new file mode 100644 index 0000000000000..9dc826f5605b8 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/atanh.rs @@ -0,0 +1,33 @@ +use super::log1p; + +/* atanh(x) = log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2 ~= x + x^3/3 + o(x^5) */ +/// Inverse hyperbolic tangent (f64) +/// +/// Calculates the inverse hyperbolic tangent of `x`. +/// Is defined as `log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn atanh(x: f64) -> f64 { + let u = x.to_bits(); + let e = ((u >> 52) as usize) & 0x7ff; + let sign = (u >> 63) != 0; + + /* |x| */ + let mut y = f64::from_bits(u & 0x7fff_ffff_ffff_ffff); + + if e < 0x3ff - 1 { + if e < 0x3ff - 32 { + /* handle underflow */ + if e == 0 { + force_eval!(y as f32); + } + } else { + /* |x| < 0.5, up to 1.7ulp error */ + y = 0.5 * log1p(2.0 * y + 2.0 * y * y / (1.0 - y)); + } + } else { + /* avoid overflow */ + y = 0.5 * log1p(2.0 * (y / (1.0 - y))); + } + + if sign { -y } else { y } +} diff --git a/library/compiler-builtins/libm/src/math/atanhf.rs b/library/compiler-builtins/libm/src/math/atanhf.rs new file mode 100644 index 0000000000000..80ccec1f67fe4 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/atanhf.rs @@ -0,0 +1,33 @@ +use super::log1pf; + +/* atanh(x) = log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2 ~= x + x^3/3 + o(x^5) */ +/// Inverse hyperbolic tangent (f32) +/// +/// Calculates the inverse hyperbolic tangent of `x`. +/// Is defined as `log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2`. 
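The identity quoted in the comments, atanh(x) = log1p(2x/(1-x))/2, can be sanity-checked against the standard library's `f64::atanh`. This is purely illustrative; the implementations above and below additionally branch on |x| < 0.5 to keep rounding error small.

fn main() {
    for &x in &[0.1_f64, 0.3, 0.7, 0.99] {
        let via_log1p = 0.5 * (2.0 * x / (1.0 - x)).ln_1p();
        // Loose tolerance; both routes agree to within a few ulps.
        assert!((via_log1p - x.atanh()).abs() < 1e-12);
    }
}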
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn atanhf(mut x: f32) -> f32 { + let mut u = x.to_bits(); + let sign = (u >> 31) != 0; + + /* |x| */ + u &= 0x7fffffff; + x = f32::from_bits(u); + + if u < 0x3f800000 - (1 << 23) { + if u < 0x3f800000 - (32 << 23) { + /* handle underflow */ + if u < (1 << 23) { + force_eval!(x * x); + } + } else { + /* |x| < 0.5, up to 1.7ulp error */ + x = 0.5 * log1pf(2.0 * x + 2.0 * x * x / (1.0 - x)); + } + } else { + /* avoid overflow */ + x = 0.5 * log1pf(2.0 * (x / (1.0 - x))); + } + + if sign { -x } else { x } +} diff --git a/library/compiler-builtins/libm/src/math/cbrt.rs b/library/compiler-builtins/libm/src/math/cbrt.rs new file mode 100644 index 0000000000000..cf56f7a9792ae --- /dev/null +++ b/library/compiler-builtins/libm/src/math/cbrt.rs @@ -0,0 +1,219 @@ +/* SPDX-License-Identifier: MIT */ +/* origin: core-math/src/binary64/cbrt/cbrt.c + * Copyright (c) 2021-2022 Alexei Sibidanov. + * Ported to Rust in 2025 by Trevor Gross. + */ + +use super::Float; +use super::support::{FpResult, Round, cold_path}; + +/// Compute the cube root of the argument. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn cbrt(x: f64) -> f64 { + cbrt_round(x, Round::Nearest).val +} + +pub fn cbrt_round(x: f64, round: Round) -> FpResult { + const ESCALE: [f64; 3] = [ + 1.0, + hf64!("0x1.428a2f98d728bp+0"), /* 2^(1/3) */ + hf64!("0x1.965fea53d6e3dp+0"), /* 2^(2/3) */ + ]; + + /* the polynomial c0+c1*x+c2*x^2+c3*x^3 approximates x^(1/3) on [1,2] + with maximal error < 9.2e-5 (attained at x=2) */ + const C: [f64; 4] = [ + hf64!("0x1.1b0babccfef9cp-1"), + hf64!("0x1.2c9a3e94d1da5p-1"), + hf64!("-0x1.4dc30b1a1ddbap-3"), + hf64!("0x1.7a8d3e4ec9b07p-6"), + ]; + + let u0: f64 = hf64!("0x1.5555555555555p-2"); + let u1: f64 = hf64!("0x1.c71c71c71c71cp-3"); + + let rsc = [1.0, -1.0, 0.5, -0.5, 0.25, -0.25]; + + let off = [hf64!("0x1p-53"), 0.0, 0.0, 0.0]; + + /* rm=0 for rounding to nearest, and other values for directed roundings */ + let hx: u64 = x.to_bits(); + let mut mant: u64 = hx & f64::SIG_MASK; + let sign: u64 = hx >> 63; + + let mut e: u32 = (hx >> f64::SIG_BITS) as u32 & f64::EXP_SAT; + + if ((e + 1) & f64::EXP_SAT) < 2 { + cold_path(); + + let ix: u64 = hx & !f64::SIGN_MASK; + + /* 0, inf, nan: we return x + x instead of simply x, + to that for x a signaling NaN, it correctly triggers + the invalid exception. 
*/ + if e == f64::EXP_SAT || ix == 0 { + return FpResult::ok(x + x); + } + + let nz = ix.leading_zeros() - 11; /* subnormal */ + mant <<= nz; + mant &= f64::SIG_MASK; + e = e.wrapping_sub(nz - 1); + } + + e = e.wrapping_add(3072); + let cvt1: u64 = mant | (0x3ffu64 << 52); + let mut cvt5: u64 = cvt1; + + let et: u32 = e / 3; + let it: u32 = e % 3; + + /* 2^(3k+it) <= x < 2^(3k+it+1), with 0 <= it <= 3 */ + cvt5 += u64::from(it) << f64::SIG_BITS; + cvt5 |= sign << 63; + let zz: f64 = f64::from_bits(cvt5); + + /* cbrt(x) = cbrt(zz)*2^(et-1365) where 1 <= zz < 8 */ + let mut isc: u64 = ESCALE[it as usize].to_bits(); // todo: index + isc |= sign << 63; + let cvt2: u64 = isc; + let z: f64 = f64::from_bits(cvt1); + + /* cbrt(zz) = cbrt(z)*isc, where isc encodes 1, 2^(1/3) or 2^(2/3), + and 1 <= z < 2 */ + let r: f64 = 1.0 / z; + let rr: f64 = r * rsc[((it as usize) << 1) | sign as usize]; + let z2: f64 = z * z; + let c0: f64 = C[0] + z * C[1]; + let c2: f64 = C[2] + z * C[3]; + let mut y: f64 = c0 + z2 * c2; + let mut y2: f64 = y * y; + + /* y is an approximation of z^(1/3) */ + let mut h: f64 = y2 * (y * r) - 1.0; + + /* h determines the error between y and z^(1/3) */ + y -= (h * y) * (u0 - u1 * h); + + /* The correction y -= (h*y)*(u0 - u1*h) corresponds to a cubic variant + of Newton's method, with the function f(y) = 1-z/y^3. */ + y *= f64::from_bits(cvt2); + + /* Now y is an approximation of zz^(1/3), + * and rr an approximation of 1/zz. We now perform another iteration of + * Newton-Raphson, this time with a linear approximation only. */ + y2 = y * y; + let mut y2l: f64 = y.fma(y, -y2); + + /* y2 + y2l = y^2 exactly */ + let mut y3: f64 = y2 * y; + let mut y3l: f64 = y.fma(y2, -y3) + y * y2l; + + /* y3 + y3l approximates y^3 with about 106 bits of accuracy */ + h = ((y3 - zz) + y3l) * rr; + let mut dy: f64 = h * (y * u0); + + /* the approximation of zz^(1/3) is y - dy */ + let mut y1: f64 = y - dy; + dy = (y - y1) - dy; + + /* the approximation of zz^(1/3) is now y1 + dy, where |dy| < 1/2 ulp(y) + * (for rounding to nearest) */ + let mut ady: f64 = dy.abs(); + + /* For directed roundings, ady0 is tiny when dy is tiny, or ady0 is near + * from ulp(1); + * for rounding to nearest, ady0 is tiny when dy is near from 1/2 ulp(1), + * or from 3/2 ulp(1). 
*/ + let mut ady0: f64 = (ady - off[round as usize]).abs(); + let mut ady1: f64 = (ady - (hf64!("0x1p-52") + off[round as usize])).abs(); + + if ady0 < hf64!("0x1p-75") || ady1 < hf64!("0x1p-75") { + cold_path(); + + y2 = y1 * y1; + y2l = y1.fma(y1, -y2); + y3 = y2 * y1; + y3l = y1.fma(y2, -y3) + y1 * y2l; + h = ((y3 - zz) + y3l) * rr; + dy = h * (y1 * u0); + y = y1 - dy; + dy = (y1 - y) - dy; + y1 = y; + ady = dy.abs(); + ady0 = (ady - off[round as usize]).abs(); + ady1 = (ady - (hf64!("0x1p-52") + off[round as usize])).abs(); + + if ady0 < hf64!("0x1p-98") || ady1 < hf64!("0x1p-98") { + cold_path(); + let azz: f64 = zz.abs(); + + // ~ 0x1.79d15d0e8d59b80000000000000ffc3dp+0 + if azz == hf64!("0x1.9b78223aa307cp+1") { + y1 = hf64!("0x1.79d15d0e8d59cp+0").copysign(zz); + } + + // ~ 0x1.de87aa837820e80000000000001c0f08p+0 + if azz == hf64!("0x1.a202bfc89ddffp+2") { + y1 = hf64!("0x1.de87aa837820fp+0").copysign(zz); + } + + if round != Round::Nearest { + let wlist = [ + (hf64!("0x1.3a9ccd7f022dbp+0"), hf64!("0x1.1236160ba9b93p+0")), // ~ 0x1.1236160ba9b930000000000001e7e8fap+0 + (hf64!("0x1.7845d2faac6fep+0"), hf64!("0x1.23115e657e49cp+0")), // ~ 0x1.23115e657e49c0000000000001d7a799p+0 + (hf64!("0x1.d1ef81cbbbe71p+0"), hf64!("0x1.388fb44cdcf5ap+0")), // ~ 0x1.388fb44cdcf5a0000000000002202c55p+0 + (hf64!("0x1.0a2014f62987cp+1"), hf64!("0x1.46bcbf47dc1e8p+0")), // ~ 0x1.46bcbf47dc1e8000000000000303aa2dp+0 + (hf64!("0x1.fe18a044a5501p+1"), hf64!("0x1.95decfec9c904p+0")), // ~ 0x1.95decfec9c9040000000000000159e8ep+0 + (hf64!("0x1.a6bb8c803147bp+2"), hf64!("0x1.e05335a6401dep+0")), // ~ 0x1.e05335a6401de00000000000027ca017p+0 + (hf64!("0x1.ac8538a031cbdp+2"), hf64!("0x1.e281d87098de8p+0")), // ~ 0x1.e281d87098de80000000000000ee9314p+0 + ]; + + for (a, b) in wlist { + if azz == a { + let tmp = if round as u64 + sign == 2 { + hf64!("0x1p-52") + } else { + 0.0 + }; + y1 = (b + tmp).copysign(zz); + } + } + } + } + } + + let mut cvt3: u64 = y1.to_bits(); + cvt3 = cvt3.wrapping_add(((et.wrapping_sub(342).wrapping_sub(1023)) as u64) << 52); + let m0: u64 = cvt3 << 30; + let m1 = m0 >> 63; + + if (m0 ^ m1) <= (1u64 << 30) { + cold_path(); + + let mut cvt4: u64 = y1.to_bits(); + cvt4 = (cvt4 + (164 << 15)) & 0xffffffffffff0000u64; + + if ((f64::from_bits(cvt4) - y1) - dy).abs() < hf64!("0x1p-60") || (zz).abs() == 1.0 { + cvt3 = (cvt3 + (1u64 << 15)) & 0xffffffffffff0000u64; + } + } + + FpResult::ok(f64::from_bits(cvt3)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn spot_checks() { + if !cfg!(x86_no_sse) { + // Exposes a rounding mode problem. Ignored on i586 because of inaccurate FMA. + assert_biteq!( + cbrt(f64::from_bits(0xf7f792b28f600000)), + f64::from_bits(0xd29ce68655d962f3) + ); + } + } +} diff --git a/library/compiler-builtins/libm/src/math/cbrtf.rs b/library/compiler-builtins/libm/src/math/cbrtf.rs new file mode 100644 index 0000000000000..9d70305c64720 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/cbrtf.rs @@ -0,0 +1,75 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_cbrtf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + * Debugged and optimized by Bruce D. Evans. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. 
+ * ==================================================== + */ +/* cbrtf(x) + * Return cube root of x + */ + +use core::f32; + +const B1: u32 = 709958130; /* B1 = (127-127.0/3-0.03306235651)*2**23 */ +const B2: u32 = 642849266; /* B2 = (127-127.0/3-24/3-0.03306235651)*2**23 */ + +/// Cube root (f32) +/// +/// Computes the cube root of the argument. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn cbrtf(x: f32) -> f32 { + let x1p24 = f32::from_bits(0x4b800000); // 0x1p24f === 2 ^ 24 + + let mut r: f64; + let mut t: f64; + let mut ui: u32 = x.to_bits(); + let mut hx: u32 = ui & 0x7fffffff; + + if hx >= 0x7f800000 { + /* cbrt(NaN,INF) is itself */ + return x + x; + } + + /* rough cbrt to 5 bits */ + if hx < 0x00800000 { + /* zero or subnormal? */ + if hx == 0 { + return x; /* cbrt(+-0) is itself */ + } + ui = (x * x1p24).to_bits(); + hx = ui & 0x7fffffff; + hx = hx / 3 + B2; + } else { + hx = hx / 3 + B1; + } + ui &= 0x80000000; + ui |= hx; + + /* + * First step Newton iteration (solving t*t-x/t == 0) to 16 bits. In + * double precision so that its terms can be arranged for efficiency + * without causing overflow or underflow. + */ + t = f32::from_bits(ui) as f64; + r = t * t * t; + t = t * (x as f64 + x as f64 + r) / (x as f64 + r + r); + + /* + * Second step Newton iteration to 47 bits. In double precision for + * efficiency and accuracy. + */ + r = t * t * t; + t = t * (x as f64 + x as f64 + r) / (x as f64 + r + r); + + /* rounding to 24 bits is perfect in round-to-nearest mode */ + t as f32 +} diff --git a/library/compiler-builtins/libm/src/math/ceil.rs b/library/compiler-builtins/libm/src/math/ceil.rs new file mode 100644 index 0000000000000..4e103545727aa --- /dev/null +++ b/library/compiler-builtins/libm/src/math/ceil.rs @@ -0,0 +1,46 @@ +/// Ceil (f16) +/// +/// Finds the nearest integer greater than or equal to `x`. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ceilf16(x: f16) -> f16 { + super::generic::ceil(x) +} + +/// Ceil (f32) +/// +/// Finds the nearest integer greater than or equal to `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ceilf(x: f32) -> f32 { + select_implementation! { + name: ceilf, + use_arch: all(target_arch = "wasm32", intrinsics_enabled), + args: x, + } + + super::generic::ceil(x) +} + +/// Ceil (f64) +/// +/// Finds the nearest integer greater than or equal to `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ceil(x: f64) -> f64 { + select_implementation! { + name: ceil, + use_arch: all(target_arch = "wasm32", intrinsics_enabled), + use_arch_required: all(target_arch = "x86", not(target_feature = "sse2")), + args: x, + } + + super::generic::ceil(x) +} + +/// Ceil (f128) +/// +/// Finds the nearest integer greater than or equal to `x`. +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ceilf128(x: f128) -> f128 { + super::generic::ceil(x) +} diff --git a/library/compiler-builtins/libm/src/math/copysign.rs b/library/compiler-builtins/libm/src/math/copysign.rs new file mode 100644 index 0000000000000..d2a86e7fd545f --- /dev/null +++ b/library/compiler-builtins/libm/src/math/copysign.rs @@ -0,0 +1,88 @@ +/// Sign of Y, magnitude of X (f16) +/// +/// Constructs a number with the magnitude (absolute value) of its +/// first argument, `x`, and the sign of its second argument, `y`. 
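For orientation, the behaviour these thin wrappers forward to is the familiar IEEE copySign operation, the same one the standard library exposes: magnitude from the first argument, sign bit from the second, NaNs included. A minimal illustration using std (not this crate's functions):

fn main() {
    assert_eq!(3.5_f64.copysign(-0.0), -3.5); // the sign comes from -0.0
    assert_eq!((-3.5_f64).copysign(1.0), 3.5);
    // The sign bit is transferred even for NaN inputs.
    assert!(f64::NAN.copysign(-1.0).is_sign_negative());
}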
+#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn copysignf16(x: f16, y: f16) -> f16 { + super::generic::copysign(x, y) +} + +/// Sign of Y, magnitude of X (f32) +/// +/// Constructs a number with the magnitude (absolute value) of its +/// first argument, `x`, and the sign of its second argument, `y`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn copysignf(x: f32, y: f32) -> f32 { + super::generic::copysign(x, y) +} + +/// Sign of Y, magnitude of X (f64) +/// +/// Constructs a number with the magnitude (absolute value) of its +/// first argument, `x`, and the sign of its second argument, `y`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn copysign(x: f64, y: f64) -> f64 { + super::generic::copysign(x, y) +} + +/// Sign of Y, magnitude of X (f128) +/// +/// Constructs a number with the magnitude (absolute value) of its +/// first argument, `x`, and the sign of its second argument, `y`. +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn copysignf128(x: f128, y: f128) -> f128 { + super::generic::copysign(x, y) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::support::Float; + + fn spec_test(f: impl Fn(F, F) -> F) { + assert_biteq!(f(F::ZERO, F::ZERO), F::ZERO); + assert_biteq!(f(F::NEG_ZERO, F::ZERO), F::ZERO); + assert_biteq!(f(F::ZERO, F::NEG_ZERO), F::NEG_ZERO); + assert_biteq!(f(F::NEG_ZERO, F::NEG_ZERO), F::NEG_ZERO); + + assert_biteq!(f(F::ONE, F::ONE), F::ONE); + assert_biteq!(f(F::NEG_ONE, F::ONE), F::ONE); + assert_biteq!(f(F::ONE, F::NEG_ONE), F::NEG_ONE); + assert_biteq!(f(F::NEG_ONE, F::NEG_ONE), F::NEG_ONE); + + assert_biteq!(f(F::INFINITY, F::INFINITY), F::INFINITY); + assert_biteq!(f(F::NEG_INFINITY, F::INFINITY), F::INFINITY); + assert_biteq!(f(F::INFINITY, F::NEG_INFINITY), F::NEG_INFINITY); + assert_biteq!(f(F::NEG_INFINITY, F::NEG_INFINITY), F::NEG_INFINITY); + + // Not required but we expect it + assert_biteq!(f(F::NAN, F::NAN), F::NAN); + assert_biteq!(f(F::NEG_NAN, F::NAN), F::NAN); + assert_biteq!(f(F::NAN, F::NEG_NAN), F::NEG_NAN); + assert_biteq!(f(F::NEG_NAN, F::NEG_NAN), F::NEG_NAN); + } + + #[test] + #[cfg(f16_enabled)] + fn spec_tests_f16() { + spec_test::(copysignf16); + } + + #[test] + fn spec_tests_f32() { + spec_test::(copysignf); + } + + #[test] + fn spec_tests_f64() { + spec_test::(copysign); + } + + #[test] + #[cfg(f128_enabled)] + fn spec_tests_f128() { + spec_test::(copysignf128); + } +} diff --git a/library/compiler-builtins/libm/src/math/copysignf.rs b/library/compiler-builtins/libm/src/math/copysignf.rs new file mode 100644 index 0000000000000..8b9bed4c0c427 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/copysignf.rs @@ -0,0 +1,8 @@ +/// Sign of Y, magnitude of X (f32) +/// +/// Constructs a number with the magnitude (absolute value) of its +/// first argument, `x`, and the sign of its second argument, `y`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn copysignf(x: f32, y: f32) -> f32 { + super::generic::copysign(x, y) +} diff --git a/library/compiler-builtins/libm/src/math/copysignf128.rs b/library/compiler-builtins/libm/src/math/copysignf128.rs new file mode 100644 index 0000000000000..7bd81d42b2e9a --- /dev/null +++ b/library/compiler-builtins/libm/src/math/copysignf128.rs @@ -0,0 +1,8 @@ +/// Sign of Y, magnitude of X (f128) +/// +/// Constructs a number with the magnitude (absolute value) of its +/// first argument, `x`, and the sign of its second argument, `y`. 
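A generic copysign is pure bit manipulation. A minimal f64 sketch of what such an implementation boils down to (hypothetical `copysign_bits` helper, not the crate's `generic::copysign`):

fn copysign_bits(x: f64, y: f64) -> f64 {
    const SIGN: u64 = 1 << 63;
    // Keep the magnitude bits of `x`, take only the sign bit of `y`.
    f64::from_bits((x.to_bits() & !SIGN) | (y.to_bits() & SIGN))
}

fn main() {
    assert_eq!(copysign_bits(2.0, -1.0), -2.0);
    assert_eq!(copysign_bits(-0.0, 1.0).to_bits(), 0); // +0.0 exactly
}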
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn copysignf128(x: f128, y: f128) -> f128 { + super::generic::copysign(x, y) +} diff --git a/library/compiler-builtins/libm/src/math/copysignf16.rs b/library/compiler-builtins/libm/src/math/copysignf16.rs new file mode 100644 index 0000000000000..8206586860102 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/copysignf16.rs @@ -0,0 +1,8 @@ +/// Sign of Y, magnitude of X (f16) +/// +/// Constructs a number with the magnitude (absolute value) of its +/// first argument, `x`, and the sign of its second argument, `y`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn copysignf16(x: f16, y: f16) -> f16 { + super::generic::copysign(x, y) +} diff --git a/library/compiler-builtins/libm/src/math/cos.rs b/library/compiler-builtins/libm/src/math/cos.rs new file mode 100644 index 0000000000000..de99cd4c5e454 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/cos.rs @@ -0,0 +1,77 @@ +// origin: FreeBSD /usr/src/lib/msun/src/s_cos.c */ +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== + +use super::{k_cos, k_sin, rem_pio2}; + +// cos(x) +// Return cosine function of x. +// +// kernel function: +// k_sin ... sine function on [-pi/4,pi/4] +// k_cos ... cosine function on [-pi/4,pi/4] +// rem_pio2 ... argument reduction routine +// +// Method. +// Let S,C and T denote the sin, cos and tan respectively on +// [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2 +// in [-pi/4 , +pi/4], and let n = k mod 4. +// We have +// +// n sin(x) cos(x) tan(x) +// ---------------------------------------------------------- +// 0 S C T +// 1 C -S -1/T +// 2 -S -C T +// 3 -C S -1/T +// ---------------------------------------------------------- +// +// Special cases: +// Let trig be any of sin, cos, or tan. +// trig(+-INF) is NaN, with signals; +// trig(NaN) is that NaN; +// +// Accuracy: +// TRIG(x) returns trig(x) nearly rounded +// + +/// The cosine of `x` (f64). +/// +/// `x` is specified in radians. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn cos(x: f64) -> f64 { + let ix = (f64::to_bits(x) >> 32) as u32 & 0x7fffffff; + + /* |x| ~< pi/4 */ + if ix <= 0x3fe921fb { + if ix < 0x3e46a09e { + /* if x < 2**-27 * sqrt(2) */ + /* raise inexact if x != 0 */ + if x as i32 == 0 { + return 1.0; + } + } + return k_cos(x, 0.0); + } + + /* cos(Inf or NaN) is NaN */ + if ix >= 0x7ff00000 { + return x - x; + } + + /* argument reduction needed */ + let (n, y0, y1) = rem_pio2(x); + match n & 3 { + 0 => k_cos(y0, y1), + 1 => -k_sin(y0, y1, 1), + 2 => -k_cos(y0, y1), + _ => k_sin(y0, y1, 1), + } +} diff --git a/library/compiler-builtins/libm/src/math/cosf.rs b/library/compiler-builtins/libm/src/math/cosf.rs new file mode 100644 index 0000000000000..27c2fc3b9945f --- /dev/null +++ b/library/compiler-builtins/libm/src/math/cosf.rs @@ -0,0 +1,86 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_cosf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + * Optimized by Bruce D. Evans. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 
+ * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use core::f64::consts::FRAC_PI_2; + +use super::{k_cosf, k_sinf, rem_pio2f}; + +/* Small multiples of pi/2 rounded to double precision. */ +const C1_PIO2: f64 = 1. * FRAC_PI_2; /* 0x3FF921FB, 0x54442D18 */ +const C2_PIO2: f64 = 2. * FRAC_PI_2; /* 0x400921FB, 0x54442D18 */ +const C3_PIO2: f64 = 3. * FRAC_PI_2; /* 0x4012D97C, 0x7F3321D2 */ +const C4_PIO2: f64 = 4. * FRAC_PI_2; /* 0x401921FB, 0x54442D18 */ + +/// The cosine of `x` (f32). +/// +/// `x` is specified in radians. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn cosf(x: f32) -> f32 { + let x64 = x as f64; + + let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 + + let mut ix = x.to_bits(); + let sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + + if ix <= 0x3f490fda { + /* |x| ~<= pi/4 */ + if ix < 0x39800000 { + /* |x| < 2**-12 */ + /* raise inexact if x != 0 */ + force_eval!(x + x1p120); + return 1.; + } + return k_cosf(x64); + } + if ix <= 0x407b53d1 { + /* |x| ~<= 5*pi/4 */ + if ix > 0x4016cbe3 { + /* |x| ~> 3*pi/4 */ + return -k_cosf(if sign { x64 + C2_PIO2 } else { x64 - C2_PIO2 }); + } else if sign { + return k_sinf(x64 + C1_PIO2); + } else { + return k_sinf(C1_PIO2 - x64); + } + } + if ix <= 0x40e231d5 { + /* |x| ~<= 9*pi/4 */ + if ix > 0x40afeddf { + /* |x| ~> 7*pi/4 */ + return k_cosf(if sign { x64 + C4_PIO2 } else { x64 - C4_PIO2 }); + } else if sign { + return k_sinf(-x64 - C3_PIO2); + } else { + return k_sinf(x64 - C3_PIO2); + } + } + + /* cos(Inf or NaN) is NaN */ + if ix >= 0x7f800000 { + return x - x; + } + + /* general argument reduction needed */ + let (n, y) = rem_pio2f(x); + match n & 3 { + 0 => k_cosf(y), + 1 => k_sinf(-y), + 2 => -k_cosf(y), + _ => k_sinf(y), + } +} diff --git a/library/compiler-builtins/libm/src/math/cosh.rs b/library/compiler-builtins/libm/src/math/cosh.rs new file mode 100644 index 0000000000000..d2e43fd6cb690 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/cosh.rs @@ -0,0 +1,36 @@ +use super::{exp, expm1, k_expo2}; + +/// Hyperbolic cosine (f64) +/// +/// Computes the hyperbolic cosine of the argument x. +/// Is defined as `(exp(x) + exp(-x))/2` +/// Angles are specified in radians. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn cosh(mut x: f64) -> f64 { + /* |x| */ + let mut ix = x.to_bits(); + ix &= 0x7fffffffffffffff; + x = f64::from_bits(ix); + let w = ix >> 32; + + /* |x| < log(2) */ + if w < 0x3fe62e42 { + if w < 0x3ff00000 - (26 << 20) { + let x1p120 = f64::from_bits(0x4770000000000000); + force_eval!(x + x1p120); + return 1.; + } + let t = expm1(x); // exponential minus 1 + return 1. + t * t / (2. * (1. + t)); + } + + /* |x| < log(DBL_MAX) */ + if w < 0x40862e42 { + let t = exp(x); + /* note: if x>log(0x1p26) then the 1/t is not needed */ + return 0.5 * (t + 1. / t); + } + + /* |x| > log(DBL_MAX) or nan */ + k_expo2(x) +} diff --git a/library/compiler-builtins/libm/src/math/coshf.rs b/library/compiler-builtins/libm/src/math/coshf.rs new file mode 100644 index 0000000000000..567a24410e796 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/coshf.rs @@ -0,0 +1,36 @@ +use super::{expf, expm1f, k_expo2f}; + +/// Hyperbolic cosine (f64) +/// +/// Computes the hyperbolic cosine of the argument x. 
+/// Is defined as `(exp(x) + exp(-x))/2` +/// Angles are specified in radians. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn coshf(mut x: f32) -> f32 { + let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 + + /* |x| */ + let mut ix = x.to_bits(); + ix &= 0x7fffffff; + x = f32::from_bits(ix); + let w = ix; + + /* |x| < log(2) */ + if w < 0x3f317217 { + if w < (0x3f800000 - (12 << 23)) { + force_eval!(x + x1p120); + return 1.; + } + let t = expm1f(x); + return 1. + t * t / (2. * (1. + t)); + } + + /* |x| < log(FLT_MAX) */ + if w < 0x42b17217 { + let t = expf(x); + return 0.5 * (t + 1. / t); + } + + /* |x| > log(FLT_MAX) or nan */ + k_expo2f(x) +} diff --git a/library/compiler-builtins/libm/src/math/erf.rs b/library/compiler-builtins/libm/src/math/erf.rs new file mode 100644 index 0000000000000..5d82228a05fd0 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/erf.rs @@ -0,0 +1,314 @@ +use super::{exp, fabs, get_high_word, with_set_low_word}; +/* origin: FreeBSD /usr/src/lib/msun/src/s_erf.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* double erf(double x) + * double erfc(double x) + * x + * 2 |\ + * erf(x) = --------- | exp(-t*t)dt + * sqrt(pi) \| + * 0 + * + * erfc(x) = 1-erf(x) + * Note that + * erf(-x) = -erf(x) + * erfc(-x) = 2 - erfc(x) + * + * Method: + * 1. For |x| in [0, 0.84375] + * erf(x) = x + x*R(x^2) + * erfc(x) = 1 - erf(x) if x in [-.84375,0.25] + * = 0.5 + ((0.5-x)-x*R) if x in [0.25,0.84375] + * where R = P/Q where P is an odd poly of degree 8 and + * Q is an odd poly of degree 10. + * -57.90 + * | R - (erf(x)-x)/x | <= 2 + * + * + * Remark. The formula is derived by noting + * erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....) + * and that + * 2/sqrt(pi) = 1.128379167095512573896158903121545171688 + * is close to one. The interval is chosen because the fix + * point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is + * near 0.6174), and by some experiment, 0.84375 is chosen to + * guarantee the error is less than one ulp for erf. + * + * 2. For |x| in [0.84375,1.25], let s = |x| - 1, and + * c = 0.84506291151 rounded to single (24 bits) + * erf(x) = sign(x) * (c + P1(s)/Q1(s)) + * erfc(x) = (1-c) - P1(s)/Q1(s) if x > 0 + * 1+(c+P1(s)/Q1(s)) if x < 0 + * |P1/Q1 - (erf(|x|)-c)| <= 2**-59.06 + * Remark: here we use the taylor series expansion at x=1. + * erf(1+s) = erf(1) + s*Poly(s) + * = 0.845.. + P1(s)/Q1(s) + * That is, we use rational approximation to approximate + * erf(1+s) - (c = (single)0.84506291151) + * Note that |P1/Q1|< 0.078 for x in [0.84375,1.25] + * where + * P1(s) = degree 6 poly in s + * Q1(s) = degree 6 poly in s + * + * 3. For x in [1.25,1/0.35(~2.857143)], + * erfc(x) = (1/x)*exp(-x*x-0.5625+R1/S1) + * erf(x) = 1 - erfc(x) + * where + * R1(z) = degree 7 poly in z, (z=1/x^2) + * S1(z) = degree 8 poly in z + * + * 4. For x in [1/0.35,28] + * erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0 + * = 2.0 - (1/x)*exp(-x*x-0.5625+R2/S2) if -6 x >= 28 + * erf(x) = sign(x) *(1 - tiny) (raise inexact) + * erfc(x) = tiny*tiny (raise underflow) if x > 0 + * = 2 - tiny if x<0 + * + * 7. 
Special case: + * erf(0) = 0, erf(inf) = 1, erf(-inf) = -1, + * erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2, + * erfc/erf(NaN) is NaN + */ + +const ERX: f64 = 8.45062911510467529297e-01; /* 0x3FEB0AC1, 0x60000000 */ +/* + * Coefficients for approximation to erf on [0,0.84375] + */ +const EFX8: f64 = 1.02703333676410069053e+00; /* 0x3FF06EBA, 0x8214DB69 */ +const PP0: f64 = 1.28379167095512558561e-01; /* 0x3FC06EBA, 0x8214DB68 */ +const PP1: f64 = -3.25042107247001499370e-01; /* 0xBFD4CD7D, 0x691CB913 */ +const PP2: f64 = -2.84817495755985104766e-02; /* 0xBF9D2A51, 0xDBD7194F */ +const PP3: f64 = -5.77027029648944159157e-03; /* 0xBF77A291, 0x236668E4 */ +const PP4: f64 = -2.37630166566501626084e-05; /* 0xBEF8EAD6, 0x120016AC */ +const QQ1: f64 = 3.97917223959155352819e-01; /* 0x3FD97779, 0xCDDADC09 */ +const QQ2: f64 = 6.50222499887672944485e-02; /* 0x3FB0A54C, 0x5536CEBA */ +const QQ3: f64 = 5.08130628187576562776e-03; /* 0x3F74D022, 0xC4D36B0F */ +const QQ4: f64 = 1.32494738004321644526e-04; /* 0x3F215DC9, 0x221C1A10 */ +const QQ5: f64 = -3.96022827877536812320e-06; /* 0xBED09C43, 0x42A26120 */ +/* + * Coefficients for approximation to erf in [0.84375,1.25] + */ +const PA0: f64 = -2.36211856075265944077e-03; /* 0xBF6359B8, 0xBEF77538 */ +const PA1: f64 = 4.14856118683748331666e-01; /* 0x3FDA8D00, 0xAD92B34D */ +const PA2: f64 = -3.72207876035701323847e-01; /* 0xBFD7D240, 0xFBB8C3F1 */ +const PA3: f64 = 3.18346619901161753674e-01; /* 0x3FD45FCA, 0x805120E4 */ +const PA4: f64 = -1.10894694282396677476e-01; /* 0xBFBC6398, 0x3D3E28EC */ +const PA5: f64 = 3.54783043256182359371e-02; /* 0x3FA22A36, 0x599795EB */ +const PA6: f64 = -2.16637559486879084300e-03; /* 0xBF61BF38, 0x0A96073F */ +const QA1: f64 = 1.06420880400844228286e-01; /* 0x3FBB3E66, 0x18EEE323 */ +const QA2: f64 = 5.40397917702171048937e-01; /* 0x3FE14AF0, 0x92EB6F33 */ +const QA3: f64 = 7.18286544141962662868e-02; /* 0x3FB2635C, 0xD99FE9A7 */ +const QA4: f64 = 1.26171219808761642112e-01; /* 0x3FC02660, 0xE763351F */ +const QA5: f64 = 1.36370839120290507362e-02; /* 0x3F8BEDC2, 0x6B51DD1C */ +const QA6: f64 = 1.19844998467991074170e-02; /* 0x3F888B54, 0x5735151D */ +/* + * Coefficients for approximation to erfc in [1.25,1/0.35] + */ +const RA0: f64 = -9.86494403484714822705e-03; /* 0xBF843412, 0x600D6435 */ +const RA1: f64 = -6.93858572707181764372e-01; /* 0xBFE63416, 0xE4BA7360 */ +const RA2: f64 = -1.05586262253232909814e+01; /* 0xC0251E04, 0x41B0E726 */ +const RA3: f64 = -6.23753324503260060396e+01; /* 0xC04F300A, 0xE4CBA38D */ +const RA4: f64 = -1.62396669462573470355e+02; /* 0xC0644CB1, 0x84282266 */ +const RA5: f64 = -1.84605092906711035994e+02; /* 0xC067135C, 0xEBCCABB2 */ +const RA6: f64 = -8.12874355063065934246e+01; /* 0xC0545265, 0x57E4D2F2 */ +const RA7: f64 = -9.81432934416914548592e+00; /* 0xC023A0EF, 0xC69AC25C */ +const SA1: f64 = 1.96512716674392571292e+01; /* 0x4033A6B9, 0xBD707687 */ +const SA2: f64 = 1.37657754143519042600e+02; /* 0x4061350C, 0x526AE721 */ +const SA3: f64 = 4.34565877475229228821e+02; /* 0x407B290D, 0xD58A1A71 */ +const SA4: f64 = 6.45387271733267880336e+02; /* 0x40842B19, 0x21EC2868 */ +const SA5: f64 = 4.29008140027567833386e+02; /* 0x407AD021, 0x57700314 */ +const SA6: f64 = 1.08635005541779435134e+02; /* 0x405B28A3, 0xEE48AE2C */ +const SA7: f64 = 6.57024977031928170135e+00; /* 0x401A47EF, 0x8E484A93 */ +const SA8: f64 = -6.04244152148580987438e-02; /* 0xBFAEEFF2, 0xEE749A62 */ +/* + * Coefficients for approximation to erfc in [1/.35,28] + */ +const RB0: f64 = -9.86494292470009928597e-03; /* 
0xBF843412, 0x39E86F4A */ +const RB1: f64 = -7.99283237680523006574e-01; /* 0xBFE993BA, 0x70C285DE */ +const RB2: f64 = -1.77579549177547519889e+01; /* 0xC031C209, 0x555F995A */ +const RB3: f64 = -1.60636384855821916062e+02; /* 0xC064145D, 0x43C5ED98 */ +const RB4: f64 = -6.37566443368389627722e+02; /* 0xC083EC88, 0x1375F228 */ +const RB5: f64 = -1.02509513161107724954e+03; /* 0xC0900461, 0x6A2E5992 */ +const RB6: f64 = -4.83519191608651397019e+02; /* 0xC07E384E, 0x9BDC383F */ +const SB1: f64 = 3.03380607434824582924e+01; /* 0x403E568B, 0x261D5190 */ +const SB2: f64 = 3.25792512996573918826e+02; /* 0x40745CAE, 0x221B9F0A */ +const SB3: f64 = 1.53672958608443695994e+03; /* 0x409802EB, 0x189D5118 */ +const SB4: f64 = 3.19985821950859553908e+03; /* 0x40A8FFB7, 0x688C246A */ +const SB5: f64 = 2.55305040643316442583e+03; /* 0x40A3F219, 0xCEDF3BE6 */ +const SB6: f64 = 4.74528541206955367215e+02; /* 0x407DA874, 0xE79FE763 */ +const SB7: f64 = -2.24409524465858183362e+01; /* 0xC03670E2, 0x42712D62 */ + +fn erfc1(x: f64) -> f64 { + let s: f64; + let p: f64; + let q: f64; + + s = fabs(x) - 1.0; + p = PA0 + s * (PA1 + s * (PA2 + s * (PA3 + s * (PA4 + s * (PA5 + s * PA6))))); + q = 1.0 + s * (QA1 + s * (QA2 + s * (QA3 + s * (QA4 + s * (QA5 + s * QA6))))); + + 1.0 - ERX - p / q +} + +fn erfc2(ix: u32, mut x: f64) -> f64 { + let s: f64; + let r: f64; + let big_s: f64; + let z: f64; + + if ix < 0x3ff40000 { + /* |x| < 1.25 */ + return erfc1(x); + } + + x = fabs(x); + s = 1.0 / (x * x); + if ix < 0x4006db6d { + /* |x| < 1/.35 ~ 2.85714 */ + r = RA0 + s * (RA1 + s * (RA2 + s * (RA3 + s * (RA4 + s * (RA5 + s * (RA6 + s * RA7)))))); + big_s = 1.0 + + s * (SA1 + + s * (SA2 + s * (SA3 + s * (SA4 + s * (SA5 + s * (SA6 + s * (SA7 + s * SA8))))))); + } else { + /* |x| > 1/.35 */ + r = RB0 + s * (RB1 + s * (RB2 + s * (RB3 + s * (RB4 + s * (RB5 + s * RB6))))); + big_s = + 1.0 + s * (SB1 + s * (SB2 + s * (SB3 + s * (SB4 + s * (SB5 + s * (SB6 + s * SB7)))))); + } + z = with_set_low_word(x, 0); + + exp(-z * z - 0.5625) * exp((z - x) * (z + x) + r / big_s) / x +} + +/// Error function (f64) +/// +/// Calculates an approximation to the “error function”, which estimates +/// the probability that an observation will fall within x standard +/// deviations of the mean (assuming a normal distribution). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn erf(x: f64) -> f64 { + let r: f64; + let s: f64; + let z: f64; + let y: f64; + let mut ix: u32; + let sign: usize; + + ix = get_high_word(x); + sign = (ix >> 31) as usize; + ix &= 0x7fffffff; + if ix >= 0x7ff00000 { + /* erf(nan)=nan, erf(+-inf)=+-1 */ + return 1.0 - 2.0 * (sign as f64) + 1.0 / x; + } + if ix < 0x3feb0000 { + /* |x| < 0.84375 */ + if ix < 0x3e300000 { + /* |x| < 2**-28 */ + /* avoid underflow */ + return 0.125 * (8.0 * x + EFX8 * x); + } + z = x * x; + r = PP0 + z * (PP1 + z * (PP2 + z * (PP3 + z * PP4))); + s = 1.0 + z * (QQ1 + z * (QQ2 + z * (QQ3 + z * (QQ4 + z * QQ5)))); + y = r / s; + return x + x * y; + } + if ix < 0x40180000 { + /* 0.84375 <= |x| < 6 */ + y = 1.0 - erfc2(ix, x); + } else { + let x1p_1022 = f64::from_bits(0x0010000000000000); + y = 1.0 - x1p_1022; + } + + if sign != 0 { -y } else { y } +} + +/// Complementary error function (f64) +/// +/// Calculates the complementary probability. +/// Is `1 - erf(x)`. Is computed directly, so that you can use it to avoid +/// the loss of precision that would result from subtracting +/// large probabilities (on large `x`) from 1. 
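To see why `erfc` is provided directly rather than computed as `1.0 - erf(x)`: for even moderately large x, erf(x) rounds to exactly 1.0 and the subtraction yields 0.0, while erfc(x) still carries the tiny tail. A sketch, assuming the code is consumed as the external `libm` crate:

fn main() {
    let x = 10.0_f64;
    let naive = 1.0 - libm::erf(x); // erf(10) rounds to 1.0, so this is 0.0
    let direct = libm::erfc(x);     // roughly 2.1e-45, still meaningful
    assert_eq!(naive, 0.0);
    assert!(direct > 0.0 && direct < 1e-40);
}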
+pub fn erfc(x: f64) -> f64 { + let r: f64; + let s: f64; + let z: f64; + let y: f64; + let mut ix: u32; + let sign: usize; + + ix = get_high_word(x); + sign = (ix >> 31) as usize; + ix &= 0x7fffffff; + if ix >= 0x7ff00000 { + /* erfc(nan)=nan, erfc(+-inf)=0,2 */ + return 2.0 * (sign as f64) + 1.0 / x; + } + if ix < 0x3feb0000 { + /* |x| < 0.84375 */ + if ix < 0x3c700000 { + /* |x| < 2**-56 */ + return 1.0 - x; + } + z = x * x; + r = PP0 + z * (PP1 + z * (PP2 + z * (PP3 + z * PP4))); + s = 1.0 + z * (QQ1 + z * (QQ2 + z * (QQ3 + z * (QQ4 + z * QQ5)))); + y = r / s; + if sign != 0 || ix < 0x3fd00000 { + /* x < 1/4 */ + return 1.0 - (x + x * y); + } + return 0.5 - (x - 0.5 + x * y); + } + if ix < 0x403c0000 { + /* 0.84375 <= |x| < 28 */ + if sign != 0 { + return 2.0 - erfc2(ix, x); + } else { + return erfc2(ix, x); + } + } + + let x1p_1022 = f64::from_bits(0x0010000000000000); + if sign != 0 { + 2.0 - x1p_1022 + } else { + x1p_1022 * x1p_1022 + } +} diff --git a/library/compiler-builtins/libm/src/math/erff.rs b/library/compiler-builtins/libm/src/math/erff.rs new file mode 100644 index 0000000000000..fe15f01082e4f --- /dev/null +++ b/library/compiler-builtins/libm/src/math/erff.rs @@ -0,0 +1,226 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_erff.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::{expf, fabsf}; + +const ERX: f32 = 8.4506291151e-01; /* 0x3f58560b */ +/* + * Coefficients for approximation to erf on [0,0.84375] + */ +const EFX8: f32 = 1.0270333290e+00; /* 0x3f8375d4 */ +const PP0: f32 = 1.2837916613e-01; /* 0x3e0375d4 */ +const PP1: f32 = -3.2504209876e-01; /* 0xbea66beb */ +const PP2: f32 = -2.8481749818e-02; /* 0xbce9528f */ +const PP3: f32 = -5.7702702470e-03; /* 0xbbbd1489 */ +const PP4: f32 = -2.3763017452e-05; /* 0xb7c756b1 */ +const QQ1: f32 = 3.9791721106e-01; /* 0x3ecbbbce */ +const QQ2: f32 = 6.5022252500e-02; /* 0x3d852a63 */ +const QQ3: f32 = 5.0813062117e-03; /* 0x3ba68116 */ +const QQ4: f32 = 1.3249473704e-04; /* 0x390aee49 */ +const QQ5: f32 = -3.9602282413e-06; /* 0xb684e21a */ +/* + * Coefficients for approximation to erf in [0.84375,1.25] + */ +const PA0: f32 = -2.3621185683e-03; /* 0xbb1acdc6 */ +const PA1: f32 = 4.1485610604e-01; /* 0x3ed46805 */ +const PA2: f32 = -3.7220788002e-01; /* 0xbebe9208 */ +const PA3: f32 = 3.1834661961e-01; /* 0x3ea2fe54 */ +const PA4: f32 = -1.1089469492e-01; /* 0xbde31cc2 */ +const PA5: f32 = 3.5478305072e-02; /* 0x3d1151b3 */ +const PA6: f32 = -2.1663755178e-03; /* 0xbb0df9c0 */ +const QA1: f32 = 1.0642088205e-01; /* 0x3dd9f331 */ +const QA2: f32 = 5.4039794207e-01; /* 0x3f0a5785 */ +const QA3: f32 = 7.1828655899e-02; /* 0x3d931ae7 */ +const QA4: f32 = 1.2617121637e-01; /* 0x3e013307 */ +const QA5: f32 = 1.3637083583e-02; /* 0x3c5f6e13 */ +const QA6: f32 = 1.1984500103e-02; /* 0x3c445aa3 */ +/* + * Coefficients for approximation to erfc in [1.25,1/0.35] + */ +const RA0: f32 = -9.8649440333e-03; /* 0xbc21a093 */ +const RA1: f32 = -6.9385856390e-01; /* 0xbf31a0b7 */ +const RA2: f32 = -1.0558626175e+01; /* 0xc128f022 */ +const RA3: f32 = -6.2375331879e+01; /* 0xc2798057 */ +const RA4: f32 = 
-1.6239666748e+02; /* 0xc322658c */ +const RA5: f32 = -1.8460508728e+02; /* 0xc3389ae7 */ +const RA6: f32 = -8.1287437439e+01; /* 0xc2a2932b */ +const RA7: f32 = -9.8143291473e+00; /* 0xc11d077e */ +const SA1: f32 = 1.9651271820e+01; /* 0x419d35ce */ +const SA2: f32 = 1.3765776062e+02; /* 0x4309a863 */ +const SA3: f32 = 4.3456588745e+02; /* 0x43d9486f */ +const SA4: f32 = 6.4538726807e+02; /* 0x442158c9 */ +const SA5: f32 = 4.2900814819e+02; /* 0x43d6810b */ +const SA6: f32 = 1.0863500214e+02; /* 0x42d9451f */ +const SA7: f32 = 6.5702495575e+00; /* 0x40d23f7c */ +const SA8: f32 = -6.0424413532e-02; /* 0xbd777f97 */ +/* + * Coefficients for approximation to erfc in [1/.35,28] + */ +const RB0: f32 = -9.8649431020e-03; /* 0xbc21a092 */ +const RB1: f32 = -7.9928326607e-01; /* 0xbf4c9dd4 */ +const RB2: f32 = -1.7757955551e+01; /* 0xc18e104b */ +const RB3: f32 = -1.6063638306e+02; /* 0xc320a2ea */ +const RB4: f32 = -6.3756646729e+02; /* 0xc41f6441 */ +const RB5: f32 = -1.0250950928e+03; /* 0xc480230b */ +const RB6: f32 = -4.8351919556e+02; /* 0xc3f1c275 */ +const SB1: f32 = 3.0338060379e+01; /* 0x41f2b459 */ +const SB2: f32 = 3.2579251099e+02; /* 0x43a2e571 */ +const SB3: f32 = 1.5367296143e+03; /* 0x44c01759 */ +const SB4: f32 = 3.1998581543e+03; /* 0x4547fdbb */ +const SB5: f32 = 2.5530502930e+03; /* 0x451f90ce */ +const SB6: f32 = 4.7452853394e+02; /* 0x43ed43a7 */ +const SB7: f32 = -2.2440952301e+01; /* 0xc1b38712 */ + +fn erfc1(x: f32) -> f32 { + let s: f32; + let p: f32; + let q: f32; + + s = fabsf(x) - 1.0; + p = PA0 + s * (PA1 + s * (PA2 + s * (PA3 + s * (PA4 + s * (PA5 + s * PA6))))); + q = 1.0 + s * (QA1 + s * (QA2 + s * (QA3 + s * (QA4 + s * (QA5 + s * QA6))))); + return 1.0 - ERX - p / q; +} + +fn erfc2(mut ix: u32, mut x: f32) -> f32 { + let s: f32; + let r: f32; + let big_s: f32; + let z: f32; + + if ix < 0x3fa00000 { + /* |x| < 1.25 */ + return erfc1(x); + } + + x = fabsf(x); + s = 1.0 / (x * x); + if ix < 0x4036db6d { + /* |x| < 1/0.35 */ + r = RA0 + s * (RA1 + s * (RA2 + s * (RA3 + s * (RA4 + s * (RA5 + s * (RA6 + s * RA7)))))); + big_s = 1.0 + + s * (SA1 + + s * (SA2 + s * (SA3 + s * (SA4 + s * (SA5 + s * (SA6 + s * (SA7 + s * SA8))))))); + } else { + /* |x| >= 1/0.35 */ + r = RB0 + s * (RB1 + s * (RB2 + s * (RB3 + s * (RB4 + s * (RB5 + s * RB6))))); + big_s = + 1.0 + s * (SB1 + s * (SB2 + s * (SB3 + s * (SB4 + s * (SB5 + s * (SB6 + s * SB7)))))); + } + ix = x.to_bits(); + z = f32::from_bits(ix & 0xffffe000); + + expf(-z * z - 0.5625) * expf((z - x) * (z + x) + r / big_s) / x +} + +/// Error function (f32) +/// +/// Calculates an approximation to the “error function”, which estimates +/// the probability that an observation will fall within x standard +/// deviations of the mean (assuming a normal distribution). 
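One detail worth noting from `erfc2` above: `f32::from_bits(ix & 0xffffe000)` keeps only the leading mantissa bits of x, so `z * z` can be squared exactly and the residual `(z - x) * (z + x)` exponentiated separately without cancellation. A standalone illustration of that split (tolerances are loose and for demonstration only):

fn main() {
    let x: f32 = 2.7182818;
    let z = f32::from_bits(x.to_bits() & 0xffff_e000);
    // z agrees with x to roughly 10 bits...
    assert!((x - z).abs() < x * 1e-3);
    // ...and x*x recomposes from the exactly-representable z*z plus a small correction.
    let recomposed = z * z + (x - z) * (x + z);
    assert!((recomposed - x * x).abs() <= 2.0 * f32::EPSILON * x * x);
}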
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn erff(x: f32) -> f32 { + let r: f32; + let s: f32; + let z: f32; + let y: f32; + let mut ix: u32; + let sign: usize; + + ix = x.to_bits(); + sign = (ix >> 31) as usize; + ix &= 0x7fffffff; + if ix >= 0x7f800000 { + /* erf(nan)=nan, erf(+-inf)=+-1 */ + return 1.0 - 2.0 * (sign as f32) + 1.0 / x; + } + if ix < 0x3f580000 { + /* |x| < 0.84375 */ + if ix < 0x31800000 { + /* |x| < 2**-28 */ + /*avoid underflow */ + return 0.125 * (8.0 * x + EFX8 * x); + } + z = x * x; + r = PP0 + z * (PP1 + z * (PP2 + z * (PP3 + z * PP4))); + s = 1.0 + z * (QQ1 + z * (QQ2 + z * (QQ3 + z * (QQ4 + z * QQ5)))); + y = r / s; + return x + x * y; + } + if ix < 0x40c00000 { + /* |x| < 6 */ + y = 1.0 - erfc2(ix, x); + } else { + let x1p_120 = f32::from_bits(0x03800000); + y = 1.0 - x1p_120; + } + + if sign != 0 { -y } else { y } +} + +/// Complementary error function (f32) +/// +/// Calculates the complementary probability. +/// Is `1 - erf(x)`. Is computed directly, so that you can use it to avoid +/// the loss of precision that would result from subtracting +/// large probabilities (on large `x`) from 1. +pub fn erfcf(x: f32) -> f32 { + let r: f32; + let s: f32; + let z: f32; + let y: f32; + let mut ix: u32; + let sign: usize; + + ix = x.to_bits(); + sign = (ix >> 31) as usize; + ix &= 0x7fffffff; + if ix >= 0x7f800000 { + /* erfc(nan)=nan, erfc(+-inf)=0,2 */ + return 2.0 * (sign as f32) + 1.0 / x; + } + + if ix < 0x3f580000 { + /* |x| < 0.84375 */ + if ix < 0x23800000 { + /* |x| < 2**-56 */ + return 1.0 - x; + } + z = x * x; + r = PP0 + z * (PP1 + z * (PP2 + z * (PP3 + z * PP4))); + s = 1.0 + z * (QQ1 + z * (QQ2 + z * (QQ3 + z * (QQ4 + z * QQ5)))); + y = r / s; + if sign != 0 || ix < 0x3e800000 { + /* x < 1/4 */ + return 1.0 - (x + x * y); + } + return 0.5 - (x - 0.5 + x * y); + } + if ix < 0x41e00000 { + /* |x| < 28 */ + if sign != 0 { + return 2.0 - erfc2(ix, x); + } else { + return erfc2(ix, x); + } + } + + let x1p_120 = f32::from_bits(0x03800000); + if sign != 0 { + 2.0 - x1p_120 + } else { + x1p_120 * x1p_120 + } +} diff --git a/library/compiler-builtins/libm/src/math/exp.rs b/library/compiler-builtins/libm/src/math/exp.rs new file mode 100644 index 0000000000000..782042b62cd3a --- /dev/null +++ b/library/compiler-builtins/libm/src/math/exp.rs @@ -0,0 +1,150 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_exp.c */ +/* + * ==================================================== + * Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved. + * + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* exp(x) + * Returns the exponential of x. + * + * Method + * 1. Argument reduction: + * Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658. + * Given x, find r and integer k such that + * + * x = k*ln2 + r, |r| <= 0.5*ln2. + * + * Here r will be represented as r = hi-lo for better + * accuracy. + * + * 2. Approximation of exp(r) by a special rational function on + * the interval [0,0.34658]: + * Write + * R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ... + * We use a special Remez algorithm on [0,0.34658] to generate + * a polynomial of degree 5 to approximate R. The maximum error + * of this polynomial approximation is bounded by 2**-59. 
In + * other words, + * R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5 + * (where z=r*r, and the values of P1 to P5 are listed below) + * and + * | 5 | -59 + * | 2.0+P1*z+...+P5*z - R(z) | <= 2 + * | | + * The computation of exp(r) thus becomes + * 2*r + * exp(r) = 1 + ---------- + * R(r) - r + * r*c(r) + * = 1 + r + ----------- (for better accuracy) + * 2 - c(r) + * where + * 2 4 10 + * c(r) = r - (P1*r + P2*r + ... + P5*r ). + * + * 3. Scale back to obtain exp(x): + * From step 1, we have + * exp(x) = 2^k * exp(r) + * + * Special cases: + * exp(INF) is INF, exp(NaN) is NaN; + * exp(-INF) is 0, and + * for finite argument, only exp(0)=1 is exact. + * + * Accuracy: + * according to an error analysis, the error is always less than + * 1 ulp (unit in the last place). + * + * Misc. info. + * For IEEE double + * if x > 709.782712893383973096 then exp(x) overflows + * if x < -745.133219101941108420 then exp(x) underflows + */ + +use super::scalbn; + +const HALF: [f64; 2] = [0.5, -0.5]; +const LN2HI: f64 = 6.93147180369123816490e-01; /* 0x3fe62e42, 0xfee00000 */ +const LN2LO: f64 = 1.90821492927058770002e-10; /* 0x3dea39ef, 0x35793c76 */ +const INVLN2: f64 = 1.44269504088896338700e+00; /* 0x3ff71547, 0x652b82fe */ +const P1: f64 = 1.66666666666666019037e-01; /* 0x3FC55555, 0x5555553E */ +const P2: f64 = -2.77777777770155933842e-03; /* 0xBF66C16C, 0x16BEBD93 */ +const P3: f64 = 6.61375632143793436117e-05; /* 0x3F11566A, 0xAF25DE2C */ +const P4: f64 = -1.65339022054652515390e-06; /* 0xBEBBBD41, 0xC5D26BF1 */ +const P5: f64 = 4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */ + +/// Exponential, base *e* (f64) +/// +/// Calculate the exponential of `x`, that is, *e* raised to the power `x` +/// (where *e* is the base of the natural system of logarithms, approximately 2.71828). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn exp(mut x: f64) -> f64 { + let x1p1023 = f64::from_bits(0x7fe0000000000000); // 0x1p1023 === 2 ^ 1023 + let x1p_149 = f64::from_bits(0x36a0000000000000); // 0x1p-149 === 2 ^ -149 + + let hi: f64; + let lo: f64; + let c: f64; + let xx: f64; + let y: f64; + let k: i32; + let sign: i32; + let mut hx: u32; + + hx = (x.to_bits() >> 32) as u32; + sign = (hx >> 31) as i32; + hx &= 0x7fffffff; /* high word of |x| */ + + /* special cases */ + if hx >= 0x4086232b { + /* if |x| >= 708.39... */ + if x.is_nan() { + return x; + } + if x > 709.782712893383973096 { + /* overflow if x!=inf */ + x *= x1p1023; + return x; + } + if x < -708.39641853226410622 { + /* underflow if x!=-inf */ + force_eval!((-x1p_149 / x) as f32); + if x < -745.13321910194110842 { + return 0.; + } + } + } + + /* argument reduction */ + if hx > 0x3fd62e42 { + /* if |x| > 0.5 ln2 */ + if hx >= 0x3ff0a2b2 { + /* if |x| >= 1.5 ln2 */ + k = (INVLN2 * x + i!(HALF, sign as usize)) as i32; + } else { + k = 1 - sign - sign; + } + hi = x - k as f64 * LN2HI; /* k*ln2hi is exact here */ + lo = k as f64 * LN2LO; + x = hi - lo; + } else if hx > 0x3e300000 { + /* if |x| > 2**-28 */ + k = 0; + hi = x; + lo = 0.; + } else { + /* inexact if x!=0 */ + force_eval!(x1p1023 + x); + return 1. + x; + } + + /* x is now in primary range */ + xx = x * x; + c = x - xx * (P1 + xx * (P2 + xx * (P3 + xx * (P4 + xx * P5)))); + y = 1. + (x * c / (2. 
- c) - lo + hi); + if k == 0 { y } else { scalbn(y, k) } +} diff --git a/library/compiler-builtins/libm/src/math/exp10.rs b/library/compiler-builtins/libm/src/math/exp10.rs new file mode 100644 index 0000000000000..7c33c92b6032c --- /dev/null +++ b/library/compiler-builtins/libm/src/math/exp10.rs @@ -0,0 +1,23 @@ +use super::{exp2, modf, pow}; + +const LN10: f64 = 3.32192809488736234787031942948939; +const P10: &[f64] = &[ + 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, + 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, +]; + +/// Calculates 10 raised to the power of `x` (f64). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn exp10(x: f64) -> f64 { + let (mut y, n) = modf(x); + let u: u64 = n.to_bits(); + /* fabs(n) < 16 without raising invalid on nan */ + if ((u >> 52) & 0x7ff) < 0x3ff + 4 { + if y == 0.0 { + return i!(P10, ((n as isize) + 15) as usize); + } + y = exp2(LN10 * y); + return y * i!(P10, ((n as isize) + 15) as usize); + } + return pow(10.0, x); +} diff --git a/library/compiler-builtins/libm/src/math/exp10f.rs b/library/compiler-builtins/libm/src/math/exp10f.rs new file mode 100644 index 0000000000000..303045b331322 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/exp10f.rs @@ -0,0 +1,23 @@ +use super::{exp2, exp2f, modff}; + +const LN10_F32: f32 = 3.32192809488736234787031942948939; +const LN10_F64: f64 = 3.32192809488736234787031942948939; +const P10: &[f32] = &[ + 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, +]; + +/// Calculates 10 raised to the power of `x` (f32). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn exp10f(x: f32) -> f32 { + let (mut y, n) = modff(x); + let u = n.to_bits(); + /* fabsf(n) < 8 without raising invalid on nan */ + if ((u >> 23) & 0xff) < 0x7f + 3 { + if y == 0.0 { + return i!(P10, ((n as isize) + 7) as usize); + } + y = exp2f(LN10_F32 * y); + return y * i!(P10, ((n as isize) + 7) as usize); + } + return exp2(LN10_F64 * (x as f64)) as f32; +} diff --git a/library/compiler-builtins/libm/src/math/exp2.rs b/library/compiler-builtins/libm/src/math/exp2.rs new file mode 100644 index 0000000000000..6e98d066cbfce --- /dev/null +++ b/library/compiler-builtins/libm/src/math/exp2.rs @@ -0,0 +1,394 @@ +// origin: FreeBSD /usr/src/lib/msun/src/s_exp2.c */ +//- +// Copyright (c) 2005 David Schultz +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +// SUCH DAMAGE. + +use super::scalbn; + +const TBLSIZE: usize = 256; + +#[rustfmt::skip] +static TBL: [u64; TBLSIZE * 2] = [ + // exp2(z + eps) eps + 0x3fe6a09e667f3d5d, 0x3d39880000000000, + 0x3fe6b052fa751744, 0x3cd8000000000000, + 0x3fe6c012750bd9fe, 0xbd28780000000000, + 0x3fe6cfdcddd476bf, 0x3d1ec00000000000, + 0x3fe6dfb23c651a29, 0xbcd8000000000000, + 0x3fe6ef9298593ae3, 0xbcbc000000000000, + 0x3fe6ff7df9519386, 0xbd2fd80000000000, + 0x3fe70f7466f42da3, 0xbd2c880000000000, + 0x3fe71f75e8ec5fc3, 0x3d13c00000000000, + 0x3fe72f8286eacf05, 0xbd38300000000000, + 0x3fe73f9a48a58152, 0xbd00c00000000000, + 0x3fe74fbd35d7ccfc, 0x3d2f880000000000, + 0x3fe75feb564267f1, 0x3d03e00000000000, + 0x3fe77024b1ab6d48, 0xbd27d00000000000, + 0x3fe780694fde5d38, 0xbcdd000000000000, + 0x3fe790b938ac1d00, 0x3ce3000000000000, + 0x3fe7a11473eb0178, 0xbced000000000000, + 0x3fe7b17b0976d060, 0x3d20400000000000, + 0x3fe7c1ed0130c133, 0x3ca0000000000000, + 0x3fe7d26a62ff8636, 0xbd26900000000000, + 0x3fe7e2f336cf4e3b, 0xbd02e00000000000, + 0x3fe7f3878491c3e8, 0xbd24580000000000, + 0x3fe80427543e1b4e, 0x3d33000000000000, + 0x3fe814d2add1071a, 0x3d0f000000000000, + 0x3fe82589994ccd7e, 0xbd21c00000000000, + 0x3fe8364c1eb942d0, 0x3d29d00000000000, + 0x3fe8471a4623cab5, 0x3d47100000000000, + 0x3fe857f4179f5bbc, 0x3d22600000000000, + 0x3fe868d99b4491af, 0xbd32c40000000000, + 0x3fe879cad931a395, 0xbd23000000000000, + 0x3fe88ac7d98a65b8, 0xbd2a800000000000, + 0x3fe89bd0a4785800, 0xbced000000000000, + 0x3fe8ace5422aa223, 0x3d33280000000000, + 0x3fe8be05bad619fa, 0x3d42b40000000000, + 0x3fe8cf3216b54383, 0xbd2ed00000000000, + 0x3fe8e06a5e08664c, 0xbd20500000000000, + 0x3fe8f1ae99157807, 0x3d28280000000000, + 0x3fe902fed0282c0e, 0xbd1cb00000000000, + 0x3fe9145b0b91ff96, 0xbd05e00000000000, + 0x3fe925c353aa2ff9, 0x3cf5400000000000, + 0x3fe93737b0cdc64a, 0x3d17200000000000, + 0x3fe948b82b5f98ae, 0xbd09000000000000, + 0x3fe95a44cbc852cb, 0x3d25680000000000, + 0x3fe96bdd9a766f21, 0xbd36d00000000000, + 0x3fe97d829fde4e2a, 0xbd01000000000000, + 0x3fe98f33e47a23a3, 0x3d2d000000000000, + 0x3fe9a0f170ca0604, 0xbd38a40000000000, + 0x3fe9b2bb4d53ff89, 0x3d355c0000000000, + 0x3fe9c49182a3f15b, 0x3d26b80000000000, + 0x3fe9d674194bb8c5, 0xbcec000000000000, + 0x3fe9e86319e3238e, 0x3d17d00000000000, + 0x3fe9fa5e8d07f302, 0x3d16400000000000, + 0x3fea0c667b5de54d, 0xbcf5000000000000, + 0x3fea1e7aed8eb8f6, 0x3d09e00000000000, + 0x3fea309bec4a2e27, 0x3d2ad80000000000, + 0x3fea42c980460a5d, 0xbd1af00000000000, + 0x3fea5503b23e259b, 0x3d0b600000000000, + 0x3fea674a8af46213, 0x3d38880000000000, + 0x3fea799e1330b3a7, 0x3d11200000000000, + 0x3fea8bfe53c12e8d, 0x3d06c00000000000, + 0x3fea9e6b5579fcd2, 0xbd29b80000000000, + 0x3feab0e521356fb8, 0x3d2b700000000000, + 0x3feac36bbfd3f381, 0x3cd9000000000000, + 0x3fead5ff3a3c2780, 0x3ce4000000000000, + 0x3feae89f995ad2a3, 0xbd2c900000000000, + 0x3feafb4ce622f367, 0x3d16500000000000, + 0x3feb0e07298db790, 0x3d2fd40000000000, + 0x3feb20ce6c9a89a9, 0x3d12700000000000, + 0x3feb33a2b84f1a4b, 
0x3d4d470000000000, + 0x3feb468415b747e7, 0xbd38380000000000, + 0x3feb59728de5593a, 0x3c98000000000000, + 0x3feb6c6e29f1c56a, 0x3d0ad00000000000, + 0x3feb7f76f2fb5e50, 0x3cde800000000000, + 0x3feb928cf22749b2, 0xbd04c00000000000, + 0x3feba5b030a10603, 0xbd0d700000000000, + 0x3febb8e0b79a6f66, 0x3d0d900000000000, + 0x3febcc1e904bc1ff, 0x3d02a00000000000, + 0x3febdf69c3f3a16f, 0xbd1f780000000000, + 0x3febf2c25bd71db8, 0xbd10a00000000000, + 0x3fec06286141b2e9, 0xbd11400000000000, + 0x3fec199bdd8552e0, 0x3d0be00000000000, + 0x3fec2d1cd9fa64ee, 0xbd09400000000000, + 0x3fec40ab5fffd02f, 0xbd0ed00000000000, + 0x3fec544778fafd15, 0x3d39660000000000, + 0x3fec67f12e57d0cb, 0xbd1a100000000000, + 0x3fec7ba88988c1b6, 0xbd58458000000000, + 0x3fec8f6d9406e733, 0xbd1a480000000000, + 0x3feca3405751c4df, 0x3ccb000000000000, + 0x3fecb720dcef9094, 0x3d01400000000000, + 0x3feccb0f2e6d1689, 0x3cf0200000000000, + 0x3fecdf0b555dc412, 0x3cf3600000000000, + 0x3fecf3155b5bab3b, 0xbd06900000000000, + 0x3fed072d4a0789bc, 0x3d09a00000000000, + 0x3fed1b532b08c8fa, 0xbd15e00000000000, + 0x3fed2f87080d8a85, 0x3d1d280000000000, + 0x3fed43c8eacaa203, 0x3d01a00000000000, + 0x3fed5818dcfba491, 0x3cdf000000000000, + 0x3fed6c76e862e6a1, 0xbd03a00000000000, + 0x3fed80e316c9834e, 0xbd0cd80000000000, + 0x3fed955d71ff6090, 0x3cf4c00000000000, + 0x3feda9e603db32ae, 0x3cff900000000000, + 0x3fedbe7cd63a8325, 0x3ce9800000000000, + 0x3fedd321f301b445, 0xbcf5200000000000, + 0x3fede7d5641c05bf, 0xbd1d700000000000, + 0x3fedfc97337b9aec, 0xbd16140000000000, + 0x3fee11676b197d5e, 0x3d0b480000000000, + 0x3fee264614f5a3e7, 0x3d40ce0000000000, + 0x3fee3b333b16ee5c, 0x3d0c680000000000, + 0x3fee502ee78b3fb4, 0xbd09300000000000, + 0x3fee653924676d68, 0xbce5000000000000, + 0x3fee7a51fbc74c44, 0xbd07f80000000000, + 0x3fee8f7977cdb726, 0xbcf3700000000000, + 0x3feea4afa2a490e8, 0x3ce5d00000000000, + 0x3feeb9f4867ccae4, 0x3d161a0000000000, + 0x3feecf482d8e680d, 0x3cf5500000000000, + 0x3feee4aaa2188514, 0x3cc6400000000000, + 0x3feefa1bee615a13, 0xbcee800000000000, + 0x3fef0f9c1cb64106, 0xbcfa880000000000, + 0x3fef252b376bb963, 0xbd2c900000000000, + 0x3fef3ac948dd7275, 0x3caa000000000000, + 0x3fef50765b6e4524, 0xbcf4f00000000000, + 0x3fef6632798844fd, 0x3cca800000000000, + 0x3fef7bfdad9cbe38, 0x3cfabc0000000000, + 0x3fef91d802243c82, 0xbcd4600000000000, + 0x3fefa7c1819e908e, 0xbd0b0c0000000000, + 0x3fefbdba3692d511, 0xbcc0e00000000000, + 0x3fefd3c22b8f7194, 0xbd10de8000000000, + 0x3fefe9d96b2a23ee, 0x3cee430000000000, + 0x3ff0000000000000, 0x0, + 0x3ff00b1afa5abcbe, 0xbcb3400000000000, + 0x3ff0163da9fb3303, 0xbd12170000000000, + 0x3ff02168143b0282, 0x3cba400000000000, + 0x3ff02c9a3e77806c, 0x3cef980000000000, + 0x3ff037d42e11bbca, 0xbcc7400000000000, + 0x3ff04315e86e7f89, 0x3cd8300000000000, + 0x3ff04e5f72f65467, 0xbd1a3f0000000000, + 0x3ff059b0d315855a, 0xbd02840000000000, + 0x3ff0650a0e3c1f95, 0x3cf1600000000000, + 0x3ff0706b29ddf71a, 0x3d15240000000000, + 0x3ff07bd42b72a82d, 0xbce9a00000000000, + 0x3ff0874518759bd0, 0x3ce6400000000000, + 0x3ff092bdf66607c8, 0xbd00780000000000, + 0x3ff09e3ecac6f383, 0xbc98000000000000, + 0x3ff0a9c79b1f3930, 0x3cffa00000000000, + 0x3ff0b5586cf988fc, 0xbcfac80000000000, + 0x3ff0c0f145e46c8a, 0x3cd9c00000000000, + 0x3ff0cc922b724816, 0x3d05200000000000, + 0x3ff0d83b23395dd8, 0xbcfad00000000000, + 0x3ff0e3ec32d3d1f3, 0x3d1bac0000000000, + 0x3ff0efa55fdfa9a6, 0xbd04e80000000000, + 0x3ff0fb66affed2f0, 0xbd0d300000000000, + 0x3ff1073028d7234b, 0x3cf1500000000000, + 0x3ff11301d0125b5b, 0x3cec000000000000, + 0x3ff11edbab5e2af9, 
0x3d16bc0000000000, + 0x3ff12abdc06c31d5, 0x3ce8400000000000, + 0x3ff136a814f2047d, 0xbd0ed00000000000, + 0x3ff1429aaea92de9, 0x3ce8e00000000000, + 0x3ff14e95934f3138, 0x3ceb400000000000, + 0x3ff15a98c8a58e71, 0x3d05300000000000, + 0x3ff166a45471c3df, 0x3d03380000000000, + 0x3ff172b83c7d5211, 0x3d28d40000000000, + 0x3ff17ed48695bb9f, 0xbd05d00000000000, + 0x3ff18af9388c8d93, 0xbd1c880000000000, + 0x3ff1972658375d66, 0x3d11f00000000000, + 0x3ff1a35beb6fcba7, 0x3d10480000000000, + 0x3ff1af99f81387e3, 0xbd47390000000000, + 0x3ff1bbe084045d54, 0x3d24e40000000000, + 0x3ff1c82f95281c43, 0xbd0a200000000000, + 0x3ff1d4873168b9b2, 0x3ce3800000000000, + 0x3ff1e0e75eb44031, 0x3ceac00000000000, + 0x3ff1ed5022fcd938, 0x3d01900000000000, + 0x3ff1f9c18438cdf7, 0xbd1b780000000000, + 0x3ff2063b88628d8f, 0x3d2d940000000000, + 0x3ff212be3578a81e, 0x3cd8000000000000, + 0x3ff21f49917ddd41, 0x3d2b340000000000, + 0x3ff22bdda2791323, 0x3d19f80000000000, + 0x3ff2387a6e7561e7, 0xbd19c80000000000, + 0x3ff2451ffb821427, 0x3d02300000000000, + 0x3ff251ce4fb2a602, 0xbd13480000000000, + 0x3ff25e85711eceb0, 0x3d12700000000000, + 0x3ff26b4565e27d16, 0x3d11d00000000000, + 0x3ff2780e341de00f, 0x3d31ee0000000000, + 0x3ff284dfe1f5633e, 0xbd14c00000000000, + 0x3ff291ba7591bb30, 0xbd13d80000000000, + 0x3ff29e9df51fdf09, 0x3d08b00000000000, + 0x3ff2ab8a66d10e9b, 0xbd227c0000000000, + 0x3ff2b87fd0dada3a, 0x3d2a340000000000, + 0x3ff2c57e39771af9, 0xbd10800000000000, + 0x3ff2d285a6e402d9, 0xbd0ed00000000000, + 0x3ff2df961f641579, 0xbcf4200000000000, + 0x3ff2ecafa93e2ecf, 0xbd24980000000000, + 0x3ff2f9d24abd8822, 0xbd16300000000000, + 0x3ff306fe0a31b625, 0xbd32360000000000, + 0x3ff31432edeea50b, 0xbd70df8000000000, + 0x3ff32170fc4cd7b8, 0xbd22480000000000, + 0x3ff32eb83ba8e9a2, 0xbd25980000000000, + 0x3ff33c08b2641766, 0x3d1ed00000000000, + 0x3ff3496266e3fa27, 0xbcdc000000000000, + 0x3ff356c55f929f0f, 0xbd30d80000000000, + 0x3ff36431a2de88b9, 0x3d22c80000000000, + 0x3ff371a7373aaa39, 0x3d20600000000000, + 0x3ff37f26231e74fe, 0xbd16600000000000, + 0x3ff38cae6d05d838, 0xbd0ae00000000000, + 0x3ff39a401b713ec3, 0xbd44720000000000, + 0x3ff3a7db34e5a020, 0x3d08200000000000, + 0x3ff3b57fbfec6e95, 0x3d3e800000000000, + 0x3ff3c32dc313a8f2, 0x3cef800000000000, + 0x3ff3d0e544ede122, 0xbd17a00000000000, + 0x3ff3dea64c1234bb, 0x3d26300000000000, + 0x3ff3ec70df1c4ecc, 0xbd48a60000000000, + 0x3ff3fa4504ac7e8c, 0xbd3cdc0000000000, + 0x3ff40822c367a0bb, 0x3d25b80000000000, + 0x3ff4160a21f72e95, 0x3d1ec00000000000, + 0x3ff423fb27094646, 0xbd13600000000000, + 0x3ff431f5d950a920, 0x3d23980000000000, + 0x3ff43ffa3f84b9eb, 0x3cfa000000000000, + 0x3ff44e0860618919, 0xbcf6c00000000000, + 0x3ff45c2042a7d201, 0xbd0bc00000000000, + 0x3ff46a41ed1d0016, 0xbd12800000000000, + 0x3ff4786d668b3326, 0x3d30e00000000000, + 0x3ff486a2b5c13c00, 0xbd2d400000000000, + 0x3ff494e1e192af04, 0x3d0c200000000000, + 0x3ff4a32af0d7d372, 0xbd1e500000000000, + 0x3ff4b17dea6db801, 0x3d07800000000000, + 0x3ff4bfdad53629e1, 0xbd13800000000000, + 0x3ff4ce41b817c132, 0x3d00800000000000, + 0x3ff4dcb299fddddb, 0x3d2c700000000000, + 0x3ff4eb2d81d8ab96, 0xbd1ce00000000000, + 0x3ff4f9b2769d2d02, 0x3d19200000000000, + 0x3ff508417f4531c1, 0xbd08c00000000000, + 0x3ff516daa2cf662a, 0xbcfa000000000000, + 0x3ff5257de83f51ea, 0x3d4a080000000000, + 0x3ff5342b569d4eda, 0xbd26d80000000000, + 0x3ff542e2f4f6ac1a, 0xbd32440000000000, + 0x3ff551a4ca5d94db, 0x3d483c0000000000, + 0x3ff56070dde9116b, 0x3d24b00000000000, + 0x3ff56f4736b529de, 0x3d415a0000000000, + 0x3ff57e27dbe2c40e, 0xbd29e00000000000, + 
0x3ff58d12d497c76f, 0xbd23080000000000, + 0x3ff59c0827ff0b4c, 0x3d4dec0000000000, + 0x3ff5ab07dd485427, 0xbcc4000000000000, + 0x3ff5ba11fba87af4, 0x3d30080000000000, + 0x3ff5c9268a59460b, 0xbd26c80000000000, + 0x3ff5d84590998e3f, 0x3d469a0000000000, + 0x3ff5e76f15ad20e1, 0xbd1b400000000000, + 0x3ff5f6a320dcebca, 0x3d17700000000000, + 0x3ff605e1b976dcb8, 0x3d26f80000000000, + 0x3ff6152ae6cdf715, 0x3d01000000000000, + 0x3ff6247eb03a5531, 0xbd15d00000000000, + 0x3ff633dd1d1929b5, 0xbd12d00000000000, + 0x3ff6434634ccc313, 0xbcea800000000000, + 0x3ff652b9febc8efa, 0xbd28600000000000, + 0x3ff6623882553397, 0x3d71fe0000000000, + 0x3ff671c1c708328e, 0xbd37200000000000, + 0x3ff68155d44ca97e, 0x3ce6800000000000, + 0x3ff690f4b19e9471, 0xbd29780000000000, +]; + +// exp2(x): compute the base 2 exponential of x +// +// Accuracy: Peak error < 0.503 ulp for normalized results. +// +// Method: (accurate tables) +// +// Reduce x: +// x = k + y, for integer k and |y| <= 1/2. +// Thus we have exp2(x) = 2**k * exp2(y). +// +// Reduce y: +// y = i/TBLSIZE + z - eps[i] for integer i near y * TBLSIZE. +// Thus we have exp2(y) = exp2(i/TBLSIZE) * exp2(z - eps[i]), +// with |z - eps[i]| <= 2**-9 + 2**-39 for the table used. +// +// We compute exp2(i/TBLSIZE) via table lookup and exp2(z - eps[i]) via +// a degree-5 minimax polynomial with maximum error under 1.3 * 2**-61. +// The values in exp2t[] and eps[] are chosen such that +// exp2t[i] = exp2(i/TBLSIZE + eps[i]), and eps[i] is a small offset such +// that exp2t[i] is accurate to 2**-64. +// +// Note that the range of i is +-TBLSIZE/2, so we actually index the tables +// by i0 = i + TBLSIZE/2. For cache efficiency, exp2t[] and eps[] are +// virtual tables, interleaved in the real table tbl[]. +// +// This method is due to Gal, with many details due to Gal and Bachelis: +// +// Gal, S. and Bachelis, B. An Accurate Elementary Mathematical Library +// for the IEEE Floating Point Standard. TOMS 17(1), 26-46 (1991). + +/// Exponential, base 2 (f64) +/// +/// Calculate `2^x`, that is, 2 raised to the power `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn exp2(mut x: f64) -> f64 { + let redux = f64::from_bits(0x4338000000000000) / TBLSIZE as f64; + let p1 = f64::from_bits(0x3fe62e42fefa39ef); + let p2 = f64::from_bits(0x3fcebfbdff82c575); + let p3 = f64::from_bits(0x3fac6b08d704a0a6); + let p4 = f64::from_bits(0x3f83b2ab88f70400); + let p5 = f64::from_bits(0x3f55d88003875c74); + + // double_t r, t, z; + // uint32_t ix, i0; + // union {double f; uint64_t i;} u = {x}; + // union {uint32_t u; int32_t i;} k; + let x1p1023 = f64::from_bits(0x7fe0000000000000); + let x1p52 = f64::from_bits(0x4330000000000000); + let _0x1p_149 = f64::from_bits(0xb6a0000000000000); + + /* Filter out exceptional cases. */ + let ui = f64::to_bits(x); + let ix = (ui >> 32) & 0x7fffffff; + if ix >= 0x408ff000 { + /* |x| >= 1022 or nan */ + if ix >= 0x40900000 && ui >> 63 == 0 { + /* x >= 1024 or nan */ + /* overflow */ + x *= x1p1023; + return x; + } + if ix >= 0x7ff00000 { + /* -inf or -nan */ + return -1.0 / x; + } + if ui >> 63 != 0 { + /* x <= -1022 */ + /* underflow */ + if x <= -1075.0 || x - x1p52 + x1p52 != x { + force_eval!((_0x1p_149 / x) as f32); + } + if x <= -1075.0 { + return 0.0; + } + } + } else if ix < 0x3c900000 { + /* |x| < 0x1p-54 */ + return 1.0 + x; + } + + /* Reduce x, computing z, i0, and k. 
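   (Roughly: redux is 0x1.8p52 / TBLSIZE, so x + redux is held with an ulp of
   1/TBLSIZE; the low bits of its significand then carry the table index i0 and the
   bits above them carry k, while subtracting redux back yields x rounded to a
   multiple of 1/TBLSIZE, leaving z as the small remaining offset.)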
*/ + let ui = f64::to_bits(x + redux); + let mut i0 = ui as u32; + i0 = i0.wrapping_add(TBLSIZE as u32 / 2); + let ku = i0 / TBLSIZE as u32 * TBLSIZE as u32; + let ki = div!(ku as i32, TBLSIZE as i32); + i0 %= TBLSIZE as u32; + let uf = f64::from_bits(ui) - redux; + let mut z = x - uf; + + /* Compute r = exp2(y) = exp2t[i0] * p(z - eps[i]). */ + let t = f64::from_bits(i!(TBL, 2 * i0 as usize)); /* exp2t[i0] */ + z -= f64::from_bits(i!(TBL, 2 * i0 as usize + 1)); /* eps[i0] */ + let r = t + t * z * (p1 + z * (p2 + z * (p3 + z * (p4 + z * p5)))); + + scalbn(r, ki) +} + +#[test] +fn i0_wrap_test() { + let x = -3.0 / 256.0; + assert_eq!(exp2(x), f64::from_bits(0x3fefbdba3692d514)); +} diff --git a/library/compiler-builtins/libm/src/math/exp2f.rs b/library/compiler-builtins/libm/src/math/exp2f.rs new file mode 100644 index 0000000000000..f452b6a20f802 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/exp2f.rs @@ -0,0 +1,135 @@ +// origin: FreeBSD /usr/src/lib/msun/src/s_exp2f.c +//- +// Copyright (c) 2005 David Schultz +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +// SUCH DAMAGE. + +const TBLSIZE: usize = 16; + +static EXP2FT: [u64; TBLSIZE] = [ + 0x3fe6a09e667f3bcd, + 0x3fe7a11473eb0187, + 0x3fe8ace5422aa0db, + 0x3fe9c49182a3f090, + 0x3feae89f995ad3ad, + 0x3fec199bdd85529c, + 0x3fed5818dcfba487, + 0x3feea4afa2a490da, + 0x3ff0000000000000, + 0x3ff0b5586cf9890f, + 0x3ff172b83c7d517b, + 0x3ff2387a6e756238, + 0x3ff306fe0a31b715, + 0x3ff3dea64c123422, + 0x3ff4bfdad5362a27, + 0x3ff5ab07dd485429, +]; + +// exp2f(x): compute the base 2 exponential of x +// +// Accuracy: Peak error < 0.501 ulp; location of peak: -0.030110927. +// +// Method: (equally-spaced tables) +// +// Reduce x: +// x = k + y, for integer k and |y| <= 1/2. +// Thus we have exp2f(x) = 2**k * exp2(y). +// +// Reduce y: +// y = i/TBLSIZE + z for integer i near y * TBLSIZE. +// Thus we have exp2(y) = exp2(i/TBLSIZE) * exp2(z), +// with |z| <= 2**-(TBLSIZE+1). +// +// We compute exp2(i/TBLSIZE) via table lookup and exp2(z) via a +// degree-4 minimax polynomial with maximum error under 1.4 * 2**-33. +// Using double precision for everything except the reduction makes +// roundoff error insignificant and simplifies the scaling step. 
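//
// A rough worked example of this reduction: for x = 1.3, k = 1 and y = 0.3; then
// i = 5 since y * TBLSIZE = 4.8, giving z = 0.3 - 5/16 = -0.0125, so
// exp2f(1.3) = 2**1 * exp2(5/16) * exp2(-0.0125) ~ 2 * 1.24186 * 0.99137 ~ 2.4623.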
+// +// This method is due to Tang, but I do not use his suggested parameters: +// +// Tang, P. Table-driven Implementation of the Exponential Function +// in IEEE Floating-Point Arithmetic. TOMS 15(2), 144-157 (1989). + +/// Exponential, base 2 (f32) +/// +/// Calculate `2^x`, that is, 2 raised to the power `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn exp2f(mut x: f32) -> f32 { + let redux = f32::from_bits(0x4b400000) / TBLSIZE as f32; + let p1 = f32::from_bits(0x3f317218); + let p2 = f32::from_bits(0x3e75fdf0); + let p3 = f32::from_bits(0x3d6359a4); + let p4 = f32::from_bits(0x3c1d964e); + + // double_t t, r, z; + // uint32_t ix, i0, k; + + let x1p127 = f32::from_bits(0x7f000000); + + /* Filter out exceptional cases. */ + let ui = f32::to_bits(x); + let ix = ui & 0x7fffffff; + if ix > 0x42fc0000 { + /* |x| > 126 */ + if ix > 0x7f800000 { + /* NaN */ + return x; + } + if (0x43000000..0x80000000).contains(&ui) { + /* x >= 128 */ + x *= x1p127; + return x; + } + if ui >= 0x80000000 { + /* x < -126 */ + if ui >= 0xc3160000 || (ui & 0x0000ffff != 0) { + force_eval!(f32::from_bits(0x80000001) / x); + } + if ui >= 0xc3160000 { + /* x <= -150 */ + return 0.0; + } + } + } else if ix <= 0x33000000 { + /* |x| <= 0x1p-25 */ + return 1.0 + x; + } + + /* Reduce x, computing z, i0, and k. */ + let ui = f32::to_bits(x + redux); + let mut i0 = ui; + i0 += TBLSIZE as u32 / 2; + let k = i0 / TBLSIZE as u32; + let ukf = f64::from_bits(((0x3ff + k) as u64) << 52); + i0 &= TBLSIZE as u32 - 1; + let mut uf = f32::from_bits(ui); + uf -= redux; + let z: f64 = (x - uf) as f64; + /* Compute r = exp2(y) = exp2ft[i0] * p(z). */ + let r: f64 = f64::from_bits(i!(EXP2FT, i0 as usize)); + let t: f64 = r * z; + let r: f64 = r + t * (p1 as f64 + z * p2 as f64) + t * (z * z) * (p3 as f64 + z * p4 as f64); + + /* Scale by 2**k */ + (r * ukf) as f32 +} diff --git a/library/compiler-builtins/libm/src/math/expf.rs b/library/compiler-builtins/libm/src/math/expf.rs new file mode 100644 index 0000000000000..8dc067ab0846f --- /dev/null +++ b/library/compiler-builtins/libm/src/math/expf.rs @@ -0,0 +1,97 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_expf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::scalbnf; + +const HALF: [f32; 2] = [0.5, -0.5]; +const LN2_HI: f32 = 6.9314575195e-01; /* 0x3f317200 */ +const LN2_LO: f32 = 1.4286067653e-06; /* 0x35bfbe8e */ +const INV_LN2: f32 = 1.4426950216e+00; /* 0x3fb8aa3b */ +/* + * Domain [-0.34568, 0.34568], range ~[-4.278e-9, 4.447e-9]: + * |x*(exp(x)+1)/(exp(x)-1) - p(x)| < 2**-27.74 + */ +const P1: f32 = 1.6666625440e-1; /* 0xaaaa8f.0p-26 */ +const P2: f32 = -2.7667332906e-3; /* -0xb55215.0p-32 */ + +/// Exponential, base *e* (f32) +/// +/// Calculate the exponential of `x`, that is, *e* raised to the power `x` +/// (where *e* is the base of the natural system of logarithms, approximately 2.71828). 
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn expf(mut x: f32) -> f32 { + let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127 + let x1p_126 = f32::from_bits(0x800000); // 0x1p-126f === 2 ^ -126 /*original 0x1p-149f ??????????? */ + let mut hx = x.to_bits(); + let sign = (hx >> 31) as i32; /* sign bit of x */ + let signb: bool = sign != 0; + hx &= 0x7fffffff; /* high word of |x| */ + + /* special cases */ + if hx >= 0x42aeac50 { + /* if |x| >= -87.33655f or NaN */ + if hx > 0x7f800000 { + /* NaN */ + return x; + } + if (hx >= 0x42b17218) && (!signb) { + /* x >= 88.722839f */ + /* overflow */ + x *= x1p127; + return x; + } + if signb { + /* underflow */ + force_eval!(-x1p_126 / x); + if hx >= 0x42cff1b5 { + /* x <= -103.972084f */ + return 0.; + } + } + } + + /* argument reduction */ + let k: i32; + let hi: f32; + let lo: f32; + if hx > 0x3eb17218 { + /* if |x| > 0.5 ln2 */ + if hx > 0x3f851592 { + /* if |x| > 1.5 ln2 */ + k = (INV_LN2 * x + i!(HALF, sign as usize)) as i32; + } else { + k = 1 - sign - sign; + } + let kf = k as f32; + hi = x - kf * LN2_HI; /* k*ln2hi is exact here */ + lo = kf * LN2_LO; + x = hi - lo; + } else if hx > 0x39000000 { + /* |x| > 2**-14 */ + k = 0; + hi = x; + lo = 0.; + } else { + /* raise inexact */ + force_eval!(x1p127 + x); + return 1. + x; + } + + /* x is now in primary range */ + let xx = x * x; + let c = x - xx * (P1 + xx * P2); + let y = 1. + (x * c / (2. - c) - lo + hi); + if k == 0 { y } else { scalbnf(y, k) } +} diff --git a/library/compiler-builtins/libm/src/math/expm1.rs b/library/compiler-builtins/libm/src/math/expm1.rs new file mode 100644 index 0000000000000..f25153f32a34d --- /dev/null +++ b/library/compiler-builtins/libm/src/math/expm1.rs @@ -0,0 +1,144 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_expm1.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use core::f64; + +const O_THRESHOLD: f64 = 7.09782712893383973096e+02; /* 0x40862E42, 0xFEFA39EF */ +const LN2_HI: f64 = 6.93147180369123816490e-01; /* 0x3fe62e42, 0xfee00000 */ +const LN2_LO: f64 = 1.90821492927058770002e-10; /* 0x3dea39ef, 0x35793c76 */ +const INVLN2: f64 = 1.44269504088896338700e+00; /* 0x3ff71547, 0x652b82fe */ +/* Scaled Q's: Qn_here = 2**n * Qn_above, for R(2*z) where z = hxs = x*x/2: */ +const Q1: f64 = -3.33333333333331316428e-02; /* BFA11111 111110F4 */ +const Q2: f64 = 1.58730158725481460165e-03; /* 3F5A01A0 19FE5585 */ +const Q3: f64 = -7.93650757867487942473e-05; /* BF14CE19 9EAADBB7 */ +const Q4: f64 = 4.00821782732936239552e-06; /* 3ED0CFCA 86E65239 */ +const Q5: f64 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */ + +/// Exponential, base *e*, of x-1 (f64) +/// +/// Calculates the exponential of `x` and subtract 1, that is, *e* raised +/// to the power `x` minus 1 (where *e* is the base of the natural +/// system of logarithms, approximately 2.71828). +/// The result is accurate even for small values of `x`, +/// where using `exp(x)-1` would lose many significant digits. 
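// Illustrative comparison in f64: expm1(1e-10) ~ 1.00000000005e-10, while the naive
// exp(1e-10) - 1.0 evaluates to ~ 1.000000082740371e-10, because exp(1e-10) rounds
// to the double nearest 1 + 1e-10 and the subtraction then loses about half of the
// significant digits.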
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn expm1(mut x: f64) -> f64 { + let hi: f64; + let lo: f64; + let k: i32; + let c: f64; + let mut t: f64; + let mut y: f64; + + let mut ui = x.to_bits(); + let hx = ((ui >> 32) & 0x7fffffff) as u32; + let sign = (ui >> 63) as i32; + + /* filter out huge and non-finite argument */ + if hx >= 0x4043687A { + /* if |x|>=56*ln2 */ + if x.is_nan() { + return x; + } + if sign != 0 { + return -1.0; + } + if x > O_THRESHOLD { + x *= f64::from_bits(0x7fe0000000000000); + return x; + } + } + + /* argument reduction */ + if hx > 0x3fd62e42 { + /* if |x| > 0.5 ln2 */ + if hx < 0x3FF0A2B2 { + /* and |x| < 1.5 ln2 */ + if sign == 0 { + hi = x - LN2_HI; + lo = LN2_LO; + k = 1; + } else { + hi = x + LN2_HI; + lo = -LN2_LO; + k = -1; + } + } else { + k = (INVLN2 * x + if sign != 0 { -0.5 } else { 0.5 }) as i32; + t = k as f64; + hi = x - t * LN2_HI; /* t*ln2_hi is exact here */ + lo = t * LN2_LO; + } + x = hi - lo; + c = (hi - x) - lo; + } else if hx < 0x3c900000 { + /* |x| < 2**-54, return x */ + if hx < 0x00100000 { + force_eval!(x); + } + return x; + } else { + c = 0.0; + k = 0; + } + + /* x is now in primary range */ + let hfx = 0.5 * x; + let hxs = x * hfx; + let r1 = 1.0 + hxs * (Q1 + hxs * (Q2 + hxs * (Q3 + hxs * (Q4 + hxs * Q5)))); + t = 3.0 - r1 * hfx; + let mut e = hxs * ((r1 - t) / (6.0 - x * t)); + if k == 0 { + /* c is 0 */ + return x - (x * e - hxs); + } + e = x * (e - c) - c; + e -= hxs; + /* exp(x) ~ 2^k (x_reduced - e + 1) */ + if k == -1 { + return 0.5 * (x - e) - 0.5; + } + if k == 1 { + if x < -0.25 { + return -2.0 * (e - (x + 0.5)); + } + return 1.0 + 2.0 * (x - e); + } + ui = ((0x3ff + k) as u64) << 52; /* 2^k */ + let twopk = f64::from_bits(ui); + if !(0..=56).contains(&k) { + /* suffice to return exp(x)-1 */ + y = x - e + 1.0; + if k == 1024 { + y = y * 2.0 * f64::from_bits(0x7fe0000000000000); + } else { + y = y * twopk; + } + return y - 1.0; + } + ui = ((0x3ff - k) as u64) << 52; /* 2^-k */ + let uf = f64::from_bits(ui); + if k < 20 { + y = (x - e + (1.0 - uf)) * twopk; + } else { + y = (x - (e + uf) + 1.0) * twopk; + } + y +} + +#[cfg(test)] +mod tests { + #[test] + fn sanity_check() { + assert_eq!(super::expm1(1.1), 2.0041660239464334); + } +} diff --git a/library/compiler-builtins/libm/src/math/expm1f.rs b/library/compiler-builtins/libm/src/math/expm1f.rs new file mode 100644 index 0000000000000..63dc86e37c8ce --- /dev/null +++ b/library/compiler-builtins/libm/src/math/expm1f.rs @@ -0,0 +1,134 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_expm1f.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. 
+ * ==================================================== + */ + +const O_THRESHOLD: f32 = 8.8721679688e+01; /* 0x42b17180 */ +const LN2_HI: f32 = 6.9313812256e-01; /* 0x3f317180 */ +const LN2_LO: f32 = 9.0580006145e-06; /* 0x3717f7d1 */ +const INV_LN2: f32 = 1.4426950216e+00; /* 0x3fb8aa3b */ +/* + * Domain [-0.34568, 0.34568], range ~[-6.694e-10, 6.696e-10]: + * |6 / x * (1 + 2 * (1 / (exp(x) - 1) - 1 / x)) - q(x)| < 2**-30.04 + * Scaled coefficients: Qn_here = 2**n * Qn_for_q (see s_expm1.c): + */ +const Q1: f32 = -3.3333212137e-2; /* -0x888868.0p-28 */ +const Q2: f32 = 1.5807170421e-3; /* 0xcf3010.0p-33 */ + +/// Exponential, base *e*, of x-1 (f32) +/// +/// Calculates the exponential of `x` and subtract 1, that is, *e* raised +/// to the power `x` minus 1 (where *e* is the base of the natural +/// system of logarithms, approximately 2.71828). +/// The result is accurate even for small values of `x`, +/// where using `exp(x)-1` would lose many significant digits. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn expm1f(mut x: f32) -> f32 { + let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127 + + let mut hx = x.to_bits(); + let sign = (hx >> 31) != 0; + hx &= 0x7fffffff; + + /* filter out huge and non-finite argument */ + if hx >= 0x4195b844 { + /* if |x|>=27*ln2 */ + if hx > 0x7f800000 { + /* NaN */ + return x; + } + if sign { + return -1.; + } + if x > O_THRESHOLD { + x *= x1p127; + return x; + } + } + + let k: i32; + let hi: f32; + let lo: f32; + let mut c = 0f32; + /* argument reduction */ + if hx > 0x3eb17218 { + /* if |x| > 0.5 ln2 */ + if hx < 0x3F851592 { + /* and |x| < 1.5 ln2 */ + if !sign { + hi = x - LN2_HI; + lo = LN2_LO; + k = 1; + } else { + hi = x + LN2_HI; + lo = -LN2_LO; + k = -1; + } + } else { + k = (INV_LN2 * x + (if sign { -0.5 } else { 0.5 })) as i32; + let t = k as f32; + hi = x - t * LN2_HI; /* t*ln2_hi is exact here */ + lo = t * LN2_LO; + } + x = hi - lo; + c = (hi - x) - lo; + } else if hx < 0x33000000 { + /* when |x|<2**-25, return x */ + if hx < 0x00800000 { + force_eval!(x * x); + } + return x; + } else { + k = 0; + } + + /* x is now in primary range */ + let hfx = 0.5 * x; + let hxs = x * hfx; + let r1 = 1. + hxs * (Q1 + hxs * Q2); + let t = 3. - r1 * hfx; + let mut e = hxs * ((r1 - t) / (6. - x * t)); + if k == 0 { + /* c is 0 */ + return x - (x * e - hxs); + } + e = x * (e - c) - c; + e -= hxs; + /* exp(x) ~ 2^k (x_reduced - e + 1) */ + if k == -1 { + return 0.5 * (x - e) - 0.5; + } + if k == 1 { + if x < -0.25 { + return -2. * (e - (x + 0.5)); + } + return 1. + 2. * (x - e); + } + let twopk = f32::from_bits(((0x7f + k) << 23) as u32); /* 2^k */ + if !(0..=56).contains(&k) { + /* suffice to return exp(x)-1 */ + let mut y = x - e + 1.; + if k == 128 { + y = y * 2. * x1p127; + } else { + y = y * twopk; + } + return y - 1.; + } + let uf = f32::from_bits(((0x7f - k) << 23) as u32); /* 2^-k */ + if k < 23 { + (x - e + (1. - uf)) * twopk + } else { + (x - (e + uf) + 1.) 
* twopk + } +} diff --git a/library/compiler-builtins/libm/src/math/expo2.rs b/library/compiler-builtins/libm/src/math/expo2.rs new file mode 100644 index 0000000000000..82e9b360a764f --- /dev/null +++ b/library/compiler-builtins/libm/src/math/expo2.rs @@ -0,0 +1,14 @@ +use super::{combine_words, exp}; + +/* exp(x)/2 for x >= log(DBL_MAX), slightly better than 0.5*exp(x/2)*exp(x/2) */ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub(crate) fn expo2(x: f64) -> f64 { + /* k is such that k*ln2 has minimal relative error and x - kln2 > log(DBL_MIN) */ + const K: i32 = 2043; + let kln2 = f64::from_bits(0x40962066151add8b); + + /* note that k is odd and scale*scale overflows */ + let scale = combine_words(((0x3ff + K / 2) as u32) << 20, 0); + /* exp(x - k ln2) * 2**(k-1) */ + exp(x - kln2) * scale * scale +} diff --git a/library/compiler-builtins/libm/src/math/fabs.rs b/library/compiler-builtins/libm/src/math/fabs.rs new file mode 100644 index 0000000000000..0050a309fee54 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/fabs.rs @@ -0,0 +1,116 @@ +/// Absolute value (magnitude) (f16) +/// +/// Calculates the absolute value (magnitude) of the argument `x`, +/// by direct manipulation of the bit representation of `x`. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fabsf16(x: f16) -> f16 { + super::generic::fabs(x) +} + +/// Absolute value (magnitude) (f32) +/// +/// Calculates the absolute value (magnitude) of the argument `x`, +/// by direct manipulation of the bit representation of `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fabsf(x: f32) -> f32 { + select_implementation! { + name: fabsf, + use_arch: all(target_arch = "wasm32", intrinsics_enabled), + args: x, + } + + super::generic::fabs(x) +} + +/// Absolute value (magnitude) (f64) +/// +/// Calculates the absolute value (magnitude) of the argument `x`, +/// by direct manipulation of the bit representation of `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fabs(x: f64) -> f64 { + select_implementation! { + name: fabs, + use_arch: all(target_arch = "wasm32", intrinsics_enabled), + args: x, + } + + super::generic::fabs(x) +} + +/// Absolute value (magnitude) (f128) +/// +/// Calculates the absolute value (magnitude) of the argument `x`, +/// by direct manipulation of the bit representation of `x`. 
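// (Illustrative: for f64 this is essentially f64::from_bits(x.to_bits() & !(1u64 << 63)),
// i.e. clearing the sign bit, which also maps a negative NaN to a positive one.)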
+#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fabsf128(x: f128) -> f128 { + super::generic::fabs(x) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::support::Float; + + /// Based on https://en.cppreference.com/w/cpp/numeric/math/fabs + fn spec_test(f: impl Fn(F) -> F) { + assert_biteq!(f(F::ZERO), F::ZERO); + assert_biteq!(f(F::NEG_ZERO), F::ZERO); + assert_biteq!(f(F::INFINITY), F::INFINITY); + assert_biteq!(f(F::NEG_INFINITY), F::INFINITY); + assert!(f(F::NAN).is_nan()); + + // Not spec rewquired but we expect it + assert!(f(F::NAN).is_sign_positive()); + assert!(f(F::from_bits(F::NAN.to_bits() | F::SIGN_MASK)).is_sign_positive()); + } + + #[test] + #[cfg(f16_enabled)] + fn sanity_check_f16() { + assert_eq!(fabsf16(-1.0f16), 1.0); + assert_eq!(fabsf16(2.8f16), 2.8); + } + + #[test] + #[cfg(f16_enabled)] + fn spec_tests_f16() { + spec_test::(fabsf16); + } + + #[test] + fn sanity_check_f32() { + assert_eq!(fabsf(-1.0f32), 1.0); + assert_eq!(fabsf(2.8f32), 2.8); + } + + #[test] + fn spec_tests_f32() { + spec_test::(fabsf); + } + + #[test] + fn sanity_check_f64() { + assert_eq!(fabs(-1.0f64), 1.0); + assert_eq!(fabs(2.8f64), 2.8); + } + + #[test] + fn spec_tests_f64() { + spec_test::(fabs); + } + + #[test] + #[cfg(f128_enabled)] + fn sanity_check_f128() { + assert_eq!(fabsf128(-1.0f128), 1.0); + assert_eq!(fabsf128(2.8f128), 2.8); + } + + #[test] + #[cfg(f128_enabled)] + fn spec_tests_f128() { + spec_test::(fabsf128); + } +} diff --git a/library/compiler-builtins/libm/src/math/fabsf.rs b/library/compiler-builtins/libm/src/math/fabsf.rs new file mode 100644 index 0000000000000..e5820a26c5238 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/fabsf.rs @@ -0,0 +1,39 @@ +/// Absolute value (magnitude) (f32) +/// +/// Calculates the absolute value (magnitude) of the argument `x`, +/// by direct manipulation of the bit representation of `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fabsf(x: f32) -> f32 { + select_implementation! { + name: fabsf, + use_arch: all(target_arch = "wasm32", intrinsics_enabled), + args: x, + } + + super::generic::fabs(x) +} + +// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 +#[cfg(not(target_arch = "powerpc64"))] +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn sanity_check() { + assert_eq!(fabsf(-1.0), 1.0); + assert_eq!(fabsf(2.8), 2.8); + } + + /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs + #[test] + fn spec_tests() { + assert!(fabsf(f32::NAN).is_nan()); + for f in [0.0, -0.0].iter().copied() { + assert_eq!(fabsf(f), 0.0); + } + for f in [f32::INFINITY, f32::NEG_INFINITY].iter().copied() { + assert_eq!(fabsf(f), f32::INFINITY); + } + } +} diff --git a/library/compiler-builtins/libm/src/math/fabsf128.rs b/library/compiler-builtins/libm/src/math/fabsf128.rs new file mode 100644 index 0000000000000..46429ca494033 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/fabsf128.rs @@ -0,0 +1,31 @@ +/// Absolute value (magnitude) (f128) +/// +/// Calculates the absolute value (magnitude) of the argument `x`, +/// by direct manipulation of the bit representation of `x`. 
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fabsf128(x: f128) -> f128 { + super::generic::fabs(x) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn sanity_check() { + assert_eq!(fabsf128(-1.0), 1.0); + assert_eq!(fabsf128(2.8), 2.8); + } + + /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs + #[test] + fn spec_tests() { + assert!(fabsf128(f128::NAN).is_nan()); + for f in [0.0, -0.0].iter().copied() { + assert_eq!(fabsf128(f), 0.0); + } + for f in [f128::INFINITY, f128::NEG_INFINITY].iter().copied() { + assert_eq!(fabsf128(f), f128::INFINITY); + } + } +} diff --git a/library/compiler-builtins/libm/src/math/fabsf16.rs b/library/compiler-builtins/libm/src/math/fabsf16.rs new file mode 100644 index 0000000000000..eee42ac6a3c60 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/fabsf16.rs @@ -0,0 +1,31 @@ +/// Absolute value (magnitude) (f16) +/// +/// Calculates the absolute value (magnitude) of the argument `x`, +/// by direct manipulation of the bit representation of `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fabsf16(x: f16) -> f16 { + super::generic::fabs(x) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn sanity_check() { + assert_eq!(fabsf16(-1.0), 1.0); + assert_eq!(fabsf16(2.8), 2.8); + } + + /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs + #[test] + fn spec_tests() { + assert!(fabsf16(f16::NAN).is_nan()); + for f in [0.0, -0.0].iter().copied() { + assert_eq!(fabsf16(f), 0.0); + } + for f in [f16::INFINITY, f16::NEG_INFINITY].iter().copied() { + assert_eq!(fabsf16(f), f16::INFINITY); + } + } +} diff --git a/library/compiler-builtins/libm/src/math/fdim.rs b/library/compiler-builtins/libm/src/math/fdim.rs new file mode 100644 index 0000000000000..082c5478b2aa6 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/fdim.rs @@ -0,0 +1,53 @@ +/// Positive difference (f16) +/// +/// Determines the positive difference between arguments, returning: +/// * x - y if x > y, or +/// * +0 if x <= y, or +/// * NAN if either argument is NAN. +/// +/// A range error may occur. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fdimf16(x: f16, y: f16) -> f16 { + super::generic::fdim(x, y) +} + +/// Positive difference (f32) +/// +/// Determines the positive difference between arguments, returning: +/// * x - y if x > y, or +/// * +0 if x <= y, or +/// * NAN if either argument is NAN. +/// +/// A range error may occur. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fdimf(x: f32, y: f32) -> f32 { + super::generic::fdim(x, y) +} + +/// Positive difference (f64) +/// +/// Determines the positive difference between arguments, returning: +/// * x - y if x > y, or +/// * +0 if x <= y, or +/// * NAN if either argument is NAN. +/// +/// A range error may occur. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fdim(x: f64, y: f64) -> f64 { + super::generic::fdim(x, y) +} + +/// Positive difference (f128) +/// +/// Determines the positive difference between arguments, returning: +/// * x - y if x > y, or +/// * +0 if x <= y, or +/// * NAN if either argument is NAN. +/// +/// A range error may occur. 
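// For example: fdim(5.0, 3.0) == 2.0, fdim(3.0, 5.0) == 0.0, and
// fdim(f64::NAN, 1.0) is NaN.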
+#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fdimf128(x: f128, y: f128) -> f128 { + super::generic::fdim(x, y) +} diff --git a/library/compiler-builtins/libm/src/math/fdimf.rs b/library/compiler-builtins/libm/src/math/fdimf.rs new file mode 100644 index 0000000000000..367ef517c63be --- /dev/null +++ b/library/compiler-builtins/libm/src/math/fdimf.rs @@ -0,0 +1,12 @@ +/// Positive difference (f32) +/// +/// Determines the positive difference between arguments, returning: +/// * x - y if x > y, or +/// * +0 if x <= y, or +/// * NAN if either argument is NAN. +/// +/// A range error may occur. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fdimf(x: f32, y: f32) -> f32 { + super::generic::fdim(x, y) +} diff --git a/library/compiler-builtins/libm/src/math/fdimf128.rs b/library/compiler-builtins/libm/src/math/fdimf128.rs new file mode 100644 index 0000000000000..6f3d1d0ff1d54 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/fdimf128.rs @@ -0,0 +1,12 @@ +/// Positive difference (f128) +/// +/// Determines the positive difference between arguments, returning: +/// * x - y if x > y, or +/// * +0 if x <= y, or +/// * NAN if either argument is NAN. +/// +/// A range error may occur. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fdimf128(x: f128, y: f128) -> f128 { + super::generic::fdim(x, y) +} diff --git a/library/compiler-builtins/libm/src/math/fdimf16.rs b/library/compiler-builtins/libm/src/math/fdimf16.rs new file mode 100644 index 0000000000000..37bd688581797 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/fdimf16.rs @@ -0,0 +1,12 @@ +/// Positive difference (f16) +/// +/// Determines the positive difference between arguments, returning: +/// * x - y if x > y, or +/// * +0 if x <= y, or +/// * NAN if either argument is NAN. +/// +/// A range error may occur. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fdimf16(x: f16, y: f16) -> f16 { + super::generic::fdim(x, y) +} diff --git a/library/compiler-builtins/libm/src/math/floor.rs b/library/compiler-builtins/libm/src/math/floor.rs new file mode 100644 index 0000000000000..3c5eab101d18e --- /dev/null +++ b/library/compiler-builtins/libm/src/math/floor.rs @@ -0,0 +1,46 @@ +/// Floor (f16) +/// +/// Finds the nearest integer less than or equal to `x`. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn floorf16(x: f16) -> f16 { + return super::generic::floor(x); +} + +/// Floor (f64) +/// +/// Finds the nearest integer less than or equal to `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn floor(x: f64) -> f64 { + select_implementation! { + name: floor, + use_arch: all(target_arch = "wasm32", intrinsics_enabled), + use_arch_required: all(target_arch = "x86", not(target_feature = "sse2")), + args: x, + } + + return super::generic::floor(x); +} + +/// Floor (f32) +/// +/// Finds the nearest integer less than or equal to `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn floorf(x: f32) -> f32 { + select_implementation! { + name: floorf, + use_arch: all(target_arch = "wasm32", intrinsics_enabled), + args: x, + } + + return super::generic::floor(x); +} + +/// Floor (f128) +/// +/// Finds the nearest integer less than or equal to `x`. 
+#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn floorf128(x: f128) -> f128 { + return super::generic::floor(x); +} diff --git a/library/compiler-builtins/libm/src/math/floorf.rs b/library/compiler-builtins/libm/src/math/floorf.rs new file mode 100644 index 0000000000000..16957b7f35573 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/floorf.rs @@ -0,0 +1,13 @@ +/// Floor (f32) +/// +/// Finds the nearest integer less than or equal to `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn floorf(x: f32) -> f32 { + select_implementation! { + name: floorf, + use_arch: all(target_arch = "wasm32", intrinsics_enabled), + args: x, + } + + return super::generic::floor(x); +} diff --git a/library/compiler-builtins/libm/src/math/floorf128.rs b/library/compiler-builtins/libm/src/math/floorf128.rs new file mode 100644 index 0000000000000..9a9fe4151152b --- /dev/null +++ b/library/compiler-builtins/libm/src/math/floorf128.rs @@ -0,0 +1,7 @@ +/// Floor (f128) +/// +/// Finds the nearest integer less than or equal to `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn floorf128(x: f128) -> f128 { + return super::generic::floor(x); +} diff --git a/library/compiler-builtins/libm/src/math/floorf16.rs b/library/compiler-builtins/libm/src/math/floorf16.rs new file mode 100644 index 0000000000000..f9b868e04109d --- /dev/null +++ b/library/compiler-builtins/libm/src/math/floorf16.rs @@ -0,0 +1,7 @@ +/// Floor (f16) +/// +/// Finds the nearest integer less than or equal to `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn floorf16(x: f16) -> f16 { + return super::generic::floor(x); +} diff --git a/library/compiler-builtins/libm/src/math/fma.rs b/library/compiler-builtins/libm/src/math/fma.rs new file mode 100644 index 0000000000000..5bf473cfe0634 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/fma.rs @@ -0,0 +1,171 @@ +/* SPDX-License-Identifier: MIT */ +/* origin: musl src/math/fma.c, fmaf.c Ported to generic Rust algorithm in 2025, TG. */ + +use super::generic; +use crate::support::Round; + +// Placeholder so we can have `fmaf16` in the `Float` trait. +#[allow(unused)] +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 { + unimplemented!() +} + +/// Floating multiply add (f32) +/// +/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaf(x: f32, y: f32, z: f32) -> f32 { + select_implementation! { + name: fmaf, + use_arch: any( + all(target_arch = "aarch64", target_feature = "neon"), + target_feature = "sse2", + ), + args: x, y, z, + } + + generic::fma_wide_round(x, y, z, Round::Nearest).val +} + +/// Fused multiply add (f64) +/// +/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fma(x: f64, y: f64, z: f64) -> f64 { + select_implementation! { + name: fma, + use_arch: any( + all(target_arch = "aarch64", target_feature = "neon"), + target_feature = "sse2", + ), + args: x, y, z, + } + + generic::fma_round(x, y, z, Round::Nearest).val +} + +/// Fused multiply add (f128) +/// +/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). 
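// Illustrative single-rounding example in f64: the exact product 0.1 * 10.0 is
// 1 + 2^-54, so the two-step 0.1 * 10.0 - 1.0 rounds the product to 1.0 and yields
// 0.0, while fma(0.1, 10.0, -1.0) keeps the full product and returns
// 2^-54 (~ 5.551115123125783e-17).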
+#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 { + generic::fma_round(x, y, z, Round::Nearest).val +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::support::{CastFrom, CastInto, Float, FpResult, HInt, MinInt, Round, Status}; + + /// Test the generic `fma_round` algorithm for a given float. + fn spec_test(f: impl Fn(F, F, F) -> F) + where + F: Float, + F: CastFrom, + F: CastFrom, + F::Int: HInt, + u32: CastInto, + { + let x = F::from_bits(F::Int::ONE); + let y = F::from_bits(F::Int::ONE); + let z = F::ZERO; + + // 754-2020 says "When the exact result of (a × b) + c is non-zero yet the result of + // fusedMultiplyAdd is zero because of rounding, the zero result takes the sign of the + // exact result" + assert_biteq!(f(x, y, z), F::ZERO); + assert_biteq!(f(x, -y, z), F::NEG_ZERO); + assert_biteq!(f(-x, y, z), F::NEG_ZERO); + assert_biteq!(f(-x, -y, z), F::ZERO); + } + + #[test] + fn spec_test_f32() { + spec_test::(fmaf); + + // Also do a small check that the non-widening version works for f32 (this should ideally + // get tested some more). + spec_test::(|x, y, z| generic::fma_round(x, y, z, Round::Nearest).val); + } + + #[test] + fn spec_test_f64() { + spec_test::(fma); + + let expect_underflow = [ + ( + hf64!("0x1.0p-1070"), + hf64!("0x1.0p-1070"), + hf64!("0x1.ffffffffffffp-1023"), + hf64!("0x0.ffffffffffff8p-1022"), + ), + ( + // FIXME: we raise underflow but this should only be inexact (based on C and + // `rustc_apfloat`). + hf64!("0x1.0p-1070"), + hf64!("0x1.0p-1070"), + hf64!("-0x1.0p-1022"), + hf64!("-0x1.0p-1022"), + ), + ]; + + for (x, y, z, res) in expect_underflow { + let FpResult { val, status } = generic::fma_round(x, y, z, Round::Nearest); + assert_biteq!(val, res); + assert_eq!(status, Status::UNDERFLOW); + } + } + + #[test] + #[cfg(f128_enabled)] + fn spec_test_f128() { + spec_test::(fmaf128); + } + + #[test] + fn issue_263() { + let a = f32::from_bits(1266679807); + let b = f32::from_bits(1300234242); + let c = f32::from_bits(1115553792); + let expected = f32::from_bits(1501560833); + assert_eq!(fmaf(a, b, c), expected); + } + + #[test] + fn fma_segfault() { + // These two inputs cause fma to segfault on release due to overflow: + assert_eq!( + fma( + -0.0000000000000002220446049250313, + -0.0000000000000002220446049250313, + -0.0000000000000002220446049250313 + ), + -0.00000000000000022204460492503126, + ); + + let result = fma(-0.992, -0.992, -0.992); + //force rounding to storage format on x87 to prevent superious errors. + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let result = force_eval!(result); + assert_eq!(result, -0.007936000000000007,); + } + + #[test] + fn fma_sbb() { + assert_eq!( + fma(-(1.0 - f64::EPSILON), f64::MIN, f64::MIN), + -3991680619069439e277 + ); + } + + #[test] + fn fma_underflow() { + assert_eq!( + fma(1.1102230246251565e-16, -9.812526705433188e-305, 1.0894e-320), + 0.0, + ); + } +} diff --git a/library/compiler-builtins/libm/src/math/fmin_fmax.rs b/library/compiler-builtins/libm/src/math/fmin_fmax.rs new file mode 100644 index 0000000000000..2947b783e2fc5 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/fmin_fmax.rs @@ -0,0 +1,167 @@ +/// Return the lesser of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2011 `minNum`. The result disregards signed zero (meaning if +/// the inputs are -0.0 and +0.0, either may be returned). 
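// For example, fminf(1.0, f32::NAN) == 1.0; NaN is returned only when both operands
// are NaN (see the spec tests below).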
+#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminf16(x: f16, y: f16) -> f16 { + super::generic::fmin(x, y) +} + +/// Return the lesser of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2011 `minNum`. The result disregards signed zero (meaning if +/// the inputs are -0.0 and +0.0, either may be returned). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminf(x: f32, y: f32) -> f32 { + super::generic::fmin(x, y) +} + +/// Return the lesser of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2011 `minNum`. The result disregards signed zero (meaning if +/// the inputs are -0.0 and +0.0, either may be returned). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmin(x: f64, y: f64) -> f64 { + super::generic::fmin(x, y) +} + +/// Return the lesser of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2011 `minNum`. The result disregards signed zero (meaning if +/// the inputs are -0.0 and +0.0, either may be returned). +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminf128(x: f128, y: f128) -> f128 { + super::generic::fmin(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2011 `maxNum`. The result disregards signed zero (meaning if +/// the inputs are -0.0 and +0.0, either may be returned). +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaxf16(x: f16, y: f16) -> f16 { + super::generic::fmax(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2011 `maxNum`. The result disregards signed zero (meaning if +/// the inputs are -0.0 and +0.0, either may be returned). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaxf(x: f32, y: f32) -> f32 { + super::generic::fmax(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2011 `maxNum`. The result disregards signed zero (meaning if +/// the inputs are -0.0 and +0.0, either may be returned). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmax(x: f64, y: f64) -> f64 { + super::generic::fmax(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2011 `maxNum`. The result disregards signed zero (meaning if +/// the inputs are -0.0 and +0.0, either may be returned). 
+#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaxf128(x: f128, y: f128) -> f128 { + super::generic::fmax(x, y) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::support::{Float, Hexf}; + + fn fmin_spec_test(f: impl Fn(F, F) -> F) { + let cases = [ + (F::ZERO, F::ZERO, F::ZERO), + (F::ONE, F::ONE, F::ONE), + (F::ZERO, F::ONE, F::ZERO), + (F::ONE, F::ZERO, F::ZERO), + (F::ZERO, F::NEG_ONE, F::NEG_ONE), + (F::NEG_ONE, F::ZERO, F::NEG_ONE), + (F::INFINITY, F::ZERO, F::ZERO), + (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY), + (F::NAN, F::ZERO, F::ZERO), + (F::ZERO, F::NAN, F::ZERO), + (F::NAN, F::NAN, F::NAN), + ]; + + for (x, y, res) in cases { + let val = f(x, y); + assert_biteq!(val, res, "fmin({}, {})", Hexf(x), Hexf(y)); + } + } + + #[test] + #[cfg(f16_enabled)] + fn fmin_spec_tests_f16() { + fmin_spec_test::(fminf16); + } + + #[test] + fn fmin_spec_tests_f32() { + fmin_spec_test::(fminf); + } + + #[test] + fn fmin_spec_tests_f64() { + fmin_spec_test::(fmin); + } + + #[test] + #[cfg(f128_enabled)] + fn fmin_spec_tests_f128() { + fmin_spec_test::(fminf128); + } + + fn fmax_spec_test(f: impl Fn(F, F) -> F) { + let cases = [ + (F::ZERO, F::ZERO, F::ZERO), + (F::ONE, F::ONE, F::ONE), + (F::ZERO, F::ONE, F::ONE), + (F::ONE, F::ZERO, F::ONE), + (F::ZERO, F::NEG_ONE, F::ZERO), + (F::NEG_ONE, F::ZERO, F::ZERO), + (F::INFINITY, F::ZERO, F::INFINITY), + (F::NEG_INFINITY, F::ZERO, F::ZERO), + (F::NAN, F::ZERO, F::ZERO), + (F::ZERO, F::NAN, F::ZERO), + (F::NAN, F::NAN, F::NAN), + ]; + + for (x, y, res) in cases { + let val = f(x, y); + assert_biteq!(val, res, "fmax({}, {})", Hexf(x), Hexf(y)); + } + } + + #[test] + #[cfg(f16_enabled)] + fn fmax_spec_tests_f16() { + fmax_spec_test::(fmaxf16); + } + + #[test] + fn fmax_spec_tests_f32() { + fmax_spec_test::(fmaxf); + } + + #[test] + fn fmax_spec_tests_f64() { + fmax_spec_test::(fmax); + } + + #[test] + #[cfg(f128_enabled)] + fn fmax_spec_tests_f128() { + fmax_spec_test::(fmaxf128); + } +} diff --git a/library/compiler-builtins/libm/src/math/fminimum_fmaximum.rs b/library/compiler-builtins/libm/src/math/fminimum_fmaximum.rs new file mode 100644 index 0000000000000..b7999e27392b1 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/fminimum_fmaximum.rs @@ -0,0 +1,163 @@ +/// Return the lesser of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2019 `minimum`. The result orders -0.0 < 0.0. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminimumf16(x: f16, y: f16) -> f16 { + super::generic::fminimum(x, y) +} + +/// Return the lesser of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2019 `minimum`. The result orders -0.0 < 0.0. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminimum(x: f64, y: f64) -> f64 { + super::generic::fminimum(x, y) +} + +/// Return the lesser of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2019 `minimum`. The result orders -0.0 < 0.0. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminimumf(x: f32, y: f32) -> f32 { + super::generic::fminimum(x, y) +} + +/// Return the lesser of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2019 `minimum`. The result orders -0.0 < 0.0. 
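// Note: IEEE 754-2019 `minimum`/`maximum` propagate NaN, and the spec tests below
// expect exactly that ((NAN, ZERO) maps to NAN), so a NaN operand yields NaN here;
// the "other argument" behaviour described above is that of the fmin/fmax
// (`minNum`/`maxNum`) family.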
+#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminimumf128(x: f128, y: f128) -> f128 { + super::generic::fminimum(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2019 `maximum`. The result orders -0.0 < 0.0. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaximumf16(x: f16, y: f16) -> f16 { + super::generic::fmaximum(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2019 `maximum`. The result orders -0.0 < 0.0. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaximumf(x: f32, y: f32) -> f32 { + super::generic::fmaximum(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2019 `maximum`. The result orders -0.0 < 0.0. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaximum(x: f64, y: f64) -> f64 { + super::generic::fmaximum(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2019 `maximum`. The result orders -0.0 < 0.0. +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaximumf128(x: f128, y: f128) -> f128 { + super::generic::fmaximum(x, y) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::support::{Float, Hexf}; + + fn fminimum_spec_test(f: impl Fn(F, F) -> F) { + let cases = [ + (F::ZERO, F::ZERO, F::ZERO), + (F::ONE, F::ONE, F::ONE), + (F::ZERO, F::ONE, F::ZERO), + (F::ONE, F::ZERO, F::ZERO), + (F::ZERO, F::NEG_ONE, F::NEG_ONE), + (F::NEG_ONE, F::ZERO, F::NEG_ONE), + (F::INFINITY, F::ZERO, F::ZERO), + (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY), + (F::NAN, F::ZERO, F::NAN), + (F::ZERO, F::NAN, F::NAN), + (F::NAN, F::NAN, F::NAN), + (F::ZERO, F::NEG_ZERO, F::NEG_ZERO), + (F::NEG_ZERO, F::ZERO, F::NEG_ZERO), + ]; + + for (x, y, res) in cases { + let val = f(x, y); + assert_biteq!(val, res, "fminimum({}, {})", Hexf(x), Hexf(y)); + } + } + + #[test] + #[cfg(f16_enabled)] + fn fminimum_spec_tests_f16() { + fminimum_spec_test::(fminimumf16); + } + + #[test] + fn fminimum_spec_tests_f32() { + fminimum_spec_test::(fminimumf); + } + + #[test] + fn fminimum_spec_tests_f64() { + fminimum_spec_test::(fminimum); + } + + #[test] + #[cfg(f128_enabled)] + fn fminimum_spec_tests_f128() { + fminimum_spec_test::(fminimumf128); + } + + fn fmaximum_spec_test(f: impl Fn(F, F) -> F) { + let cases = [ + (F::ZERO, F::ZERO, F::ZERO), + (F::ONE, F::ONE, F::ONE), + (F::ZERO, F::ONE, F::ONE), + (F::ONE, F::ZERO, F::ONE), + (F::ZERO, F::NEG_ONE, F::ZERO), + (F::NEG_ONE, F::ZERO, F::ZERO), + (F::INFINITY, F::ZERO, F::INFINITY), + (F::NEG_INFINITY, F::ZERO, F::ZERO), + (F::NAN, F::ZERO, F::NAN), + (F::ZERO, F::NAN, F::NAN), + (F::NAN, F::NAN, F::NAN), + (F::ZERO, F::NEG_ZERO, F::ZERO), + (F::NEG_ZERO, F::ZERO, F::ZERO), + ]; + + for (x, y, res) in cases { + let val = f(x, y); + assert_biteq!(val, res, "fmaximum({}, {})", Hexf(x), Hexf(y)); + } + } + + #[test] + #[cfg(f16_enabled)] + fn fmaximum_spec_tests_f16() { + fmaximum_spec_test::(fmaximumf16); + } + + #[test] + fn fmaximum_spec_tests_f32() { + fmaximum_spec_test::(fmaximumf); + } + + #[test] + fn fmaximum_spec_tests_f64() { + fmaximum_spec_test::(fmaximum); + } + + #[test] + #[cfg(f128_enabled)] + fn 
fmaximum_spec_tests_f128() { + fmaximum_spec_test::(fmaximumf128); + } +} diff --git a/library/compiler-builtins/libm/src/math/fminimum_fmaximum_num.rs b/library/compiler-builtins/libm/src/math/fminimum_fmaximum_num.rs new file mode 100644 index 0000000000000..180d21f72b74c --- /dev/null +++ b/library/compiler-builtins/libm/src/math/fminimum_fmaximum_num.rs @@ -0,0 +1,163 @@ +/// Return the lesser of two arguments or, if either argument is NaN, NaN. +/// +/// This coincides with IEEE 754-2019 `minimumNumber`. The result orders -0.0 < 0.0. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminimum_numf16(x: f16, y: f16) -> f16 { + super::generic::fminimum_num(x, y) +} + +/// Return the lesser of two arguments or, if either argument is NaN, NaN. +/// +/// This coincides with IEEE 754-2019 `minimumNumber`. The result orders -0.0 < 0.0. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminimum_numf(x: f32, y: f32) -> f32 { + super::generic::fminimum_num(x, y) +} + +/// Return the lesser of two arguments or, if either argument is NaN, NaN. +/// +/// This coincides with IEEE 754-2019 `minimumNumber`. The result orders -0.0 < 0.0. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminimum_num(x: f64, y: f64) -> f64 { + super::generic::fminimum_num(x, y) +} + +/// Return the lesser of two arguments or, if either argument is NaN, NaN. +/// +/// This coincides with IEEE 754-2019 `minimumNumber`. The result orders -0.0 < 0.0. +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminimum_numf128(x: f128, y: f128) -> f128 { + super::generic::fminimum_num(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, NaN. +/// +/// This coincides with IEEE 754-2019 `maximumNumber`. The result orders -0.0 < 0.0. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaximum_numf16(x: f16, y: f16) -> f16 { + super::generic::fmaximum_num(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, NaN. +/// +/// This coincides with IEEE 754-2019 `maximumNumber`. The result orders -0.0 < 0.0. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaximum_numf(x: f32, y: f32) -> f32 { + super::generic::fmaximum_num(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, NaN. +/// +/// This coincides with IEEE 754-2019 `maximumNumber`. The result orders -0.0 < 0.0. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaximum_num(x: f64, y: f64) -> f64 { + super::generic::fmaximum_num(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, NaN. +/// +/// This coincides with IEEE 754-2019 `maximumNumber`. The result orders -0.0 < 0.0. 
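+///
+/// For example (mirroring the spec tests below): `fmaximum_numf128(NAN, 0.0)` is `0.0`
+/// (the NaN operand is ignored), and `fmaximum_numf128(0.0, -0.0)` is `0.0`.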
+#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaximum_numf128(x: f128, y: f128) -> f128 { + super::generic::fmaximum_num(x, y) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::support::{Float, Hexf}; + + fn fminimum_num_spec_test(f: impl Fn(F, F) -> F) { + let cases = [ + (F::ZERO, F::ZERO, F::ZERO), + (F::ONE, F::ONE, F::ONE), + (F::ZERO, F::ONE, F::ZERO), + (F::ONE, F::ZERO, F::ZERO), + (F::ZERO, F::NEG_ONE, F::NEG_ONE), + (F::NEG_ONE, F::ZERO, F::NEG_ONE), + (F::INFINITY, F::ZERO, F::ZERO), + (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY), + (F::NAN, F::ZERO, F::ZERO), + (F::ZERO, F::NAN, F::ZERO), + (F::NAN, F::NAN, F::NAN), + (F::ZERO, F::NEG_ZERO, F::NEG_ZERO), + (F::NEG_ZERO, F::ZERO, F::NEG_ZERO), + ]; + + for (x, y, res) in cases { + let val = f(x, y); + assert_biteq!(val, res, "fminimum_num({}, {})", Hexf(x), Hexf(y)); + } + } + + #[test] + #[cfg(f16_enabled)] + fn fminimum_num_spec_tests_f16() { + fminimum_num_spec_test::(fminimum_numf16); + } + + #[test] + fn fminimum_num_spec_tests_f32() { + fminimum_num_spec_test::(fminimum_numf); + } + + #[test] + fn fminimum_num_spec_tests_f64() { + fminimum_num_spec_test::(fminimum_num); + } + + #[test] + #[cfg(f128_enabled)] + fn fminimum_num_spec_tests_f128() { + fminimum_num_spec_test::(fminimum_numf128); + } + + fn fmaximum_num_spec_test(f: impl Fn(F, F) -> F) { + let cases = [ + (F::ZERO, F::ZERO, F::ZERO), + (F::ONE, F::ONE, F::ONE), + (F::ZERO, F::ONE, F::ONE), + (F::ONE, F::ZERO, F::ONE), + (F::ZERO, F::NEG_ONE, F::ZERO), + (F::NEG_ONE, F::ZERO, F::ZERO), + (F::INFINITY, F::ZERO, F::INFINITY), + (F::NEG_INFINITY, F::ZERO, F::ZERO), + (F::NAN, F::ZERO, F::ZERO), + (F::ZERO, F::NAN, F::ZERO), + (F::NAN, F::NAN, F::NAN), + (F::ZERO, F::NEG_ZERO, F::ZERO), + (F::NEG_ZERO, F::ZERO, F::ZERO), + ]; + + for (x, y, res) in cases { + let val = f(x, y); + assert_biteq!(val, res, "fmaximum_num({}, {})", Hexf(x), Hexf(y)); + } + } + + #[test] + #[cfg(f16_enabled)] + fn fmaximum_num_spec_tests_f16() { + fmaximum_num_spec_test::(fmaximum_numf16); + } + + #[test] + fn fmaximum_num_spec_tests_f32() { + fmaximum_num_spec_test::(fmaximum_numf); + } + + #[test] + fn fmaximum_num_spec_tests_f64() { + fmaximum_num_spec_test::(fmaximum_num); + } + + #[test] + #[cfg(f128_enabled)] + fn fmaximum_num_spec_tests_f128() { + fmaximum_num_spec_test::(fmaximum_numf128); + } +} diff --git a/library/compiler-builtins/libm/src/math/fmod.rs b/library/compiler-builtins/libm/src/math/fmod.rs new file mode 100644 index 0000000000000..c4752b92578fc --- /dev/null +++ b/library/compiler-builtins/libm/src/math/fmod.rs @@ -0,0 +1,25 @@ +/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmodf16(x: f16, y: f16) -> f16 { + super::generic::fmod(x, y) +} + +/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmodf(x: f32, y: f32) -> f32 { + super::generic::fmod(x, y) +} + +/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmod(x: f64, y: f64) -> f64 { + super::generic::fmod(x, y) +} + +/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`. 
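+///
+/// For example, `fmodf128(5.5, 2.0)` is `1.5` (`5.5 - trunc(5.5 / 2.0) * 2.0 = 5.5 - 4.0`),
+/// and the result takes the sign of `x`, so `fmodf128(-5.5, 2.0)` is `-1.5`.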
+#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmodf128(x: f128, y: f128) -> f128 { + super::generic::fmod(x, y) +} diff --git a/library/compiler-builtins/libm/src/math/fmodf.rs b/library/compiler-builtins/libm/src/math/fmodf.rs new file mode 100644 index 0000000000000..4e95696e20d63 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/fmodf.rs @@ -0,0 +1,5 @@ +/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmodf(x: f32, y: f32) -> f32 { + super::generic::fmod(x, y) +} diff --git a/library/compiler-builtins/libm/src/math/fmodf128.rs b/library/compiler-builtins/libm/src/math/fmodf128.rs new file mode 100644 index 0000000000000..ff0e0493e26b6 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/fmodf128.rs @@ -0,0 +1,5 @@ +/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmodf128(x: f128, y: f128) -> f128 { + super::generic::fmod(x, y) +} diff --git a/library/compiler-builtins/libm/src/math/fmodf16.rs b/library/compiler-builtins/libm/src/math/fmodf16.rs new file mode 100644 index 0000000000000..11972a7de4ff0 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/fmodf16.rs @@ -0,0 +1,5 @@ +/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmodf16(x: f16, y: f16) -> f16 { + super::generic::fmod(x, y) +} diff --git a/library/compiler-builtins/libm/src/math/frexp.rs b/library/compiler-builtins/libm/src/math/frexp.rs new file mode 100644 index 0000000000000..de7a64fdae1af --- /dev/null +++ b/library/compiler-builtins/libm/src/math/frexp.rs @@ -0,0 +1,21 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn frexp(x: f64) -> (f64, i32) { + let mut y = x.to_bits(); + let ee = ((y >> 52) & 0x7ff) as i32; + + if ee == 0 { + if x != 0.0 { + let x1p64 = f64::from_bits(0x43f0000000000000); + let (x, e) = frexp(x * x1p64); + return (x, e - 64); + } + return (x, 0); + } else if ee == 0x7ff { + return (x, 0); + } + + let e = ee - 0x3fe; + y &= 0x800fffffffffffff; + y |= 0x3fe0000000000000; + return (f64::from_bits(y), e); +} diff --git a/library/compiler-builtins/libm/src/math/frexpf.rs b/library/compiler-builtins/libm/src/math/frexpf.rs new file mode 100644 index 0000000000000..0ec91c2d3507d --- /dev/null +++ b/library/compiler-builtins/libm/src/math/frexpf.rs @@ -0,0 +1,22 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn frexpf(x: f32) -> (f32, i32) { + let mut y = x.to_bits(); + let ee: i32 = ((y >> 23) & 0xff) as i32; + + if ee == 0 { + if x != 0.0 { + let x1p64 = f32::from_bits(0x5f800000); + let (x, e) = frexpf(x * x1p64); + return (x, e - 64); + } else { + return (x, 0); + } + } else if ee == 0xff { + return (x, 0); + } + + let e = ee - 0x7e; + y &= 0x807fffff; + y |= 0x3f000000; + (f32::from_bits(y), e) +} diff --git a/library/compiler-builtins/libm/src/math/generic/ceil.rs b/library/compiler-builtins/libm/src/math/generic/ceil.rs new file mode 100644 index 0000000000000..1072ba7c29b63 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/generic/ceil.rs @@ -0,0 +1,174 @@ +/* SPDX-License-Identifier: MIT */ +/* origin: musl src/math/ceilf.c */ + +//! Generic `ceil` algorithm. +//! +//! 
Note that this uses the algorithm from musl's `ceilf` rather than `ceil` or `ceill` because +//! performance seems to be better (based on icount) and it does not seem to experience rounding +//! errors on i386. + +use crate::support::{Float, FpResult, Int, IntTy, MinInt, Status}; + +#[inline] +pub fn ceil(x: F) -> F { + ceil_status(x).val +} + +#[inline] +pub fn ceil_status(x: F) -> FpResult { + let zero = IntTy::::ZERO; + + let mut ix = x.to_bits(); + let e = x.exp_unbiased(); + + // If the represented value has no fractional part, no truncation is needed. + if e >= F::SIG_BITS as i32 { + return FpResult::ok(x); + } + + let status; + let res = if e >= 0 { + // |x| >= 1.0 + let m = F::SIG_MASK >> e.unsigned(); + if (ix & m) == zero { + // Portion to be masked is already zero; no adjustment needed. + return FpResult::ok(x); + } + + // Otherwise, raise an inexact exception. + status = Status::INEXACT; + + if x.is_sign_positive() { + ix += m; + } + + ix &= !m; + F::from_bits(ix) + } else { + // |x| < 1.0, raise an inexact exception since truncation will happen (unless x == 0). + if ix & F::SIG_MASK == F::Int::ZERO { + status = Status::OK; + } else { + status = Status::INEXACT; + } + + if x.is_sign_negative() { + // -1.0 < x <= -0.0; rounding up goes toward -0.0. + F::NEG_ZERO + } else if ix << 1 != zero { + // 0.0 < x < 1.0; rounding up goes toward +1.0. + F::ONE + } else { + // +0.0 remains unchanged + x + } + }; + + FpResult::new(res, status) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::support::Hexf; + + /// Test against https://en.cppreference.com/w/cpp/numeric/math/ceil + fn spec_test(cases: &[(F, F, Status)]) { + let roundtrip = [ + F::ZERO, + F::ONE, + F::NEG_ONE, + F::NEG_ZERO, + F::INFINITY, + F::NEG_INFINITY, + ]; + + for x in roundtrip { + let FpResult { val, status } = ceil_status(x); + assert_biteq!(val, x, "{}", Hexf(x)); + assert_eq!(status, Status::OK, "{}", Hexf(x)); + } + + for &(x, res, res_stat) in cases { + let FpResult { val, status } = ceil_status(x); + assert_biteq!(val, res, "{}", Hexf(x)); + assert_eq!(status, res_stat, "{}", Hexf(x)); + } + } + + /* Skipping f16 / f128 "sanity_check"s due to rejected literal lexing at MSRV */ + + #[test] + #[cfg(f16_enabled)] + fn spec_tests_f16() { + let cases = [ + (0.1, 1.0, Status::INEXACT), + (-0.1, -0.0, Status::INEXACT), + (0.9, 1.0, Status::INEXACT), + (-0.9, -0.0, Status::INEXACT), + (1.1, 2.0, Status::INEXACT), + (-1.1, -1.0, Status::INEXACT), + (1.9, 2.0, Status::INEXACT), + (-1.9, -1.0, Status::INEXACT), + ]; + spec_test::(&cases); + } + + #[test] + fn sanity_check_f32() { + assert_eq!(ceil(1.1f32), 2.0); + assert_eq!(ceil(2.9f32), 3.0); + } + + #[test] + fn spec_tests_f32() { + let cases = [ + (0.1, 1.0, Status::INEXACT), + (-0.1, -0.0, Status::INEXACT), + (0.9, 1.0, Status::INEXACT), + (-0.9, -0.0, Status::INEXACT), + (1.1, 2.0, Status::INEXACT), + (-1.1, -1.0, Status::INEXACT), + (1.9, 2.0, Status::INEXACT), + (-1.9, -1.0, Status::INEXACT), + ]; + spec_test::(&cases); + } + + #[test] + fn sanity_check_f64() { + assert_eq!(ceil(1.1f64), 2.0); + assert_eq!(ceil(2.9f64), 3.0); + } + + #[test] + fn spec_tests_f64() { + let cases = [ + (0.1, 1.0, Status::INEXACT), + (-0.1, -0.0, Status::INEXACT), + (0.9, 1.0, Status::INEXACT), + (-0.9, -0.0, Status::INEXACT), + (1.1, 2.0, Status::INEXACT), + (-1.1, -1.0, Status::INEXACT), + (1.9, 2.0, Status::INEXACT), + (-1.9, -1.0, Status::INEXACT), + ]; + spec_test::(&cases); + } + + #[test] + #[cfg(f128_enabled)] + fn spec_tests_f128() { + let cases = [ + (0.1, 1.0, 
Status::INEXACT), + (-0.1, -0.0, Status::INEXACT), + (0.9, 1.0, Status::INEXACT), + (-0.9, -0.0, Status::INEXACT), + (1.1, 2.0, Status::INEXACT), + (-1.1, -1.0, Status::INEXACT), + (1.9, 2.0, Status::INEXACT), + (-1.9, -1.0, Status::INEXACT), + ]; + spec_test::(&cases); + } +} diff --git a/library/compiler-builtins/libm/src/math/generic/copysign.rs b/library/compiler-builtins/libm/src/math/generic/copysign.rs new file mode 100644 index 0000000000000..da9ce38788521 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/generic/copysign.rs @@ -0,0 +1,11 @@ +use crate::support::Float; + +/// Copy the sign of `y` to `x`. +#[inline] +pub fn copysign(x: F, y: F) -> F { + let mut ux = x.to_bits(); + let uy = y.to_bits(); + ux &= !F::SIGN_MASK; + ux |= uy & F::SIGN_MASK; + F::from_bits(ux) +} diff --git a/library/compiler-builtins/libm/src/math/generic/fabs.rs b/library/compiler-builtins/libm/src/math/generic/fabs.rs new file mode 100644 index 0000000000000..0adfa57d91b33 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/generic/fabs.rs @@ -0,0 +1,8 @@ +use crate::support::Float; + +/// Absolute value. +#[inline] +pub fn fabs(x: F) -> F { + let abs_mask = !F::SIGN_MASK; + F::from_bits(x.to_bits() & abs_mask) +} diff --git a/library/compiler-builtins/libm/src/math/generic/fdim.rs b/library/compiler-builtins/libm/src/math/generic/fdim.rs new file mode 100644 index 0000000000000..289e5fd96f86a --- /dev/null +++ b/library/compiler-builtins/libm/src/math/generic/fdim.rs @@ -0,0 +1,6 @@ +use crate::support::Float; + +#[inline] +pub fn fdim(x: F, y: F) -> F { + if x <= y { F::ZERO } else { x - y } +} diff --git a/library/compiler-builtins/libm/src/math/generic/floor.rs b/library/compiler-builtins/libm/src/math/generic/floor.rs new file mode 100644 index 0000000000000..e6dfd8866a425 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/generic/floor.rs @@ -0,0 +1,157 @@ +/* SPDX-License-Identifier: MIT + * origin: musl src/math/floor.c */ + +//! Generic `floor` algorithm. +//! +//! Note that this uses the algorithm from musl's `floorf` rather than `floor` or `floorl` because +//! performance seems to be better (based on icount) and it does not seem to experience rounding +//! errors on i386. + +use crate::support::{Float, FpResult, Int, IntTy, MinInt, Status}; + +#[inline] +pub fn floor(x: F) -> F { + floor_status(x).val +} + +#[inline] +pub fn floor_status(x: F) -> FpResult { + let zero = IntTy::::ZERO; + + let mut ix = x.to_bits(); + let e = x.exp_unbiased(); + + // If the represented value has no fractional part, no truncation is needed. + if e >= F::SIG_BITS as i32 { + return FpResult::ok(x); + } + + let status; + let res = if e >= 0 { + // |x| >= 1.0 + let m = F::SIG_MASK >> e.unsigned(); + if ix & m == zero { + // Portion to be masked is already zero; no adjustment needed. + return FpResult::ok(x); + } + + // Otherwise, raise an inexact exception. + status = Status::INEXACT; + + if x.is_sign_negative() { + ix += m; + } + + ix &= !m; + F::from_bits(ix) + } else { + // |x| < 1.0, raise an inexact exception since truncation will happen. + if ix & F::SIG_MASK == F::Int::ZERO { + status = Status::OK; + } else { + status = Status::INEXACT; + } + + if x.is_sign_positive() { + // 0.0 <= x < 1.0; rounding down goes toward +0.0. + F::ZERO + } else if ix << 1 != zero { + // -1.0 < x < 0.0; rounding down goes toward -1.0. 
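+ // For example, floor(-0.1) returns -1.0 and raises INEXACT, matching the spec
+ // tests below.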
+ F::NEG_ONE + } else { + // -0.0 remains unchanged + x + } + }; + + FpResult::new(res, status) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::support::Hexf; + + /// Test against https://en.cppreference.com/w/cpp/numeric/math/floor + fn spec_test(cases: &[(F, F, Status)]) { + let roundtrip = [ + F::ZERO, + F::ONE, + F::NEG_ONE, + F::NEG_ZERO, + F::INFINITY, + F::NEG_INFINITY, + ]; + + for x in roundtrip { + let FpResult { val, status } = floor_status(x); + assert_biteq!(val, x, "{}", Hexf(x)); + assert_eq!(status, Status::OK, "{}", Hexf(x)); + } + + for &(x, res, res_stat) in cases { + let FpResult { val, status } = floor_status(x); + assert_biteq!(val, res, "{}", Hexf(x)); + assert_eq!(status, res_stat, "{}", Hexf(x)); + } + } + + /* Skipping f16 / f128 "sanity_check"s and spec cases due to rejected literal lexing at MSRV */ + + #[test] + #[cfg(f16_enabled)] + fn spec_tests_f16() { + let cases = []; + spec_test::(&cases); + } + + #[test] + fn sanity_check_f32() { + assert_eq!(floor(0.5f32), 0.0); + assert_eq!(floor(1.1f32), 1.0); + assert_eq!(floor(2.9f32), 2.0); + } + + #[test] + fn spec_tests_f32() { + let cases = [ + (0.1, 0.0, Status::INEXACT), + (-0.1, -1.0, Status::INEXACT), + (0.9, 0.0, Status::INEXACT), + (-0.9, -1.0, Status::INEXACT), + (1.1, 1.0, Status::INEXACT), + (-1.1, -2.0, Status::INEXACT), + (1.9, 1.0, Status::INEXACT), + (-1.9, -2.0, Status::INEXACT), + ]; + spec_test::(&cases); + } + + #[test] + fn sanity_check_f64() { + assert_eq!(floor(1.1f64), 1.0); + assert_eq!(floor(2.9f64), 2.0); + } + + #[test] + fn spec_tests_f64() { + let cases = [ + (0.1, 0.0, Status::INEXACT), + (-0.1, -1.0, Status::INEXACT), + (0.9, 0.0, Status::INEXACT), + (-0.9, -1.0, Status::INEXACT), + (1.1, 1.0, Status::INEXACT), + (-1.1, -2.0, Status::INEXACT), + (1.9, 1.0, Status::INEXACT), + (-1.9, -2.0, Status::INEXACT), + ]; + spec_test::(&cases); + } + + #[test] + #[cfg(f128_enabled)] + fn spec_tests_f128() { + let cases = []; + spec_test::(&cases); + } +} diff --git a/library/compiler-builtins/libm/src/math/generic/fma.rs b/library/compiler-builtins/libm/src/math/generic/fma.rs new file mode 100644 index 0000000000000..aaf459d1b6147 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/generic/fma.rs @@ -0,0 +1,278 @@ +/* SPDX-License-Identifier: MIT */ +/* origin: musl src/math/fma.c. Ported to generic Rust algorithm in 2025, TG. */ + +use crate::support::{ + CastFrom, CastInto, DInt, Float, FpResult, HInt, Int, IntTy, MinInt, Round, Status, +}; + +/// Fused multiply-add that works when there is not a larger float size available. Computes +/// `(x * y) + z`. +#[inline] +pub fn fma_round(x: F, y: F, z: F, _round: Round) -> FpResult +where + F: Float, + F: CastFrom, + F: CastFrom, + F::Int: HInt, + u32: CastInto, +{ + let one = IntTy::::ONE; + let zero = IntTy::::ZERO; + + // Normalize such that the top of the mantissa is zero and we have a guard bit. + let nx = Norm::from_float(x); + let ny = Norm::from_float(y); + let nz = Norm::from_float(z); + + if nx.is_zero_nan_inf() || ny.is_zero_nan_inf() { + // Value will overflow, defer to non-fused operations. + return FpResult::ok(x * y + z); + } + + if nz.is_zero_nan_inf() { + if nz.is_zero() { + // Empty add component means we only need to multiply. + return FpResult::ok(x * y); + } + // `z` is NaN or infinity, which sets the result. 
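+ // Per IEEE 754, e.g. fma(1.0, 2.0, f64::INFINITY) is infinity and
+ // fma(1.0, 2.0, f64::NAN) is NaN; the finite product does not change that.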
+ return FpResult::ok(z); + } + + // multiply: r = x * y + let zhi: F::Int; + let zlo: F::Int; + let (mut rlo, mut rhi) = nx.m.widen_mul(ny.m).lo_hi(); + + // Exponent result of multiplication + let mut e: i32 = nx.e + ny.e; + // Needed shift to align `z` to the multiplication result + let mut d: i32 = nz.e - e; + let sbits = F::BITS as i32; + + // Scale `z`. Shift `z <<= kz`, `r >>= kr`, so `kz+kr == d`, set `e = e+kr` (== ez-kz) + if d > 0 { + // The magnitude of `z` is larger than `x * y` + if d < sbits { + // Maximum shift of one `F::BITS` means shifted `z` will fit into `2 * F::BITS`. Shift + // it into `(zhi, zlo)`. No exponent adjustment necessary. + zlo = nz.m << d; + zhi = nz.m >> (sbits - d); + } else { + // Shift larger than `sbits`, `z` only needs the top half `zhi`. Place it there (acts + // as a shift by `sbits`). + zlo = zero; + zhi = nz.m; + d -= sbits; + + // `z`'s exponent is large enough that it now needs to be taken into account. + e = nz.e - sbits; + + if d == 0 { + // Exactly `sbits`, nothing to do + } else if d < sbits { + // Remaining shift fits within `sbits`. Leave `z` in place, shift `x * y` + rlo = (rhi << (sbits - d)) | (rlo >> d); + // Set the sticky bit + rlo |= IntTy::::from((rlo << (sbits - d)) != zero); + rhi = rhi >> d; + } else { + // `z`'s magnitude is enough that `x * y` is irrelevant. It was nonzero, so set + // the sticky bit. + rlo = one; + rhi = zero; + } + } + } else { + // `z`'s magnitude once shifted fits entirely within `zlo` + zhi = zero; + d = -d; + if d == 0 { + // No shift needed + zlo = nz.m; + } else if d < sbits { + // Shift s.t. `nz.m` fits into `zlo` + let sticky = IntTy::::from((nz.m << (sbits - d)) != zero); + zlo = (nz.m >> d) | sticky; + } else { + // Would be entirely shifted out, only set the sticky bit + zlo = one; + } + } + + /* addition */ + + let mut neg = nx.neg ^ ny.neg; + let samesign: bool = !neg ^ nz.neg; + let mut rhi_nonzero = true; + + if samesign { + // r += z + rlo = rlo.wrapping_add(zlo); + rhi += zhi + IntTy::::from(rlo < zlo); + } else { + // r -= z + let (res, borrow) = rlo.overflowing_sub(zlo); + rlo = res; + rhi = rhi.wrapping_sub(zhi.wrapping_add(IntTy::::from(borrow))); + if (rhi >> (F::BITS - 1)) != zero { + rlo = rlo.signed().wrapping_neg().unsigned(); + rhi = rhi.signed().wrapping_neg().unsigned() - IntTy::::from(rlo != zero); + neg = !neg; + } + rhi_nonzero = rhi != zero; + } + + /* Construct result */ + + // Shift result into `rhi`, left-aligned. Last bit is sticky + if rhi_nonzero { + // `d` > 0, need to shift both `rhi` and `rlo` into result + e += sbits; + d = rhi.leading_zeros() as i32 - 1; + rhi = (rhi << d) | (rlo >> (sbits - d)); + // Update sticky + rhi |= IntTy::::from((rlo << d) != zero); + } else if rlo != zero { + // `rhi` is zero, `rlo` is the entire result and needs to be shifted + d = rlo.leading_zeros() as i32 - 1; + if d < 0 { + // Shift and set sticky + rhi = (rlo >> 1) | (rlo & one); + } else { + rhi = rlo << d; + } + } else { + // exact +/- 0.0 + return FpResult::ok(x * y + z); + } + + e -= d; + + // Use int->float conversion to populate the significand. 
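+ // For example, with `F = f64` the magnitude of `i` is in `[2^62, 2^63)`, so the
+ // `cast_from_lossy` conversion rounds away the low bits that do not fit into the
+ // 53-bit significand.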
+ // i is in [1 << (BITS - 2), (1 << (BITS - 1)) - 1] + let mut i: F::SignedInt = rhi.signed(); + + if neg { + i = -i; + } + + // `|r|` is in `[0x1p62,0x1p63]` for `f64` + let mut r: F = F::cast_from_lossy(i); + + /* Account for subnormal and rounding */ + + // Unbiased exponent for the maximum value of `r` + let max_pow = F::BITS - 1 + F::EXP_BIAS; + + let mut status = Status::OK; + + if e < -(max_pow as i32 - 2) { + // Result is subnormal before rounding + if e == -(max_pow as i32 - 1) { + let mut c = F::from_parts(false, max_pow, zero); + if neg { + c = -c; + } + + if r == c { + // Min normal after rounding, + status.set_underflow(true); + r = F::MIN_POSITIVE_NORMAL.copysign(r); + return FpResult::new(r, status); + } + + if (rhi << (F::SIG_BITS + 1)) != zero { + // Account for truncated bits. One bit will be lost in the `scalbn` call, add + // another top bit to avoid double rounding if inexact. + let iu: F::Int = (rhi >> 1) | (rhi & one) | (one << (F::BITS - 2)); + i = iu.signed(); + + if neg { + i = -i; + } + + r = F::cast_from_lossy(i); + + // Remove the top bit + r = F::cast_from(2i8) * r - c; + status.set_underflow(true); + } + } else { + // Only round once when scaled + d = F::EXP_BITS as i32 - 1; + let sticky = IntTy::::from(rhi << (F::BITS as i32 - d) != zero); + i = (((rhi >> d) | sticky) << d).signed(); + + if neg { + i = -i; + } + + r = F::cast_from_lossy(i); + } + } + + // Use our exponent to scale the final value. + FpResult::new(super::scalbn(r, e), status) +} + +/// Representation of `F` that has handled subnormals. +#[derive(Clone, Copy, Debug)] +struct Norm { + /// Normalized significand with one guard bit, unsigned. + m: F::Int, + /// Exponent of the mantissa such that `m * 2^e = x`. Accounts for the shift in the mantissa + /// and the guard bit; that is, 1.0 will normalize as `m = 1 << 53` and `e = -53`. + e: i32, + neg: bool, +} + +impl Norm { + /// Unbias the exponent and account for the mantissa's precision, including the guard bit. + const EXP_UNBIAS: u32 = F::EXP_BIAS + F::SIG_BITS + 1; + + /// Values greater than this had a saturated exponent (infinity or NaN), OR were zero and we + /// adjusted the exponent such that it exceeds this threashold. + const ZERO_INF_NAN: u32 = F::EXP_SAT - Self::EXP_UNBIAS; + + fn from_float(x: F) -> Self { + let mut ix = x.to_bits(); + let mut e = x.ex() as i32; + let neg = x.is_sign_negative(); + if e == 0 { + // Normalize subnormals by multiplication + let scale_i = F::BITS - 1; + let scale_f = F::from_parts(false, scale_i + F::EXP_BIAS, F::Int::ZERO); + let scaled = x * scale_f; + ix = scaled.to_bits(); + e = scaled.ex() as i32; + e = if e == 0 { + // If the exponent is still zero, the input was zero. Artifically set this value + // such that the final `e` will exceed `ZERO_INF_NAN`. + 1 << F::EXP_BITS + } else { + // Otherwise, account for the scaling we just did. + e - scale_i as i32 + }; + } + + e -= Self::EXP_UNBIAS as i32; + + // Absolute value, set the implicit bit, and shift to create a guard bit + ix &= F::SIG_MASK; + ix |= F::IMPLICIT_BIT; + ix <<= 1; + + Self { m: ix, e, neg } + } + + /// True if the value was zero, infinity, or NaN. + fn is_zero_nan_inf(self) -> bool { + self.e >= Self::ZERO_INF_NAN as i32 + } + + /// The only value we have + fn is_zero(self) -> bool { + // The only exponent that strictly exceeds this value is our sentinel value for zero. 
+ self.e > Self::ZERO_INF_NAN as i32 + } +} diff --git a/library/compiler-builtins/libm/src/math/generic/fma_wide.rs b/library/compiler-builtins/libm/src/math/generic/fma_wide.rs new file mode 100644 index 0000000000000..a2ef59d3e3d6f --- /dev/null +++ b/library/compiler-builtins/libm/src/math/generic/fma_wide.rs @@ -0,0 +1,73 @@ +use crate::support::{ + CastFrom, CastInto, DFloat, Float, FpResult, HFloat, IntTy, MinInt, Round, Status, +}; + +/// Fma implementation when a hardware-backed larger float type is available. For `f32` and `f64`, +/// `f64` has enough precision to represent the `f32` in its entirety, except for double rounding. +#[inline] +pub fn fma_wide_round(x: F, y: F, z: F, round: Round) -> FpResult +where + F: Float + HFloat, + B: Float + DFloat, + B::Int: CastInto, + i32: CastFrom, +{ + let one = IntTy::::ONE; + + let xy: B = x.widen() * y.widen(); + let mut result: B = xy + z.widen(); + let mut ui: B::Int = result.to_bits(); + let re = result.ex(); + let zb: B = z.widen(); + + let prec_diff = B::SIG_BITS - F::SIG_BITS; + let excess_prec = ui & ((one << prec_diff) - one); + let halfway = one << (prec_diff - 1); + + // Common case: the larger precision is fine if... + // This is not a halfway case + if excess_prec != halfway + // Or the result is NaN + || re == B::EXP_SAT + // Or the result is exact + || (result - xy == zb && result - zb == xy) + // Or the mode is something other than round to nearest + || round != Round::Nearest + { + let min_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN_SUBNORM) as u32; + let max_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN) as u32; + + let mut status = Status::OK; + + if (min_inexact_exp..max_inexact_exp).contains(&re) && status.inexact() { + // This branch is never hit; requires previous operations to set a status + status.set_inexact(false); + + result = xy + z.widen(); + if status.inexact() { + status.set_underflow(true); + } else { + status.set_inexact(true); + } + } + + return FpResult { + val: result.narrow(), + status, + }; + } + + let neg = ui >> (B::BITS - 1) != IntTy::::ZERO; + let err = if neg == (zb > xy) { + xy - result + zb + } else { + zb - result + xy + }; + if neg == (err < B::ZERO) { + ui += one; + } else { + ui -= one; + } + + FpResult::ok(B::from_bits(ui).narrow()) +} diff --git a/library/compiler-builtins/libm/src/math/generic/fmax.rs b/library/compiler-builtins/libm/src/math/generic/fmax.rs new file mode 100644 index 0000000000000..54207e4b3285f --- /dev/null +++ b/library/compiler-builtins/libm/src/math/generic/fmax.rs @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: MIT OR Apache-2.0 */ +//! IEEE 754-2011 `maxNum`. This has been superseded by IEEE 754-2019 `maximumNumber`. +//! +//! Per the spec, returns the canonicalized result of: +//! - `x` if `x > y` +//! - `y` if `y > x` +//! - The other number if one is NaN +//! - Otherwise, either `x` or `y`, canonicalized +//! - -0.0 and +0.0 may be disregarded (unlike newer operations) +//! +//! Excluded from our implementation is sNaN handling. +//! +//! More on the differences: [link]. +//! +//! 
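+//! For example, under these semantics a single NaN operand is ignored: `fmax(NAN, 0.0)` and
+//! `fmax(0.0, NAN)` both return `0.0`, while `fmax(NAN, NAN)` is NaN.
+//!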
[link]: https://grouper.ieee.org/groups/msc/ANSI_IEEE-Std-754-2019/background/minNum_maxNum_Removal_Demotion_v3.pdf + +use crate::support::Float; + +#[inline] +pub fn fmax(x: F, y: F) -> F { + let res = if x.is_nan() || x < y { y } else { x }; + // Canonicalize + res * F::ONE +} diff --git a/library/compiler-builtins/libm/src/math/generic/fmaximum.rs b/library/compiler-builtins/libm/src/math/generic/fmaximum.rs new file mode 100644 index 0000000000000..4b6295bc0c6bc --- /dev/null +++ b/library/compiler-builtins/libm/src/math/generic/fmaximum.rs @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: MIT OR Apache-2.0 */ +//! IEEE 754-2019 `maximum`. +//! +//! Per the spec, returns the canonicalized result of: +//! - `x` if `x > y` +//! - `y` if `y > x` +//! - qNaN if either operation is NaN +//! - Logic following +0.0 > -0.0 +//! +//! Excluded from our implementation is sNaN handling. + +use crate::support::Float; + +#[inline] +pub fn fmaximum(x: F, y: F) -> F { + let res = if x.is_nan() { + x + } else if y.is_nan() { + y + } else if x > y || (y.to_bits() == F::NEG_ZERO.to_bits() && x.is_sign_positive()) { + x + } else { + y + }; + + // Canonicalize + res * F::ONE +} diff --git a/library/compiler-builtins/libm/src/math/generic/fmaximum_num.rs b/library/compiler-builtins/libm/src/math/generic/fmaximum_num.rs new file mode 100644 index 0000000000000..2e97ff6d36905 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/generic/fmaximum_num.rs @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: MIT OR Apache-2.0 */ +//! IEEE 754-2019 `maximumNumber`. +//! +//! Per the spec, returns: +//! - `x` if `x > y` +//! - `y` if `y > x` +//! - Non-NaN if one operand is NaN +//! - Logic following +0.0 > -0.0 +//! - Either `x` or `y` if `x == y` and the signs are the same +//! - qNaN if either operand is a NaN +//! +//! Excluded from our implementation is sNaN handling. + +use crate::support::Float; + +#[inline] +pub fn fmaximum_num(x: F, y: F) -> F { + let res = + if x.is_nan() || x < y || (x.to_bits() == F::NEG_ZERO.to_bits() && y.is_sign_positive()) { + y + } else { + x + }; + + // Canonicalize + res * F::ONE +} diff --git a/library/compiler-builtins/libm/src/math/generic/fmin.rs b/library/compiler-builtins/libm/src/math/generic/fmin.rs new file mode 100644 index 0000000000000..0f86364d230b1 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/generic/fmin.rs @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: MIT OR Apache-2.0 */ +//! IEEE 754-2008 `minNum`. This has been superseded by IEEE 754-2019 `minimumNumber`. +//! +//! Per the spec, returns the canonicalized result of: +//! - `x` if `x < y` +//! - `y` if `y < x` +//! - The other number if one is NaN +//! - Otherwise, either `x` or `y`, canonicalized +//! - -0.0 and +0.0 may be disregarded (unlike newer operations) +//! +//! Excluded from our implementation is sNaN handling. +//! +//! More on the differences: [link]. +//! +//! [link]: https://grouper.ieee.org/groups/msc/ANSI_IEEE-Std-754-2019/background/minNum_maxNum_Removal_Demotion_v3.pdf + +use crate::support::Float; + +#[inline] +pub fn fmin(x: F, y: F) -> F { + let res = if y.is_nan() || x < y { x } else { y }; + // Canonicalize + res * F::ONE +} diff --git a/library/compiler-builtins/libm/src/math/generic/fminimum.rs b/library/compiler-builtins/libm/src/math/generic/fminimum.rs new file mode 100644 index 0000000000000..9dc0b64be3f2c --- /dev/null +++ b/library/compiler-builtins/libm/src/math/generic/fminimum.rs @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: MIT OR Apache-2.0 */ +//! 
IEEE 754-2019 `minimum`. +//! +//! Per the spec, returns the canonicalized result of: +//! - `x` if `x < y` +//! - `y` if `y < x` +//! - qNaN if either operation is NaN +//! - Logic following +0.0 > -0.0 +//! +//! Excluded from our implementation is sNaN handling. + +use crate::support::Float; + +#[inline] +pub fn fminimum(x: F, y: F) -> F { + let res = if x.is_nan() { + x + } else if y.is_nan() { + y + } else if x < y || (x.to_bits() == F::NEG_ZERO.to_bits() && y.is_sign_positive()) { + x + } else { + y + }; + + // Canonicalize + res * F::ONE +} diff --git a/library/compiler-builtins/libm/src/math/generic/fminimum_num.rs b/library/compiler-builtins/libm/src/math/generic/fminimum_num.rs new file mode 100644 index 0000000000000..40db8b18957bc --- /dev/null +++ b/library/compiler-builtins/libm/src/math/generic/fminimum_num.rs @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: MIT OR Apache-2.0 */ +//! IEEE 754-2019 `minimum`. +//! +//! Per the spec, returns: +//! - `x` if `x < y` +//! - `y` if `y < x` +//! - Non-NaN if one operand is NaN +//! - Logic following +0.0 > -0.0 +//! - Either `x` or `y` if `x == y` and the signs are the same +//! - qNaN if either operand is a NaN +//! +//! Excluded from our implementation is sNaN handling. + +use crate::support::Float; + +#[inline] +pub fn fminimum_num(x: F, y: F) -> F { + let res = + if y.is_nan() || x < y || (x.to_bits() == F::NEG_ZERO.to_bits() && y.is_sign_positive()) { + x + } else { + y + }; + + // Canonicalize + res * F::ONE +} diff --git a/library/compiler-builtins/libm/src/math/generic/fmod.rs b/library/compiler-builtins/libm/src/math/generic/fmod.rs new file mode 100644 index 0000000000000..29acc8a4d5df5 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/generic/fmod.rs @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: MIT OR Apache-2.0 */ +use crate::support::{CastFrom, Float, Int, MinInt}; + +#[inline] +pub fn fmod(x: F, y: F) -> F { + let _1 = F::Int::ONE; + let sx = x.to_bits() & F::SIGN_MASK; + let ux = x.to_bits() & !F::SIGN_MASK; + let uy = y.to_bits() & !F::SIGN_MASK; + + // Cases that return NaN: + // NaN % _ + // Inf % _ + // _ % NaN + // _ % 0 + let x_nan_or_inf = ux & F::EXP_MASK == F::EXP_MASK; + let y_nan_or_zero = uy.wrapping_sub(_1) & F::EXP_MASK == F::EXP_MASK; + if x_nan_or_inf | y_nan_or_zero { + return (x * y) / (x * y); + } + + if ux < uy { + // |x| < |y| + return x; + } + + let (num, ex) = into_sig_exp::(ux); + let (div, ey) = into_sig_exp::(uy); + + // To compute `(num << ex) % (div << ey)`, first + // evaluate `rem = (num << (ex - ey)) % div` ... + let rem = reduction(num, ex - ey, div); + // ... 
so the result will be `rem << ey` + + if rem.is_zero() { + // Return zero with the sign of `x` + return F::from_bits(sx); + }; + + // We would shift `rem` up by `ey`, but have to stop at `F::SIG_BITS` + let shift = ey.min(F::SIG_BITS - rem.ilog2()); + // Anything past that is added to the exponent field + let bits = (rem << shift) + (F::Int::cast_from(ey - shift) << F::SIG_BITS); + F::from_bits(sx + bits) +} + +/// Given the bits of a finite float, return a tuple of +/// - the mantissa with the implicit bit (0 if subnormal, 1 otherwise) +/// - the additional exponent past 1, (0 for subnormal, 0 or more otherwise) +fn into_sig_exp(mut bits: F::Int) -> (F::Int, u32) { + bits &= !F::SIGN_MASK; + // Subtract 1 from the exponent, clamping at 0 + let sat = bits.checked_sub(F::IMPLICIT_BIT).unwrap_or(F::Int::ZERO); + ( + bits - (sat & F::EXP_MASK), + u32::cast_from(sat >> F::SIG_BITS), + ) +} + +/// Compute the remainder `(x * 2.pow(e)) % y` without overflow. +fn reduction(mut x: I, e: u32, y: I) -> I { + x %= y; + for _ in 0..e { + x <<= 1; + x = x.checked_sub(y).unwrap_or(x); + } + x +} diff --git a/library/compiler-builtins/libm/src/math/generic/mod.rs b/library/compiler-builtins/libm/src/math/generic/mod.rs new file mode 100644 index 0000000000000..9d497a03f5447 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/generic/mod.rs @@ -0,0 +1,42 @@ +// Note: generic functions are marked `#[inline]` because, even though generic functions are +// typically inlined, this does not seem to always be the case. + +mod ceil; +mod copysign; +mod fabs; +mod fdim; +mod floor; +mod fma; +mod fma_wide; +mod fmax; +mod fmaximum; +mod fmaximum_num; +mod fmin; +mod fminimum; +mod fminimum_num; +mod fmod; +mod rint; +mod round; +mod scalbn; +mod sqrt; +mod trunc; + +pub use ceil::ceil; +pub use copysign::copysign; +pub use fabs::fabs; +pub use fdim::fdim; +pub use floor::floor; +pub use fma::fma_round; +pub use fma_wide::fma_wide_round; +pub use fmax::fmax; +pub use fmaximum::fmaximum; +pub use fmaximum_num::fmaximum_num; +pub use fmin::fmin; +pub use fminimum::fminimum; +pub use fminimum_num::fminimum_num; +pub use fmod::fmod; +pub use rint::rint_round; +pub use round::round; +pub use scalbn::scalbn; +pub use sqrt::sqrt; +pub use trunc::trunc; diff --git a/library/compiler-builtins/libm/src/math/generic/rint.rs b/library/compiler-builtins/libm/src/math/generic/rint.rs new file mode 100644 index 0000000000000..c5bc27d3de6bc --- /dev/null +++ b/library/compiler-builtins/libm/src/math/generic/rint.rs @@ -0,0 +1,130 @@ +/* SPDX-License-Identifier: MIT */ +/* origin: musl src/math/rint.c */ + +use crate::support::{Float, FpResult, Round}; + +/// IEEE 754-2019 `roundToIntegralExact`, which respects rounding mode and raises inexact if +/// applicable. +#[inline] +pub fn rint_round(x: F, _round: Round) -> FpResult { + let toint = F::ONE / F::EPSILON; + let e = x.ex(); + let positive = x.is_sign_positive(); + + // On i386 `force_eval!` must be used to force rounding via storage to memory. Otherwise, + // the excess precission from x87 would cause an incorrect final result. + let force = |x| { + if cfg!(x86_no_sse) && (F::BITS == 32 || F::BITS == 64) { + force_eval!(x) + } else { + x + } + }; + + let res = if e >= F::EXP_BIAS + F::SIG_BITS { + // No fractional part; exact result can be returned. + x + } else { + // Apply a net-zero adjustment that nudges `y` in the direction of the rounding mode. For + // Rust this is always nearest, but ideally it would take `round` into account. 
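+ //
+ // For example, for `f64` `toint` is `1.0 / EPSILON = 2^52`; evaluating
+ // `(1.6 + 2^52) - 2^52` forces the sum to round at integer granularity, so the
+ // fractional part is rounded away and the result is `2.0`.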
+ let y = if positive { + force(force(x) + toint) - toint + } else { + force(force(x) - toint) + toint + }; + + if y == F::ZERO { + // A zero result takes the sign of the input. + if positive { F::ZERO } else { F::NEG_ZERO } + } else { + y + } + }; + + FpResult::ok(res) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::support::{Hexf, Status}; + + fn spec_test(cases: &[(F, F, Status)]) { + let roundtrip = [ + F::ZERO, + F::ONE, + F::NEG_ONE, + F::NEG_ZERO, + F::INFINITY, + F::NEG_INFINITY, + ]; + + for x in roundtrip { + let FpResult { val, status } = rint_round(x, Round::Nearest); + assert_biteq!(val, x, "rint_round({})", Hexf(x)); + assert_eq!(status, Status::OK, "{}", Hexf(x)); + } + + for &(x, res, res_stat) in cases { + let FpResult { val, status } = rint_round(x, Round::Nearest); + assert_biteq!(val, res, "rint_round({})", Hexf(x)); + assert_eq!(status, res_stat, "{}", Hexf(x)); + } + } + + #[test] + #[cfg(f16_enabled)] + fn spec_tests_f16() { + let cases = []; + spec_test::(&cases); + } + + #[test] + fn spec_tests_f32() { + let cases = [ + (0.1, 0.0, Status::OK), + (-0.1, -0.0, Status::OK), + (0.5, 0.0, Status::OK), + (-0.5, -0.0, Status::OK), + (0.9, 1.0, Status::OK), + (-0.9, -1.0, Status::OK), + (1.1, 1.0, Status::OK), + (-1.1, -1.0, Status::OK), + (1.5, 2.0, Status::OK), + (-1.5, -2.0, Status::OK), + (1.9, 2.0, Status::OK), + (-1.9, -2.0, Status::OK), + (2.8, 3.0, Status::OK), + (-2.8, -3.0, Status::OK), + ]; + spec_test::(&cases); + } + + #[test] + fn spec_tests_f64() { + let cases = [ + (0.1, 0.0, Status::OK), + (-0.1, -0.0, Status::OK), + (0.5, 0.0, Status::OK), + (-0.5, -0.0, Status::OK), + (0.9, 1.0, Status::OK), + (-0.9, -1.0, Status::OK), + (1.1, 1.0, Status::OK), + (-1.1, -1.0, Status::OK), + (1.5, 2.0, Status::OK), + (-1.5, -2.0, Status::OK), + (1.9, 2.0, Status::OK), + (-1.9, -2.0, Status::OK), + (2.8, 3.0, Status::OK), + (-2.8, -3.0, Status::OK), + ]; + spec_test::(&cases); + } + + #[test] + #[cfg(f128_enabled)] + fn spec_tests_f128() { + let cases = []; + spec_test::(&cases); + } +} diff --git a/library/compiler-builtins/libm/src/math/generic/round.rs b/library/compiler-builtins/libm/src/math/generic/round.rs new file mode 100644 index 0000000000000..16739f01d8775 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/generic/round.rs @@ -0,0 +1,83 @@ +use super::{copysign, trunc}; +use crate::support::{Float, MinInt}; + +#[inline] +pub fn round(x: F) -> F { + let f0p5 = F::from_parts(false, F::EXP_BIAS - 1, F::Int::ZERO); // 0.5 + let f0p25 = F::from_parts(false, F::EXP_BIAS - 2, F::Int::ZERO); // 0.25 + + trunc(x + copysign(f0p5 - f0p25 * F::EPSILON, x)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + #[cfg(f16_enabled)] + fn zeroes_f16() { + assert_biteq!(round(0.0_f16), 0.0_f16); + assert_biteq!(round(-0.0_f16), -0.0_f16); + } + + #[test] + #[cfg(f16_enabled)] + fn sanity_check_f16() { + assert_eq!(round(-1.0_f16), -1.0); + assert_eq!(round(2.8_f16), 3.0); + assert_eq!(round(-0.5_f16), -1.0); + assert_eq!(round(0.5_f16), 1.0); + assert_eq!(round(-1.5_f16), -2.0); + assert_eq!(round(1.5_f16), 2.0); + } + + #[test] + fn zeroes_f32() { + assert_biteq!(round(0.0_f32), 0.0_f32); + assert_biteq!(round(-0.0_f32), -0.0_f32); + } + + #[test] + fn sanity_check_f32() { + assert_eq!(round(-1.0_f32), -1.0); + assert_eq!(round(2.8_f32), 3.0); + assert_eq!(round(-0.5_f32), -1.0); + assert_eq!(round(0.5_f32), 1.0); + assert_eq!(round(-1.5_f32), -2.0); + assert_eq!(round(1.5_f32), 2.0); + } + + #[test] + fn zeroes_f64() { + 
assert_biteq!(round(0.0_f64), 0.0_f64); + assert_biteq!(round(-0.0_f64), -0.0_f64); + } + + #[test] + fn sanity_check_f64() { + assert_eq!(round(-1.0_f64), -1.0); + assert_eq!(round(2.8_f64), 3.0); + assert_eq!(round(-0.5_f64), -1.0); + assert_eq!(round(0.5_f64), 1.0); + assert_eq!(round(-1.5_f64), -2.0); + assert_eq!(round(1.5_f64), 2.0); + } + + #[test] + #[cfg(f128_enabled)] + fn zeroes_f128() { + assert_biteq!(round(0.0_f128), 0.0_f128); + assert_biteq!(round(-0.0_f128), -0.0_f128); + } + + #[test] + #[cfg(f128_enabled)] + fn sanity_check_f128() { + assert_eq!(round(-1.0_f128), -1.0); + assert_eq!(round(2.8_f128), 3.0); + assert_eq!(round(-0.5_f128), -1.0); + assert_eq!(round(0.5_f128), 1.0); + assert_eq!(round(-1.5_f128), -2.0); + assert_eq!(round(1.5_f128), 2.0); + } +} diff --git a/library/compiler-builtins/libm/src/math/generic/scalbn.rs b/library/compiler-builtins/libm/src/math/generic/scalbn.rs new file mode 100644 index 0000000000000..6dd9b1a9b84a4 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/generic/scalbn.rs @@ -0,0 +1,121 @@ +use crate::support::{CastFrom, CastInto, Float, IntTy, MinInt}; + +/// Scale the exponent. +/// +/// From N3220: +/// +/// > The scalbn and scalbln functions compute `x * b^n`, where `b = FLT_RADIX` if the return type +/// > of the function is a standard floating type, or `b = 10` if the return type of the function +/// > is a decimal floating type. A range error occurs for some finite x, depending on n. +/// > +/// > [...] +/// > +/// > * `scalbn(±0, n)` returns `±0`. +/// > * `scalbn(x, 0)` returns `x`. +/// > * `scalbn(±∞, n)` returns `±∞`. +/// > +/// > If the calculation does not overflow or underflow, the returned value is exact and +/// > independent of the current rounding direction mode. +#[inline] +pub fn scalbn(mut x: F, mut n: i32) -> F +where + u32: CastInto, + F::Int: CastFrom, + F::Int: CastFrom, +{ + let zero = IntTy::::ZERO; + + // Bits including the implicit bit + let sig_total_bits = F::SIG_BITS + 1; + + // Maximum and minimum values when biased + let exp_max = F::EXP_MAX; + let exp_min = F::EXP_MIN; + + // 2 ^ Emax, maximum positive with null significand (0x1p1023 for f64) + let f_exp_max = F::from_parts(false, F::EXP_BIAS << 1, zero); + + // 2 ^ Emin, minimum positive normal with null significand (0x1p-1022 for f64) + let f_exp_min = F::from_parts(false, 1, zero); + + // 2 ^ sig_total_bits, moltiplier to normalize subnormals (0x1p53 for f64) + let f_pow_subnorm = F::from_parts(false, sig_total_bits + F::EXP_BIAS, zero); + + /* + * The goal is to multiply `x` by a scale factor that applies `n`. However, there are cases + * where `2^n` is not representable by `F` but the result should be, e.g. `x = 2^Emin` with + * `n = -EMin + 2` (one out of range of 2^Emax). To get around this, reduce the magnitude of + * the final scale operation by prescaling by the max/min power representable by `F`. + */ + + if n > exp_max { + // Worse case positive `n`: `x` is the minimum subnormal value, the result is `F::MAX`. + // This can be reached by three scaling multiplications (two here and one final). + debug_assert!(-exp_min + F::SIG_BITS as i32 + exp_max <= exp_max * 3); + + x *= f_exp_max; + n -= exp_max; + if n > exp_max { + x *= f_exp_max; + n -= exp_max; + if n > exp_max { + n = exp_max; + } + } + } else if n < exp_min { + // When scaling toward 0, the prescaling is limited to a value that does not allow `x` to + // go subnormal. This avoids double rounding. + if F::BITS > 16 { + // `mul` s.t. 
`!(x * mul).is_subnormal() ∀ x` + let mul = f_exp_min * f_pow_subnorm; + let add = -exp_min - sig_total_bits as i32; + + // Worse case negative `n`: `x` is the maximum positive value, the result is `F::MIN`. + // This must be reachable by three scaling multiplications (two here and one final). + debug_assert!(-exp_min + F::SIG_BITS as i32 + exp_max <= add * 2 + -exp_min); + + x *= mul; + n += add; + + if n < exp_min { + x *= mul; + n += add; + + if n < exp_min { + n = exp_min; + } + } + } else { + // `f16` is unique compared to other float types in that the difference between the + // minimum exponent and the significand bits (`add = -exp_min - sig_total_bits`) is + // small, only three. The above method depend on decrementing `n` by `add` two times; + // for other float types this works out because `add` is a substantial fraction of + // the exponent range. For `f16`, however, 3 is relatively small compared to the + // exponent range (which is 39), so that requires ~10 prescale rounds rather than two. + // + // Work aroudn this by using a different algorithm that calculates the prescale + // dynamically based on the maximum possible value. This adds more operations per round + // since it needs to construct the scale, but works better in the general case. + let add = -(n + sig_total_bits as i32).clamp(exp_min, sig_total_bits as i32); + let mul = F::from_parts(false, (F::EXP_BIAS as i32 - add) as u32, zero); + + x *= mul; + n += add; + + if n < exp_min { + let add = -(n + sig_total_bits as i32).clamp(exp_min, sig_total_bits as i32); + let mul = F::from_parts(false, (F::EXP_BIAS as i32 - add) as u32, zero); + + x *= mul; + n += add; + + if n < exp_min { + n = exp_min; + } + } + } + } + + let scale = F::from_parts(false, (F::EXP_BIAS as i32 + n) as u32, zero); + x * scale +} diff --git a/library/compiler-builtins/libm/src/math/generic/sqrt.rs b/library/compiler-builtins/libm/src/math/generic/sqrt.rs new file mode 100644 index 0000000000000..9481c4cdb7bbe --- /dev/null +++ b/library/compiler-builtins/libm/src/math/generic/sqrt.rs @@ -0,0 +1,541 @@ +/* SPDX-License-Identifier: MIT */ +/* origin: musl src/math/sqrt.c. Ported to generic Rust algorithm in 2025, TG. */ + +//! Generic square root algorithm. +//! +//! This routine operates around `m_u2`, a U.2 (fixed point with two integral bits) mantissa +//! within the range [1, 4). A table lookup provides an initial estimate, then goldschmidt +//! iterations at various widths are used to approach the real values. +//! +//! For the iterations, `r` is a U0 number that approaches `1/sqrt(m_u2)`, and `s` is a U2 number +//! that approaches `sqrt(m_u2)`. Recall that m_u2 ∈ [1, 4). +//! +//! With Newton-Raphson iterations, this would be: +//! +//! - `w = r * r w ~ 1 / m` +//! - `u = 3 - m * w u ~ 3 - m * w = 3 - m / m = 2` +//! - `r = r * u / 2 r ~ r` +//! +//! (Note that the righthand column does not show anything analytically meaningful (i.e. r ~ r), +//! since the value of performing one iteration is in reducing the error representable by `~`). +//! +//! Instead of Newton-Raphson iterations, Goldschmidt iterations are used to calculate +//! `s = m * r`: +//! +//! - `s = m * r s ~ m / sqrt(m)` +//! - `u = 3 - s * r u ~ 3 - (m / sqrt(m)) * (1 / sqrt(m)) = 3 - m / m = 2` +//! - `r = r * u / 2 r ~ r` +//! - `s = s * u / 2 s ~ s` +//! +//! The above is precise because it uses the original value `m`. There is also a faster version +//! that performs fewer steps but does not use `m`: +//! +//! - `u = 3 - s * r u ~ 3 - 1` +//! - `r = r * u / 2 r ~ r` +//! 
- `s = s * u / 2 s ~ s` +//! +//! Rounding errors accumulate faster with the second version, so it is only used for subsequent +//! iterations within the same width integer. The first version is always used for the first +//! iteration at a new width in order to avoid this accumulation. +//! +//! Goldschmidt has the advantage over Newton-Raphson that `sqrt(x)` and `1/sqrt(x)` are +//! computed at the same time, i.e. there is no need to calculate `1/sqrt(x)` and invert it. + +use crate::support::{ + CastFrom, CastInto, DInt, Float, FpResult, HInt, Int, IntTy, MinInt, Round, Status, cold_path, +}; + +#[inline] +pub fn sqrt(x: F) -> F +where + F: Float + SqrtHelper, + F::Int: HInt, + F::Int: From, + F::Int: From, + F::Int: CastInto, + F::Int: CastInto, + u32: CastInto, +{ + sqrt_round(x, Round::Nearest).val +} + +#[inline] +pub fn sqrt_round(x: F, _round: Round) -> FpResult +where + F: Float + SqrtHelper, + F::Int: HInt, + F::Int: From, + F::Int: From, + F::Int: CastInto, + F::Int: CastInto, + u32: CastInto, +{ + let zero = IntTy::::ZERO; + let one = IntTy::::ONE; + + let mut ix = x.to_bits(); + + // Top is the exponent and sign, which may or may not be shifted. If the float fits into a + // `u32`, we can get by without paying shifting costs. + let noshift = F::BITS <= u32::BITS; + let (mut top, special_case) = if noshift { + let exp_lsb = one << F::SIG_BITS; + let special_case = ix.wrapping_sub(exp_lsb) >= F::EXP_MASK - exp_lsb; + (Exp::NoShift(()), special_case) + } else { + let top = u32::cast_from(ix >> F::SIG_BITS); + let special_case = top.wrapping_sub(1) >= F::EXP_SAT - 1; + (Exp::Shifted(top), special_case) + }; + + // Handle NaN, zero, and out of domain (<= 0) + if special_case { + cold_path(); + + // +/-0 + if ix << 1 == zero { + return FpResult::ok(x); + } + + // Positive infinity + if ix == F::EXP_MASK { + return FpResult::ok(x); + } + + // NaN or negative + if ix > F::EXP_MASK { + return FpResult::new(F::NAN, Status::INVALID); + } + + // Normalize subnormals by multiplying by 1.0 << SIG_BITS (e.g. 0x1p52 for doubles). + let scaled = x * F::from_parts(false, F::SIG_BITS + F::EXP_BIAS, zero); + ix = scaled.to_bits(); + match top { + Exp::Shifted(ref mut v) => { + *v = scaled.ex(); + *v = (*v).wrapping_sub(F::SIG_BITS); + } + Exp::NoShift(()) => { + ix = ix.wrapping_sub((F::SIG_BITS << F::SIG_BITS).cast()); + } + } + } + + // Reduce arguments such that `x = 4^e * m`: + // + // - m_u2 ∈ [1, 4), a fixed point U2.BITS number + // - 2^e is the exponent part of the result + let (m_u2, exp) = match top { + Exp::Shifted(top) => { + // We now know `x` is positive, so `top` is just its (biased) exponent + let mut e = top; + // Construct a fixed point representation of the mantissa. + let mut m_u2 = (ix | F::IMPLICIT_BIT) << F::EXP_BITS; + let even = (e & 1) != 0; + if even { + m_u2 >>= 1; + } + e = (e.wrapping_add(F::EXP_SAT >> 1)) >> 1; + (m_u2, Exp::Shifted(e)) + } + Exp::NoShift(()) => { + let even = ix & (one << F::SIG_BITS) != zero; + + // Exponent part of the return value + let mut e_noshift = ix >> 1; + // ey &= (F::EXP_MASK << 2) >> 2; // clear the top exponent bit (result = 1.0) + e_noshift += (F::EXP_MASK ^ (F::SIGN_MASK >> 1)) >> 1; + e_noshift &= F::EXP_MASK; + + let m1 = (ix << F::EXP_BITS) | F::SIGN_MASK; + let m0 = (ix << (F::EXP_BITS - 1)) & !F::SIGN_MASK; + let m_u2 = if even { m0 } else { m1 }; + + (m_u2, Exp::NoShift(e_noshift)) + } + }; + + // Extract the top 6 bits of the significand with the lowest bit of the exponent. 
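+ // (7 bits total, selecting one of the 128 entries of `RSQRT_TAB` below.)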
+ let i = usize::cast_from(ix >> (F::SIG_BITS - 6)) & 0b1111111; + + // Start with an initial guess for `r = 1 / sqrt(m)` from the table, and shift `m` as an + // initial value for `s = sqrt(m)`. See the module documentation for details. + let r1_u0: F::ISet1 = F::ISet1::cast_from(RSQRT_TAB[i]) << (F::ISet1::BITS - 16); + let s1_u2: F::ISet1 = ((m_u2) >> (F::BITS - F::ISet1::BITS)).cast(); + + // Perform iterations, if any, at quarter width (used for `f128`). + let (r1_u0, _s1_u2) = goldschmidt::(r1_u0, s1_u2, F::SET1_ROUNDS, false); + + // Widen values and perform iterations at half width (used for `f64` and `f128`). + let r2_u0: F::ISet2 = F::ISet2::from(r1_u0) << (F::ISet2::BITS - F::ISet1::BITS); + let s2_u2: F::ISet2 = ((m_u2) >> (F::BITS - F::ISet2::BITS)).cast(); + let (r2_u0, _s2_u2) = goldschmidt::(r2_u0, s2_u2, F::SET2_ROUNDS, false); + + // Perform final iterations at full width (used for all float types). + let r_u0: F::Int = F::Int::from(r2_u0) << (F::BITS - F::ISet2::BITS); + let s_u2: F::Int = m_u2; + let (_r_u0, s_u2) = goldschmidt::(r_u0, s_u2, F::FINAL_ROUNDS, true); + + // Shift back to mantissa position. + let mut m = s_u2 >> (F::EXP_BITS - 2); + + // The musl source includes the following comment (with literals replaced): + // + // > s < sqrt(m) < s + 0x1.09p-SIG_BITS + // > compute nearest rounded result: the nearest result to SIG_BITS bits is either s or + // > s+0x1p-SIG_BITS, we can decide by comparing (2^SIG_BITS s + 0.5)^2 to 2^(2*SIG_BITS) m. + // + // Expanding this with , with `SIG_BITS = p` and adjusting based on the operations done to + // `d0` and `d1`: + // + // - `2^(2p)m ≟ ((2^p)m + 0.5)^2` + // - `2^(2p)m ≟ 2^(2p)m^2 + (2^p)m + 0.25` + // - `2^(2p)m - m^2 ≟ (2^(2p) - 1)m^2 + (2^p)m + 0.25` + // - `(1 - 2^(2p))m + m^2 ≟ (1 - 2^(2p))m^2 + (1 - 2^p)m + 0.25` (?) + // + // I do not follow how the rounding bit is extracted from this comparison with the below + // operations. In any case, the algorithm is well tested. + + // The value needed to shift `m_u2` by to create `m*2^(2p)`. `2p = 2 * F::SIG_BITS`, + // `F::BITS - 2` accounts for the offset that `m_u2` already has. + let shift = 2 * F::SIG_BITS - (F::BITS - 2); + + // `2^(2p)m - m^2` + let d0 = (m_u2 << shift).wrapping_sub(m.wrapping_mul(m)); + // `m - 2^(2p)m + m^2` + let d1 = m.wrapping_sub(d0); + m += d1 >> (F::BITS - 1); + m &= F::SIG_MASK; + + match exp { + Exp::Shifted(e) => m |= IntTy::::cast_from(e) << F::SIG_BITS, + Exp::NoShift(e) => m |= e, + }; + + let mut y = F::from_bits(m); + + // FIXME(f16): the fenv math does not work for `f16` + if F::BITS > 16 { + // Handle rounding and inexact. `(m + 1)^2 == 2^shift m` is exact; for all other cases, add + // a tiny value to cause fenv effects. + let d2 = d1.wrapping_add(m).wrapping_add(one); + let mut tiny = if d2 == zero { + cold_path(); + zero + } else { + F::IMPLICIT_BIT + }; + + tiny |= (d1 ^ d2) & F::SIGN_MASK; + let t = F::from_bits(tiny); + y = y + t; + } + + FpResult::ok(y) +} + +/// Multiply at the wider integer size, returning the high half. +fn wmulh(a: I, b: I) -> I { + a.widen_mul(b).hi() +} + +/// Perform `count` goldschmidt iterations, returning `(r_u0, s_u?)`. +/// +/// - `r_u0` is the reciprocal `r ~ 1 / sqrt(m)`, as U0. +/// - `s_u2` is the square root, `s ~ sqrt(m)`, as U2. +/// - `count` is the number of iterations to perform. +/// - `final_set` should be true if this is the last round (same-sized integer). If so, the +/// returned `s` will be U3, for later shifting. Otherwise, the returned `s` is U2. 
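+///
+/// For example, per the `SqrtHelper` impls below, `f32` performs three rounds at full `u32`
+/// width, while `f64` performs two rounds at `u32` width followed by two at `u64`.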
+/// +/// Note that performance relies on the optimizer being able to unroll these loops (reasonably +/// trivial, `count` is a constant when called). +#[inline] +fn goldschmidt(mut r_u0: I, mut s_u2: I, count: u32, final_set: bool) -> (I, I) +where + F: SqrtHelper, + I: HInt + From, +{ + let three_u2 = I::from(0b11u8) << (I::BITS - 2); + let mut u_u0 = r_u0; + + for i in 0..count { + // First iteration: `s = m*r` (`u_u0 = r_u0` set above) + // Subsequent iterations: `s=s*u/2` + s_u2 = wmulh(s_u2, u_u0); + + // Perform `s /= 2` if: + // + // 1. This is not the first iteration (the first iteration is `s = m*r`)... + // 2. ... and this is not the last set of iterations + // 3. ... or, if this is the last set, it is not the last iteration + // + // This step is not performed for the final iteration because the shift is combined with + // a later shift (moving `s` into the mantissa). + if i > 0 && (!final_set || i + 1 < count) { + s_u2 <<= 1; + } + + // u = 3 - s*r + let d_u2 = wmulh(s_u2, r_u0); + u_u0 = three_u2.wrapping_sub(d_u2); + + // r = r*u/2 + r_u0 = wmulh(r_u0, u_u0) << 1; + } + + (r_u0, s_u2) +} + +/// Representation of whether we shift the exponent into a `u32`, or modify it in place to save +/// the shift operations. +enum Exp { + /// The exponent has been shifted to a `u32` and is LSB-aligned. + Shifted(u32), + /// The exponent is in its natural position in integer repr. + NoShift(T), +} + +/// Size-specific constants related to the square root routine. +pub trait SqrtHelper: Float { + /// Integer for the first set of rounds. If unused, set to the same type as the next set. + type ISet1: HInt + Into + CastFrom + From; + /// Integer for the second set of rounds. If unused, set to the same type as the next set. + type ISet2: HInt + From + From; + + /// Number of rounds at `ISet1`. + const SET1_ROUNDS: u32 = 0; + /// Number of rounds at `ISet2`. + const SET2_ROUNDS: u32 = 0; + /// Number of rounds at `Self::Int`. + const FINAL_ROUNDS: u32; +} + +#[cfg(f16_enabled)] +impl SqrtHelper for f16 { + type ISet1 = u16; // unused + type ISet2 = u16; // unused + + const FINAL_ROUNDS: u32 = 2; +} + +impl SqrtHelper for f32 { + type ISet1 = u32; // unused + type ISet2 = u32; // unused + + const FINAL_ROUNDS: u32 = 3; +} + +impl SqrtHelper for f64 { + type ISet1 = u32; // unused + type ISet2 = u32; + + const SET2_ROUNDS: u32 = 2; + const FINAL_ROUNDS: u32 = 2; +} + +#[cfg(f128_enabled)] +impl SqrtHelper for f128 { + type ISet1 = u32; + type ISet2 = u64; + + const SET1_ROUNDS: u32 = 1; + const SET2_ROUNDS: u32 = 2; + const FINAL_ROUNDS: u32 = 2; +} + +/// A U0.16 representation of `1/sqrt(x)`. +/// +/// The index is a 7-bit number consisting of a single exponent bit and 6 bits of significand. 
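+///
+/// Entries 0..64 approximate `1/sqrt(m)` for `m` roughly in `[2, 4)`; entries 64..128 cover
+/// `m` roughly in `[1, 2)`.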
+#[rustfmt::skip] +static RSQRT_TAB: [u16; 128] = [ + 0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43, + 0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b, + 0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1, + 0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430, + 0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59, + 0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925, + 0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479, + 0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040, + 0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234, + 0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2, + 0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1, + 0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192, + 0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f, + 0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4, + 0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59, + 0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560, +]; + +#[cfg(test)] +mod tests { + use super::*; + + /// Test behavior specified in IEEE 754 `squareRoot`. + fn spec_test() + where + F: Float + SqrtHelper, + F::Int: HInt, + F::Int: From, + F::Int: From, + F::Int: CastInto, + F::Int: CastInto, + u32: CastInto, + { + // Values that should return a NaN and raise invalid + let nan = [F::NEG_INFINITY, F::NEG_ONE, F::NAN, F::MIN]; + + // Values that return unaltered + let roundtrip = [F::ZERO, F::NEG_ZERO, F::INFINITY]; + + for x in nan { + let FpResult { val, status } = sqrt_round(x, Round::Nearest); + assert!(val.is_nan()); + assert!(status == Status::INVALID); + } + + for x in roundtrip { + let FpResult { val, status } = sqrt_round(x, Round::Nearest); + assert_biteq!(val, x); + assert!(status == Status::OK); + } + } + + #[test] + #[cfg(f16_enabled)] + fn sanity_check_f16() { + assert_biteq!(sqrt(100.0f16), 10.0); + assert_biteq!(sqrt(4.0f16), 2.0); + } + + #[test] + #[cfg(f16_enabled)] + fn spec_tests_f16() { + spec_test::(); + } + + #[test] + #[cfg(f16_enabled)] + #[allow(clippy::approx_constant)] + fn conformance_tests_f16() { + let cases = [ + (f16::PI, 0x3f17_u16), + // 10_000.0, using a hex literal for MSRV hack (Rust < 1.67 checks literal widths as + // part of the AST, so the `cfg` is irrelevant here). 
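+            // (The expected bits `0x5640` encode 100.0.)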
+ (f16::from_bits(0x70e2), 0x5640_u16), + (f16::from_bits(0x0000000f), 0x13bf_u16), + (f16::INFINITY, f16::INFINITY.to_bits()), + ]; + + for (input, output) in cases { + assert_biteq!( + sqrt(input), + f16::from_bits(output), + "input: {input:?} ({:#018x})", + input.to_bits() + ); + } + } + + #[test] + fn sanity_check_f32() { + assert_biteq!(sqrt(100.0f32), 10.0); + assert_biteq!(sqrt(4.0f32), 2.0); + } + + #[test] + fn spec_tests_f32() { + spec_test::(); + } + + #[test] + #[allow(clippy::approx_constant)] + fn conformance_tests_f32() { + let cases = [ + (f32::PI, 0x3fe2dfc5_u32), + (10000.0f32, 0x42c80000_u32), + (f32::from_bits(0x0000000f), 0x1b2f456f_u32), + (f32::INFINITY, f32::INFINITY.to_bits()), + ]; + + for (input, output) in cases { + assert_biteq!( + sqrt(input), + f32::from_bits(output), + "input: {input:?} ({:#018x})", + input.to_bits() + ); + } + } + + #[test] + fn sanity_check_f64() { + assert_biteq!(sqrt(100.0f64), 10.0); + assert_biteq!(sqrt(4.0f64), 2.0); + } + + #[test] + fn spec_tests_f64() { + spec_test::(); + } + + #[test] + #[allow(clippy::approx_constant)] + fn conformance_tests_f64() { + let cases = [ + (f64::PI, 0x3ffc5bf891b4ef6a_u64), + (10000.0, 0x4059000000000000_u64), + (f64::from_bits(0x0000000f), 0x1e7efbdeb14f4eda_u64), + (f64::INFINITY, f64::INFINITY.to_bits()), + ]; + + for (input, output) in cases { + assert_biteq!( + sqrt(input), + f64::from_bits(output), + "input: {input:?} ({:#018x})", + input.to_bits() + ); + } + } + + #[test] + #[cfg(f128_enabled)] + fn sanity_check_f128() { + assert_biteq!(sqrt(100.0f128), 10.0); + assert_biteq!(sqrt(4.0f128), 2.0); + } + + #[test] + #[cfg(f128_enabled)] + fn spec_tests_f128() { + spec_test::(); + } + + #[test] + #[cfg(f128_enabled)] + #[allow(clippy::approx_constant)] + fn conformance_tests_f128() { + let cases = [ + (f128::PI, 0x3fffc5bf891b4ef6aa79c3b0520d5db9_u128), + // 10_000.0, see `f16` for reasoning. + ( + f128::from_bits(0x400c3880000000000000000000000000), + 0x40059000000000000000000000000000_u128, + ), + ( + f128::from_bits(0x0000000f), + 0x1fc9efbdeb14f4ed9b17ae807907e1e9_u128, + ), + (f128::INFINITY, f128::INFINITY.to_bits()), + ]; + + for (input, output) in cases { + assert_biteq!( + sqrt(input), + f128::from_bits(output), + "input: {input:?} ({:#018x})", + input.to_bits() + ); + } + } +} diff --git a/library/compiler-builtins/libm/src/math/generic/trunc.rs b/library/compiler-builtins/libm/src/math/generic/trunc.rs new file mode 100644 index 0000000000000..d5b444d15dfc0 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/generic/trunc.rs @@ -0,0 +1,148 @@ +/* SPDX-License-Identifier: MIT + * origin: musl src/math/trunc.c */ + +use crate::support::{Float, FpResult, Int, IntTy, MinInt, Status}; + +#[inline] +pub fn trunc(x: F) -> F { + trunc_status(x).val +} + +#[inline] +pub fn trunc_status(x: F) -> FpResult { + let mut xi: F::Int = x.to_bits(); + let e: i32 = x.exp_unbiased(); + + // C1: The represented value has no fractional part, so no truncation is needed + if e >= F::SIG_BITS as i32 { + return FpResult::ok(x); + } + + let mask = if e < 0 { + // C2: If the exponent is negative, the result will be zero so we mask out everything + // except the sign. + F::SIGN_MASK + } else { + // C3: Otherwise, we mask out the last `e` bits of the significand. 
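+        // For example, with `f32` and `e = 1` (values in `[2, 4)`) this keeps the sign, the
+        // exponent, and the top significand bit, so the later `xi &= mask` clears the 22 low
+        // fraction bits.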
+ !(F::SIG_MASK >> e.unsigned()) + }; + + // C4: If the to-be-masked-out portion is already zero, we have an exact result + if (xi & !mask) == IntTy::::ZERO { + return FpResult::ok(x); + } + + // C5: Otherwise the result is inexact and we will truncate. Raise `FE_INEXACT`, mask the + // result, and return. + + let status = if xi & F::SIG_MASK == F::Int::ZERO { + Status::OK + } else { + Status::INEXACT + }; + xi &= mask; + FpResult::new(F::from_bits(xi), status) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::support::Hexf; + + fn spec_test(cases: &[(F, F, Status)]) { + let roundtrip = [ + F::ZERO, + F::ONE, + F::NEG_ONE, + F::NEG_ZERO, + F::INFINITY, + F::NEG_INFINITY, + ]; + + for x in roundtrip { + let FpResult { val, status } = trunc_status(x); + assert_biteq!(val, x, "{}", Hexf(x)); + assert_eq!(status, Status::OK, "{}", Hexf(x)); + } + + for &(x, res, res_stat) in cases { + let FpResult { val, status } = trunc_status(x); + assert_biteq!(val, res, "{}", Hexf(x)); + assert_eq!(status, res_stat, "{}", Hexf(x)); + } + } + + /* Skipping f16 / f128 "sanity_check"s and spec cases due to rejected literal lexing at MSRV */ + + #[test] + #[cfg(f16_enabled)] + fn spec_tests_f16() { + let cases = []; + spec_test::(&cases); + } + + #[test] + fn sanity_check_f32() { + assert_eq!(trunc(0.5f32), 0.0); + assert_eq!(trunc(1.1f32), 1.0); + assert_eq!(trunc(2.9f32), 2.0); + } + + #[test] + fn spec_tests_f32() { + let cases = [ + (0.1, 0.0, Status::INEXACT), + (-0.1, -0.0, Status::INEXACT), + (0.9, 0.0, Status::INEXACT), + (-0.9, -0.0, Status::INEXACT), + (1.1, 1.0, Status::INEXACT), + (-1.1, -1.0, Status::INEXACT), + (1.9, 1.0, Status::INEXACT), + (-1.9, -1.0, Status::INEXACT), + ]; + spec_test::(&cases); + + assert_biteq!(trunc(1.1f32), 1.0); + assert_biteq!(trunc(1.1f64), 1.0); + + // C1 + assert_biteq!(trunc(hf32!("0x1p23")), hf32!("0x1p23")); + assert_biteq!(trunc(hf64!("0x1p52")), hf64!("0x1p52")); + assert_biteq!(trunc(hf32!("-0x1p23")), hf32!("-0x1p23")); + assert_biteq!(trunc(hf64!("-0x1p52")), hf64!("-0x1p52")); + + // C2 + assert_biteq!(trunc(hf32!("0x1p-1")), 0.0); + assert_biteq!(trunc(hf64!("0x1p-1")), 0.0); + assert_biteq!(trunc(hf32!("-0x1p-1")), -0.0); + assert_biteq!(trunc(hf64!("-0x1p-1")), -0.0); + } + + #[test] + fn sanity_check_f64() { + assert_eq!(trunc(1.1f64), 1.0); + assert_eq!(trunc(2.9f64), 2.0); + } + + #[test] + fn spec_tests_f64() { + let cases = [ + (0.1, 0.0, Status::INEXACT), + (-0.1, -0.0, Status::INEXACT), + (0.9, 0.0, Status::INEXACT), + (-0.9, -0.0, Status::INEXACT), + (1.1, 1.0, Status::INEXACT), + (-1.1, -1.0, Status::INEXACT), + (1.9, 1.0, Status::INEXACT), + (-1.9, -1.0, Status::INEXACT), + ]; + spec_test::(&cases); + } + + #[test] + #[cfg(f128_enabled)] + fn spec_tests_f128() { + let cases = []; + spec_test::(&cases); + } +} diff --git a/library/compiler-builtins/libm/src/math/hypot.rs b/library/compiler-builtins/libm/src/math/hypot.rs new file mode 100644 index 0000000000000..da458ea1d05f1 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/hypot.rs @@ -0,0 +1,74 @@ +use core::f64; + +use super::sqrt; + +const SPLIT: f64 = 134217728. + 1.; // 0x1p27 + 1 === (2 ^ 27) + 1 + +fn sq(x: f64) -> (f64, f64) { + let xh: f64; + let xl: f64; + let xc: f64; + + xc = x * SPLIT; + xh = x - xc + xc; + xl = x - xh; + let hi = x * x; + let lo = xh * xh - hi + 2. 
* xh * xl + xl * xl; + (hi, lo) +} + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn hypot(mut x: f64, mut y: f64) -> f64 { + let x1p700 = f64::from_bits(0x6bb0000000000000); // 0x1p700 === 2 ^ 700 + let x1p_700 = f64::from_bits(0x1430000000000000); // 0x1p-700 === 2 ^ -700 + + let mut uxi = x.to_bits(); + let mut uyi = y.to_bits(); + let uti; + let ex: i64; + let ey: i64; + let mut z: f64; + + /* arrange |x| >= |y| */ + uxi &= -1i64 as u64 >> 1; + uyi &= -1i64 as u64 >> 1; + if uxi < uyi { + uti = uxi; + uxi = uyi; + uyi = uti; + } + + /* special cases */ + ex = (uxi >> 52) as i64; + ey = (uyi >> 52) as i64; + x = f64::from_bits(uxi); + y = f64::from_bits(uyi); + /* note: hypot(inf,nan) == inf */ + if ey == 0x7ff { + return y; + } + if ex == 0x7ff || uyi == 0 { + return x; + } + /* note: hypot(x,y) ~= x + y*y/x/2 with inexact for small y/x */ + /* 64 difference is enough for ld80 double_t */ + if ex - ey > 64 { + return x + y; + } + + /* precise sqrt argument in nearest rounding mode without overflow */ + /* xh*xh must not overflow and xl*xl must not underflow in sq */ + z = 1.; + if ex > 0x3ff + 510 { + z = x1p700; + x *= x1p_700; + y *= x1p_700; + } else if ey < 0x3ff - 450 { + z = x1p_700; + x *= x1p700; + y *= x1p700; + } + let (hx, lx) = sq(x); + let (hy, ly) = sq(y); + z * sqrt(ly + lx + hy + hx) +} diff --git a/library/compiler-builtins/libm/src/math/hypotf.rs b/library/compiler-builtins/libm/src/math/hypotf.rs new file mode 100644 index 0000000000000..576eebb334314 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/hypotf.rs @@ -0,0 +1,43 @@ +use core::f32; + +use super::sqrtf; + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn hypotf(mut x: f32, mut y: f32) -> f32 { + let x1p90 = f32::from_bits(0x6c800000); // 0x1p90f === 2 ^ 90 + let x1p_90 = f32::from_bits(0x12800000); // 0x1p-90f === 2 ^ -90 + + let mut uxi = x.to_bits(); + let mut uyi = y.to_bits(); + let uti; + let mut z: f32; + + uxi &= -1i32 as u32 >> 1; + uyi &= -1i32 as u32 >> 1; + if uxi < uyi { + uti = uxi; + uxi = uyi; + uyi = uti; + } + + x = f32::from_bits(uxi); + y = f32::from_bits(uyi); + if uyi == 0xff << 23 { + return y; + } + if uxi >= 0xff << 23 || uyi == 0 || uxi - uyi >= 25 << 23 { + return x + y; + } + + z = 1.; + if uxi >= (0x7f + 60) << 23 { + z = x1p90; + x *= x1p_90; + y *= x1p_90; + } else if uyi < (0x7f - 60) << 23 { + z = x1p_90; + x *= x1p90; + y *= x1p90; + } + z * sqrtf((x as f64 * x as f64 + y as f64 * y as f64) as f32) +} diff --git a/library/compiler-builtins/libm/src/math/ilogb.rs b/library/compiler-builtins/libm/src/math/ilogb.rs new file mode 100644 index 0000000000000..5b41f7b1dc0b7 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/ilogb.rs @@ -0,0 +1,32 @@ +const FP_ILOGBNAN: i32 = -1 - 0x7fffffff; +const FP_ILOGB0: i32 = FP_ILOGBNAN; + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ilogb(x: f64) -> i32 { + let mut i: u64 = x.to_bits(); + let e = ((i >> 52) & 0x7ff) as i32; + + if e == 0 { + i <<= 12; + if i == 0 { + force_eval!(0.0 / 0.0); + return FP_ILOGB0; + } + /* subnormal x */ + let mut e = -0x3ff; + while (i >> 63) == 0 { + e -= 1; + i <<= 1; + } + e + } else if e == 0x7ff { + force_eval!(0.0 / 0.0); + if (i << 12) != 0 { + FP_ILOGBNAN + } else { + i32::MAX + } + } else { + e - 0x3ff + } +} diff --git a/library/compiler-builtins/libm/src/math/ilogbf.rs b/library/compiler-builtins/libm/src/math/ilogbf.rs new file mode 100644 index 0000000000000..3585d6d36f164 --- /dev/null +++ 
b/library/compiler-builtins/libm/src/math/ilogbf.rs @@ -0,0 +1,28 @@ +const FP_ILOGBNAN: i32 = -1 - 0x7fffffff; +const FP_ILOGB0: i32 = FP_ILOGBNAN; + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ilogbf(x: f32) -> i32 { + let mut i = x.to_bits(); + let e = ((i >> 23) & 0xff) as i32; + + if e == 0 { + i <<= 9; + if i == 0 { + force_eval!(0.0 / 0.0); + return FP_ILOGB0; + } + /* subnormal x */ + let mut e = -0x7f; + while (i >> 31) == 0 { + e -= 1; + i <<= 1; + } + e + } else if e == 0xff { + force_eval!(0.0 / 0.0); + if (i << 9) != 0 { FP_ILOGBNAN } else { i32::MAX } + } else { + e - 0x7f + } +} diff --git a/library/compiler-builtins/libm/src/math/j0.rs b/library/compiler-builtins/libm/src/math/j0.rs new file mode 100644 index 0000000000000..99d656f0d08aa --- /dev/null +++ b/library/compiler-builtins/libm/src/math/j0.rs @@ -0,0 +1,426 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_j0.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* j0(x), y0(x) + * Bessel function of the first and second kinds of order zero. + * Method -- j0(x): + * 1. For tiny x, we use j0(x) = 1 - x^2/4 + x^4/64 - ... + * 2. Reduce x to |x| since j0(x)=j0(-x), and + * for x in (0,2) + * j0(x) = 1-z/4+ z^2*R0/S0, where z = x*x; + * (precision: |j0-1+z/4-z^2R0/S0 |<2**-63.67 ) + * for x in (2,inf) + * j0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x0)-q0(x)*sin(x0)) + * where x0 = x-pi/4. It is better to compute sin(x0),cos(x0) + * as follow: + * cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4) + * = 1/sqrt(2) * (cos(x) + sin(x)) + * sin(x0) = sin(x)cos(pi/4)-cos(x)sin(pi/4) + * = 1/sqrt(2) * (sin(x) - cos(x)) + * (To avoid cancellation, use + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + * to compute the worse one.) + * + * 3 Special cases + * j0(nan)= nan + * j0(0) = 1 + * j0(inf) = 0 + * + * Method -- y0(x): + * 1. For x<2. + * Since + * y0(x) = 2/pi*(j0(x)*(ln(x/2)+Euler) + x^2/4 - ...) + * therefore y0(x)-2/pi*j0(x)*ln(x) is an even function. + * We use the following function to approximate y0, + * y0(x) = U(z)/V(z) + (2/pi)*(j0(x)*ln(x)), z= x^2 + * where + * U(z) = u00 + u01*z + ... + u06*z^6 + * V(z) = 1 + v01*z + ... + v04*z^4 + * with absolute approximation error bounded by 2**-72. + * Note: For tiny x, U/V = u0 and j0(x)~1, hence + * y0(tiny) = u0 + (2/pi)*ln(tiny), (choose tiny<2**-27) + * 2. For x>=2. + * y0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x0)+q0(x)*sin(x0)) + * where x0 = x-pi/4. It is better to compute sin(x0),cos(x0) + * by the method mentioned above. + * 3. Special cases: y0(0)=-inf, y0(x<0)=NaN, y0(inf)=0. 
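+ *
+ * The cancellation-avoiding identity above follows from
+ * (sin(x) + cos(x)) * (sin(x) - cos(x)) = sin(x)^2 - cos(x)^2 = -cos(2x),
+ * so whichever of sin(x) +- cos(x) loses accuracy can be recovered by dividing
+ * -cos(2x) by the other.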
+ */ + +use super::{cos, fabs, get_high_word, get_low_word, log, sin, sqrt}; +const INVSQRTPI: f64 = 5.64189583547756279280e-01; /* 0x3FE20DD7, 0x50429B6D */ +const TPI: f64 = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */ + +/* common method when |x|>=2 */ +fn common(ix: u32, x: f64, y0: bool) -> f64 { + let s: f64; + let mut c: f64; + let mut ss: f64; + let mut cc: f64; + let z: f64; + + /* + * j0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x-pi/4)-q0(x)*sin(x-pi/4)) + * y0(x) = sqrt(2/(pi*x))*(p0(x)*sin(x-pi/4)+q0(x)*cos(x-pi/4)) + * + * sin(x-pi/4) = (sin(x) - cos(x))/sqrt(2) + * cos(x-pi/4) = (sin(x) + cos(x))/sqrt(2) + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + */ + s = sin(x); + c = cos(x); + if y0 { + c = -c; + } + cc = s + c; + /* avoid overflow in 2*x, big ulp error when x>=0x1p1023 */ + if ix < 0x7fe00000 { + ss = s - c; + z = -cos(2.0 * x); + if s * c < 0.0 { + cc = z / ss; + } else { + ss = z / cc; + } + if ix < 0x48000000 { + if y0 { + ss = -ss; + } + cc = pzero(x) * cc - qzero(x) * ss; + } + } + return INVSQRTPI * cc / sqrt(x); +} + +/* R0/S0 on [0, 2.00] */ +const R02: f64 = 1.56249999999999947958e-02; /* 0x3F8FFFFF, 0xFFFFFFFD */ +const R03: f64 = -1.89979294238854721751e-04; /* 0xBF28E6A5, 0xB61AC6E9 */ +const R04: f64 = 1.82954049532700665670e-06; /* 0x3EBEB1D1, 0x0C503919 */ +const R05: f64 = -4.61832688532103189199e-09; /* 0xBE33D5E7, 0x73D63FCE */ +const S01: f64 = 1.56191029464890010492e-02; /* 0x3F8FFCE8, 0x82C8C2A4 */ +const S02: f64 = 1.16926784663337450260e-04; /* 0x3F1EA6D2, 0xDD57DBF4 */ +const S03: f64 = 5.13546550207318111446e-07; /* 0x3EA13B54, 0xCE84D5A9 */ +const S04: f64 = 1.16614003333790000205e-09; /* 0x3E1408BC, 0xF4745D8F */ + +/// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f64). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn j0(mut x: f64) -> f64 { + let z: f64; + let r: f64; + let s: f64; + let mut ix: u32; + + ix = get_high_word(x); + ix &= 0x7fffffff; + + /* j0(+-inf)=0, j0(nan)=nan */ + if ix >= 0x7ff00000 { + return 1.0 / (x * x); + } + x = fabs(x); + + if ix >= 0x40000000 { + /* |x| >= 2 */ + /* large ulp error near zeros: 2.4, 5.52, 8.6537,.. 
*/ + return common(ix, x, false); + } + + /* 1 - x*x/4 + x*x*R(x^2)/S(x^2) */ + if ix >= 0x3f200000 { + /* |x| >= 2**-13 */ + /* up to 4ulp error close to 2 */ + z = x * x; + r = z * (R02 + z * (R03 + z * (R04 + z * R05))); + s = 1.0 + z * (S01 + z * (S02 + z * (S03 + z * S04))); + return (1.0 + x / 2.0) * (1.0 - x / 2.0) + z * (r / s); + } + + /* 1 - x*x/4 */ + /* prevent underflow */ + /* inexact should be raised when x!=0, this is not done correctly */ + if ix >= 0x38000000 { + /* |x| >= 2**-127 */ + x = 0.25 * x * x; + } + return 1.0 - x; +} + +const U00: f64 = -7.38042951086872317523e-02; /* 0xBFB2E4D6, 0x99CBD01F */ +const U01: f64 = 1.76666452509181115538e-01; /* 0x3FC69D01, 0x9DE9E3FC */ +const U02: f64 = -1.38185671945596898896e-02; /* 0xBF8C4CE8, 0xB16CFA97 */ +const U03: f64 = 3.47453432093683650238e-04; /* 0x3F36C54D, 0x20B29B6B */ +const U04: f64 = -3.81407053724364161125e-06; /* 0xBECFFEA7, 0x73D25CAD */ +const U05: f64 = 1.95590137035022920206e-08; /* 0x3E550057, 0x3B4EABD4 */ +const U06: f64 = -3.98205194132103398453e-11; /* 0xBDC5E43D, 0x693FB3C8 */ +const V01: f64 = 1.27304834834123699328e-02; /* 0x3F8A1270, 0x91C9C71A */ +const V02: f64 = 7.60068627350353253702e-05; /* 0x3F13ECBB, 0xF578C6C1 */ +const V03: f64 = 2.59150851840457805467e-07; /* 0x3E91642D, 0x7FF202FD */ +const V04: f64 = 4.41110311332675467403e-10; /* 0x3DFE5018, 0x3BD6D9EF */ + +/// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f64). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn y0(x: f64) -> f64 { + let z: f64; + let u: f64; + let v: f64; + let ix: u32; + let lx: u32; + + ix = get_high_word(x); + lx = get_low_word(x); + + /* y0(nan)=nan, y0(<0)=nan, y0(0)=-inf, y0(inf)=0 */ + if ((ix << 1) | lx) == 0 { + return -1.0 / 0.0; + } + if (ix >> 31) != 0 { + return 0.0 / 0.0; + } + if ix >= 0x7ff00000 { + return 1.0 / x; + } + + if ix >= 0x40000000 { + /* x >= 2 */ + /* large ulp errors near zeros: 3.958, 7.086,.. */ + return common(ix, x, true); + } + + /* U(x^2)/V(x^2) + (2/pi)*j0(x)*log(x) */ + if ix >= 0x3e400000 { + /* x >= 2**-27 */ + /* large ulp error near the first zero, x ~= 0.89 */ + z = x * x; + u = U00 + z * (U01 + z * (U02 + z * (U03 + z * (U04 + z * (U05 + z * U06))))); + v = 1.0 + z * (V01 + z * (V02 + z * (V03 + z * V04))); + return u / v + TPI * (j0(x) * log(x)); + } + return U00 + TPI * log(x); +} + +/* The asymptotic expansions of pzero is + * 1 - 9/128 s^2 + 11025/98304 s^4 - ..., where s = 1/x. + * For x >= 2, We approximate pzero by + * pzero(x) = 1 + (R/S) + * where R = pR0 + pR1*s^2 + pR2*s^4 + ... + pR5*s^10 + * S = 1 + pS0*s^2 + ... 
+ pS4*s^10 + * and + * | pzero(x)-1-R/S | <= 2 ** ( -60.26) + */ +const PR8: [f64; 6] = [ + /* for x in [inf, 8]=1/[0,0.125] */ + 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ + -7.03124999999900357484e-02, /* 0xBFB1FFFF, 0xFFFFFD32 */ + -8.08167041275349795626e+00, /* 0xC02029D0, 0xB44FA779 */ + -2.57063105679704847262e+02, /* 0xC0701102, 0x7B19E863 */ + -2.48521641009428822144e+03, /* 0xC0A36A6E, 0xCD4DCAFC */ + -5.25304380490729545272e+03, /* 0xC0B4850B, 0x36CC643D */ +]; +const PS8: [f64; 5] = [ + 1.16534364619668181717e+02, /* 0x405D2233, 0x07A96751 */ + 3.83374475364121826715e+03, /* 0x40ADF37D, 0x50596938 */ + 4.05978572648472545552e+04, /* 0x40E3D2BB, 0x6EB6B05F */ + 1.16752972564375915681e+05, /* 0x40FC810F, 0x8F9FA9BD */ + 4.76277284146730962675e+04, /* 0x40E74177, 0x4F2C49DC */ +]; + +const PR5: [f64; 6] = [ + /* for x in [8,4.5454]=1/[0.125,0.22001] */ + -1.14125464691894502584e-11, /* 0xBDA918B1, 0x47E495CC */ + -7.03124940873599280078e-02, /* 0xBFB1FFFF, 0xE69AFBC6 */ + -4.15961064470587782438e+00, /* 0xC010A370, 0xF90C6BBF */ + -6.76747652265167261021e+01, /* 0xC050EB2F, 0x5A7D1783 */ + -3.31231299649172967747e+02, /* 0xC074B3B3, 0x6742CC63 */ + -3.46433388365604912451e+02, /* 0xC075A6EF, 0x28A38BD7 */ +]; +const PS5: [f64; 5] = [ + 6.07539382692300335975e+01, /* 0x404E6081, 0x0C98C5DE */ + 1.05125230595704579173e+03, /* 0x40906D02, 0x5C7E2864 */ + 5.97897094333855784498e+03, /* 0x40B75AF8, 0x8FBE1D60 */ + 9.62544514357774460223e+03, /* 0x40C2CCB8, 0xFA76FA38 */ + 2.40605815922939109441e+03, /* 0x40A2CC1D, 0xC70BE864 */ +]; + +const PR3: [f64; 6] = [ + /* for x in [4.547,2.8571]=1/[0.2199,0.35001] */ + -2.54704601771951915620e-09, /* 0xBE25E103, 0x6FE1AA86 */ + -7.03119616381481654654e-02, /* 0xBFB1FFF6, 0xF7C0E24B */ + -2.40903221549529611423e+00, /* 0xC00345B2, 0xAEA48074 */ + -2.19659774734883086467e+01, /* 0xC035F74A, 0x4CB94E14 */ + -5.80791704701737572236e+01, /* 0xC04D0A22, 0x420A1A45 */ + -3.14479470594888503854e+01, /* 0xC03F72AC, 0xA892D80F */ +]; +const PS3: [f64; 5] = [ + 3.58560338055209726349e+01, /* 0x4041ED92, 0x84077DD3 */ + 3.61513983050303863820e+02, /* 0x40769839, 0x464A7C0E */ + 1.19360783792111533330e+03, /* 0x4092A66E, 0x6D1061D6 */ + 1.12799679856907414432e+03, /* 0x40919FFC, 0xB8C39B7E */ + 1.73580930813335754692e+02, /* 0x4065B296, 0xFC379081 */ +]; + +const PR2: [f64; 6] = [ + /* for x in [2.8570,2]=1/[0.3499,0.5] */ + -8.87534333032526411254e-08, /* 0xBE77D316, 0xE927026D */ + -7.03030995483624743247e-02, /* 0xBFB1FF62, 0x495E1E42 */ + -1.45073846780952986357e+00, /* 0xBFF73639, 0x8A24A843 */ + -7.63569613823527770791e+00, /* 0xC01E8AF3, 0xEDAFA7F3 */ + -1.11931668860356747786e+01, /* 0xC02662E6, 0xC5246303 */ + -3.23364579351335335033e+00, /* 0xC009DE81, 0xAF8FE70F */ +]; +const PS2: [f64; 5] = [ + 2.22202997532088808441e+01, /* 0x40363865, 0x908B5959 */ + 1.36206794218215208048e+02, /* 0x4061069E, 0x0EE8878F */ + 2.70470278658083486789e+02, /* 0x4070E786, 0x42EA079B */ + 1.53875394208320329881e+02, /* 0x40633C03, 0x3AB6FAFF */ + 1.46576176948256193810e+01, /* 0x402D50B3, 0x44391809 */ +]; + +fn pzero(x: f64) -> f64 { + let p: &[f64; 6]; + let q: &[f64; 5]; + let z: f64; + let r: f64; + let s: f64; + let mut ix: u32; + + ix = get_high_word(x); + ix &= 0x7fffffff; + if ix >= 0x40200000 { + p = &PR8; + q = &PS8; + } else if ix >= 0x40122E8B { + p = &PR5; + q = &PS5; + } else if ix >= 0x4006DB6D { + p = &PR3; + q = &PS3; + } else + /*ix >= 0x40000000*/ + { + p = &PR2; + q = &PS2; + } + z = 1.0 / (x * x); + r = p[0] + z * (p[1] + z * (p[2] 
+ z * (p[3] + z * (p[4] + z * p[5])))); + s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * q[4])))); + return 1.0 + r / s; +} + +/* For x >= 8, the asymptotic expansions of qzero is + * -1/8 s + 75/1024 s^3 - ..., where s = 1/x. + * We approximate pzero by + * qzero(x) = s*(-1.25 + (R/S)) + * where R = qR0 + qR1*s^2 + qR2*s^4 + ... + qR5*s^10 + * S = 1 + qS0*s^2 + ... + qS5*s^12 + * and + * | qzero(x)/s +1.25-R/S | <= 2 ** ( -61.22) + */ +const QR8: [f64; 6] = [ + /* for x in [inf, 8]=1/[0,0.125] */ + 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ + 7.32421874999935051953e-02, /* 0x3FB2BFFF, 0xFFFFFE2C */ + 1.17682064682252693899e+01, /* 0x40278952, 0x5BB334D6 */ + 5.57673380256401856059e+02, /* 0x40816D63, 0x15301825 */ + 8.85919720756468632317e+03, /* 0x40C14D99, 0x3E18F46D */ + 3.70146267776887834771e+04, /* 0x40E212D4, 0x0E901566 */ +]; +const QS8: [f64; 6] = [ + 1.63776026895689824414e+02, /* 0x406478D5, 0x365B39BC */ + 8.09834494656449805916e+03, /* 0x40BFA258, 0x4E6B0563 */ + 1.42538291419120476348e+05, /* 0x41016652, 0x54D38C3F */ + 8.03309257119514397345e+05, /* 0x412883DA, 0x83A52B43 */ + 8.40501579819060512818e+05, /* 0x4129A66B, 0x28DE0B3D */ + -3.43899293537866615225e+05, /* 0xC114FD6D, 0x2C9530C5 */ +]; + +const QR5: [f64; 6] = [ + /* for x in [8,4.5454]=1/[0.125,0.22001] */ + 1.84085963594515531381e-11, /* 0x3DB43D8F, 0x29CC8CD9 */ + 7.32421766612684765896e-02, /* 0x3FB2BFFF, 0xD172B04C */ + 5.83563508962056953777e+00, /* 0x401757B0, 0xB9953DD3 */ + 1.35111577286449829671e+02, /* 0x4060E392, 0x0A8788E9 */ + 1.02724376596164097464e+03, /* 0x40900CF9, 0x9DC8C481 */ + 1.98997785864605384631e+03, /* 0x409F17E9, 0x53C6E3A6 */ +]; +const QS5: [f64; 6] = [ + 8.27766102236537761883e+01, /* 0x4054B1B3, 0xFB5E1543 */ + 2.07781416421392987104e+03, /* 0x40A03BA0, 0xDA21C0CE */ + 1.88472887785718085070e+04, /* 0x40D267D2, 0x7B591E6D */ + 5.67511122894947329769e+04, /* 0x40EBB5E3, 0x97E02372 */ + 3.59767538425114471465e+04, /* 0x40E19118, 0x1F7A54A0 */ + -5.35434275601944773371e+03, /* 0xC0B4EA57, 0xBEDBC609 */ +]; + +const QR3: [f64; 6] = [ + /* for x in [4.547,2.8571]=1/[0.2199,0.35001] */ + 4.37741014089738620906e-09, /* 0x3E32CD03, 0x6ADECB82 */ + 7.32411180042911447163e-02, /* 0x3FB2BFEE, 0x0E8D0842 */ + 3.34423137516170720929e+00, /* 0x400AC0FC, 0x61149CF5 */ + 4.26218440745412650017e+01, /* 0x40454F98, 0x962DAEDD */ + 1.70808091340565596283e+02, /* 0x406559DB, 0xE25EFD1F */ + 1.66733948696651168575e+02, /* 0x4064D77C, 0x81FA21E0 */ +]; +const QS3: [f64; 6] = [ + 4.87588729724587182091e+01, /* 0x40486122, 0xBFE343A6 */ + 7.09689221056606015736e+02, /* 0x40862D83, 0x86544EB3 */ + 3.70414822620111362994e+03, /* 0x40ACF04B, 0xE44DFC63 */ + 6.46042516752568917582e+03, /* 0x40B93C6C, 0xD7C76A28 */ + 2.51633368920368957333e+03, /* 0x40A3A8AA, 0xD94FB1C0 */ + -1.49247451836156386662e+02, /* 0xC062A7EB, 0x201CF40F */ +]; + +const QR2: [f64; 6] = [ + /* for x in [2.8570,2]=1/[0.3499,0.5] */ + 1.50444444886983272379e-07, /* 0x3E84313B, 0x54F76BDB */ + 7.32234265963079278272e-02, /* 0x3FB2BEC5, 0x3E883E34 */ + 1.99819174093815998816e+00, /* 0x3FFFF897, 0xE727779C */ + 1.44956029347885735348e+01, /* 0x402CFDBF, 0xAAF96FE5 */ + 3.16662317504781540833e+01, /* 0x403FAA8E, 0x29FBDC4A */ + 1.62527075710929267416e+01, /* 0x403040B1, 0x71814BB4 */ +]; +const QS2: [f64; 6] = [ + 3.03655848355219184498e+01, /* 0x403E5D96, 0xF7C07AED */ + 2.69348118608049844624e+02, /* 0x4070D591, 0xE4D14B40 */ + 8.44783757595320139444e+02, /* 0x408A6645, 0x22B3BF22 */ + 8.82935845112488550512e+02, 
/* 0x408B977C, 0x9C5CC214 */ + 2.12666388511798828631e+02, /* 0x406A9553, 0x0E001365 */ + -5.31095493882666946917e+00, /* 0xC0153E6A, 0xF8B32931 */ +]; + +fn qzero(x: f64) -> f64 { + let p: &[f64; 6]; + let q: &[f64; 6]; + let s: f64; + let r: f64; + let z: f64; + let mut ix: u32; + + ix = get_high_word(x); + ix &= 0x7fffffff; + if ix >= 0x40200000 { + p = &QR8; + q = &QS8; + } else if ix >= 0x40122E8B { + p = &QR5; + q = &QS5; + } else if ix >= 0x4006DB6D { + p = &QR3; + q = &QS3; + } else + /*ix >= 0x40000000*/ + { + p = &QR2; + q = &QS2; + } + z = 1.0 / (x * x); + r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5])))); + s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * (q[4] + z * q[5]))))); + return (-0.125 + r / s) / x; +} diff --git a/library/compiler-builtins/libm/src/math/j0f.rs b/library/compiler-builtins/libm/src/math/j0f.rs new file mode 100644 index 0000000000000..25e5b325c8cc0 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/j0f.rs @@ -0,0 +1,363 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_j0f.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::{cosf, fabsf, logf, sinf, sqrtf}; + +const INVSQRTPI: f32 = 5.6418961287e-01; /* 0x3f106ebb */ +const TPI: f32 = 6.3661974669e-01; /* 0x3f22f983 */ + +fn common(ix: u32, x: f32, y0: bool) -> f32 { + let z: f32; + let s: f32; + let mut c: f32; + let mut ss: f32; + let mut cc: f32; + /* + * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x) + * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x) + */ + s = sinf(x); + c = cosf(x); + if y0 { + c = -c; + } + cc = s + c; + if ix < 0x7f000000 { + ss = s - c; + z = -cosf(2.0 * x); + if s * c < 0.0 { + cc = z / ss; + } else { + ss = z / cc; + } + if ix < 0x58800000 { + if y0 { + ss = -ss; + } + cc = pzerof(x) * cc - qzerof(x) * ss; + } + } + return INVSQRTPI * cc / sqrtf(x); +} + +/* R0/S0 on [0, 2.00] */ +const R02: f32 = 1.5625000000e-02; /* 0x3c800000 */ +const R03: f32 = -1.8997929874e-04; /* 0xb947352e */ +const R04: f32 = 1.8295404516e-06; /* 0x35f58e88 */ +const R05: f32 = -4.6183270541e-09; /* 0xb19eaf3c */ +const S01: f32 = 1.5619102865e-02; /* 0x3c7fe744 */ +const S02: f32 = 1.1692678527e-04; /* 0x38f53697 */ +const S03: f32 = 5.1354652442e-07; /* 0x3509daa6 */ +const S04: f32 = 1.1661400734e-09; /* 0x30a045e8 */ + +/// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f32). 
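+///
+/// Follows the same method as `j0` in `j0.rs`, using single-precision coefficients.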
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn j0f(mut x: f32) -> f32 { + let z: f32; + let r: f32; + let s: f32; + let mut ix: u32; + + ix = x.to_bits(); + ix &= 0x7fffffff; + if ix >= 0x7f800000 { + return 1.0 / (x * x); + } + x = fabsf(x); + + if ix >= 0x40000000 { + /* |x| >= 2 */ + /* large ulp error near zeros */ + return common(ix, x, false); + } + if ix >= 0x3a000000 { + /* |x| >= 2**-11 */ + /* up to 4ulp error near 2 */ + z = x * x; + r = z * (R02 + z * (R03 + z * (R04 + z * R05))); + s = 1.0 + z * (S01 + z * (S02 + z * (S03 + z * S04))); + return (1.0 + x / 2.0) * (1.0 - x / 2.0) + z * (r / s); + } + if ix >= 0x21800000 { + /* |x| >= 2**-60 */ + x = 0.25 * x * x; + } + return 1.0 - x; +} + +const U00: f32 = -7.3804296553e-02; /* 0xbd9726b5 */ +const U01: f32 = 1.7666645348e-01; /* 0x3e34e80d */ +const U02: f32 = -1.3818567619e-02; /* 0xbc626746 */ +const U03: f32 = 3.4745343146e-04; /* 0x39b62a69 */ +const U04: f32 = -3.8140706238e-06; /* 0xb67ff53c */ +const U05: f32 = 1.9559013964e-08; /* 0x32a802ba */ +const U06: f32 = -3.9820518410e-11; /* 0xae2f21eb */ +const V01: f32 = 1.2730483897e-02; /* 0x3c509385 */ +const V02: f32 = 7.6006865129e-05; /* 0x389f65e0 */ +const V03: f32 = 2.5915085189e-07; /* 0x348b216c */ +const V04: f32 = 4.4111031494e-10; /* 0x2ff280c2 */ + +/// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f32). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn y0f(x: f32) -> f32 { + let z: f32; + let u: f32; + let v: f32; + let ix: u32; + + ix = x.to_bits(); + if (ix & 0x7fffffff) == 0 { + return -1.0 / 0.0; + } + if (ix >> 31) != 0 { + return 0.0 / 0.0; + } + if ix >= 0x7f800000 { + return 1.0 / x; + } + if ix >= 0x40000000 { + /* |x| >= 2.0 */ + /* large ulp error near zeros */ + return common(ix, x, true); + } + if ix >= 0x39000000 { + /* x >= 2**-13 */ + /* large ulp error at x ~= 0.89 */ + z = x * x; + u = U00 + z * (U01 + z * (U02 + z * (U03 + z * (U04 + z * (U05 + z * U06))))); + v = 1.0 + z * (V01 + z * (V02 + z * (V03 + z * V04))); + return u / v + TPI * (j0f(x) * logf(x)); + } + return U00 + TPI * logf(x); +} + +/* The asymptotic expansions of pzero is + * 1 - 9/128 s^2 + 11025/98304 s^4 - ..., where s = 1/x. + * For x >= 2, We approximate pzero by + * pzero(x) = 1 + (R/S) + * where R = pR0 + pR1*s^2 + pR2*s^4 + ... + pR5*s^10 + * S = 1 + pS0*s^2 + ... 
+ pS4*s^10 + * and + * | pzero(x)-1-R/S | <= 2 ** ( -60.26) + */ +const PR8: [f32; 6] = [ + /* for x in [inf, 8]=1/[0,0.125] */ + 0.0000000000e+00, /* 0x00000000 */ + -7.0312500000e-02, /* 0xbd900000 */ + -8.0816707611e+00, /* 0xc1014e86 */ + -2.5706311035e+02, /* 0xc3808814 */ + -2.4852163086e+03, /* 0xc51b5376 */ + -5.2530439453e+03, /* 0xc5a4285a */ +]; +const PS8: [f32; 5] = [ + 1.1653436279e+02, /* 0x42e91198 */ + 3.8337448730e+03, /* 0x456f9beb */ + 4.0597855469e+04, /* 0x471e95db */ + 1.1675296875e+05, /* 0x47e4087c */ + 4.7627726562e+04, /* 0x473a0bba */ +]; +const PR5: [f32; 6] = [ + /* for x in [8,4.5454]=1/[0.125,0.22001] */ + -1.1412546255e-11, /* 0xad48c58a */ + -7.0312492549e-02, /* 0xbd8fffff */ + -4.1596107483e+00, /* 0xc0851b88 */ + -6.7674766541e+01, /* 0xc287597b */ + -3.3123129272e+02, /* 0xc3a59d9b */ + -3.4643338013e+02, /* 0xc3ad3779 */ +]; +const PS5: [f32; 5] = [ + 6.0753936768e+01, /* 0x42730408 */ + 1.0512523193e+03, /* 0x44836813 */ + 5.9789707031e+03, /* 0x45bad7c4 */ + 9.6254453125e+03, /* 0x461665c8 */ + 2.4060581055e+03, /* 0x451660ee */ +]; + +const PR3: [f32; 6] = [ + /* for x in [4.547,2.8571]=1/[0.2199,0.35001] */ + -2.5470459075e-09, /* 0xb12f081b */ + -7.0311963558e-02, /* 0xbd8fffb8 */ + -2.4090321064e+00, /* 0xc01a2d95 */ + -2.1965976715e+01, /* 0xc1afba52 */ + -5.8079170227e+01, /* 0xc2685112 */ + -3.1447946548e+01, /* 0xc1fb9565 */ +]; +const PS3: [f32; 5] = [ + 3.5856033325e+01, /* 0x420f6c94 */ + 3.6151397705e+02, /* 0x43b4c1ca */ + 1.1936077881e+03, /* 0x44953373 */ + 1.1279968262e+03, /* 0x448cffe6 */ + 1.7358093262e+02, /* 0x432d94b8 */ +]; + +const PR2: [f32; 6] = [ + /* for x in [2.8570,2]=1/[0.3499,0.5] */ + -8.8753431271e-08, /* 0xb3be98b7 */ + -7.0303097367e-02, /* 0xbd8ffb12 */ + -1.4507384300e+00, /* 0xbfb9b1cc */ + -7.6356959343e+00, /* 0xc0f4579f */ + -1.1193166733e+01, /* 0xc1331736 */ + -3.2336456776e+00, /* 0xc04ef40d */ +]; +const PS2: [f32; 5] = [ + 2.2220300674e+01, /* 0x41b1c32d */ + 1.3620678711e+02, /* 0x430834f0 */ + 2.7047027588e+02, /* 0x43873c32 */ + 1.5387539673e+02, /* 0x4319e01a */ + 1.4657617569e+01, /* 0x416a859a */ +]; + +fn pzerof(x: f32) -> f32 { + let p: &[f32; 6]; + let q: &[f32; 5]; + let z: f32; + let r: f32; + let s: f32; + let mut ix: u32; + + ix = x.to_bits(); + ix &= 0x7fffffff; + if ix >= 0x41000000 { + p = &PR8; + q = &PS8; + } else if ix >= 0x409173eb { + p = &PR5; + q = &PS5; + } else if ix >= 0x4036d917 { + p = &PR3; + q = &PS3; + } else + /*ix >= 0x40000000*/ + { + p = &PR2; + q = &PS2; + } + z = 1.0 / (x * x); + r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5])))); + s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * q[4])))); + return 1.0 + r / s; +} + +/* For x >= 8, the asymptotic expansions of qzero is + * -1/8 s + 75/1024 s^3 - ..., where s = 1/x. + * We approximate pzero by + * qzero(x) = s*(-1.25 + (R/S)) + * where R = qR0 + qR1*s^2 + qR2*s^4 + ... + qR5*s^10 + * S = 1 + qS0*s^2 + ... 
+ qS5*s^12 + * and + * | qzero(x)/s +1.25-R/S | <= 2 ** ( -61.22) + */ +const QR8: [f32; 6] = [ + /* for x in [inf, 8]=1/[0,0.125] */ + 0.0000000000e+00, /* 0x00000000 */ + 7.3242187500e-02, /* 0x3d960000 */ + 1.1768206596e+01, /* 0x413c4a93 */ + 5.5767340088e+02, /* 0x440b6b19 */ + 8.8591972656e+03, /* 0x460a6cca */ + 3.7014625000e+04, /* 0x471096a0 */ +]; +const QS8: [f32; 6] = [ + 1.6377603149e+02, /* 0x4323c6aa */ + 8.0983447266e+03, /* 0x45fd12c2 */ + 1.4253829688e+05, /* 0x480b3293 */ + 8.0330925000e+05, /* 0x49441ed4 */ + 8.4050156250e+05, /* 0x494d3359 */ + -3.4389928125e+05, /* 0xc8a7eb69 */ +]; + +const QR5: [f32; 6] = [ + /* for x in [8,4.5454]=1/[0.125,0.22001] */ + 1.8408595828e-11, /* 0x2da1ec79 */ + 7.3242180049e-02, /* 0x3d95ffff */ + 5.8356351852e+00, /* 0x40babd86 */ + 1.3511157227e+02, /* 0x43071c90 */ + 1.0272437744e+03, /* 0x448067cd */ + 1.9899779053e+03, /* 0x44f8bf4b */ +]; +const QS5: [f32; 6] = [ + 8.2776611328e+01, /* 0x42a58da0 */ + 2.0778142090e+03, /* 0x4501dd07 */ + 1.8847289062e+04, /* 0x46933e94 */ + 5.6751113281e+04, /* 0x475daf1d */ + 3.5976753906e+04, /* 0x470c88c1 */ + -5.3543427734e+03, /* 0xc5a752be */ +]; + +const QR3: [f32; 6] = [ + /* for x in [4.547,2.8571]=1/[0.2199,0.35001] */ + 4.3774099900e-09, /* 0x3196681b */ + 7.3241114616e-02, /* 0x3d95ff70 */ + 3.3442313671e+00, /* 0x405607e3 */ + 4.2621845245e+01, /* 0x422a7cc5 */ + 1.7080809021e+02, /* 0x432acedf */ + 1.6673394775e+02, /* 0x4326bbe4 */ +]; +const QS3: [f32; 6] = [ + 4.8758872986e+01, /* 0x42430916 */ + 7.0968920898e+02, /* 0x44316c1c */ + 3.7041481934e+03, /* 0x4567825f */ + 6.4604252930e+03, /* 0x45c9e367 */ + 2.5163337402e+03, /* 0x451d4557 */ + -1.4924745178e+02, /* 0xc3153f59 */ +]; + +const QR2: [f32; 6] = [ + /* for x in [2.8570,2]=1/[0.3499,0.5] */ + 1.5044444979e-07, /* 0x342189db */ + 7.3223426938e-02, /* 0x3d95f62a */ + 1.9981917143e+00, /* 0x3fffc4bf */ + 1.4495602608e+01, /* 0x4167edfd */ + 3.1666231155e+01, /* 0x41fd5471 */ + 1.6252708435e+01, /* 0x4182058c */ +]; +const QS2: [f32; 6] = [ + 3.0365585327e+01, /* 0x41f2ecb8 */ + 2.6934811401e+02, /* 0x4386ac8f */ + 8.4478375244e+02, /* 0x44533229 */ + 8.8293585205e+02, /* 0x445cbbe5 */ + 2.1266638184e+02, /* 0x4354aa98 */ + -5.3109550476e+00, /* 0xc0a9f358 */ +]; + +fn qzerof(x: f32) -> f32 { + let p: &[f32; 6]; + let q: &[f32; 6]; + let s: f32; + let r: f32; + let z: f32; + let mut ix: u32; + + ix = x.to_bits(); + ix &= 0x7fffffff; + if ix >= 0x41000000 { + p = &QR8; + q = &QS8; + } else if ix >= 0x409173eb { + p = &QR5; + q = &QS5; + } else if ix >= 0x4036d917 { + p = &QR3; + q = &QS3; + } else + /*ix >= 0x40000000*/ + { + p = &QR2; + q = &QS2; + } + z = 1.0 / (x * x); + r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5])))); + s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * (q[4] + z * q[5]))))); + return (-0.125 + r / s) / x; +} diff --git a/library/compiler-builtins/libm/src/math/j1.rs b/library/compiler-builtins/libm/src/math/j1.rs new file mode 100644 index 0000000000000..9b604d9e46e00 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/j1.rs @@ -0,0 +1,418 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_j1.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. 
+ * ==================================================== + */ +/* j1(x), y1(x) + * Bessel function of the first and second kinds of order zero. + * Method -- j1(x): + * 1. For tiny x, we use j1(x) = x/2 - x^3/16 + x^5/384 - ... + * 2. Reduce x to |x| since j1(x)=-j1(-x), and + * for x in (0,2) + * j1(x) = x/2 + x*z*R0/S0, where z = x*x; + * (precision: |j1/x - 1/2 - R0/S0 |<2**-61.51 ) + * for x in (2,inf) + * j1(x) = sqrt(2/(pi*x))*(p1(x)*cos(x1)-q1(x)*sin(x1)) + * y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x1)+q1(x)*cos(x1)) + * where x1 = x-3*pi/4. It is better to compute sin(x1),cos(x1) + * as follow: + * cos(x1) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4) + * = 1/sqrt(2) * (sin(x) - cos(x)) + * sin(x1) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4) + * = -1/sqrt(2) * (sin(x) + cos(x)) + * (To avoid cancellation, use + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + * to compute the worse one.) + * + * 3 Special cases + * j1(nan)= nan + * j1(0) = 0 + * j1(inf) = 0 + * + * Method -- y1(x): + * 1. screen out x<=0 cases: y1(0)=-inf, y1(x<0)=NaN + * 2. For x<2. + * Since + * y1(x) = 2/pi*(j1(x)*(ln(x/2)+Euler)-1/x-x/2+5/64*x^3-...) + * therefore y1(x)-2/pi*j1(x)*ln(x)-1/x is an odd function. + * We use the following function to approximate y1, + * y1(x) = x*U(z)/V(z) + (2/pi)*(j1(x)*ln(x)-1/x), z= x^2 + * where for x in [0,2] (abs err less than 2**-65.89) + * U(z) = U0[0] + U0[1]*z + ... + U0[4]*z^4 + * V(z) = 1 + v0[0]*z + ... + v0[4]*z^5 + * Note: For tiny x, 1/x dominate y1 and hence + * y1(tiny) = -2/pi/tiny, (choose tiny<2**-54) + * 3. For x>=2. + * y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x1)+q1(x)*cos(x1)) + * where x1 = x-3*pi/4. It is better to compute sin(x1),cos(x1) + * by method mentioned above. + */ + +use super::{cos, fabs, get_high_word, get_low_word, log, sin, sqrt}; + +const INVSQRTPI: f64 = 5.64189583547756279280e-01; /* 0x3FE20DD7, 0x50429B6D */ +const TPI: f64 = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */ + +fn common(ix: u32, x: f64, y1: bool, sign: bool) -> f64 { + let z: f64; + let mut s: f64; + let c: f64; + let mut ss: f64; + let mut cc: f64; + + /* + * j1(x) = sqrt(2/(pi*x))*(p1(x)*cos(x-3pi/4)-q1(x)*sin(x-3pi/4)) + * y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x-3pi/4)+q1(x)*cos(x-3pi/4)) + * + * sin(x-3pi/4) = -(sin(x) + cos(x))/sqrt(2) + * cos(x-3pi/4) = (sin(x) - cos(x))/sqrt(2) + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + */ + s = sin(x); + if y1 { + s = -s; + } + c = cos(x); + cc = s - c; + if ix < 0x7fe00000 { + /* avoid overflow in 2*x */ + ss = -s - c; + z = cos(2.0 * x); + if s * c > 0.0 { + cc = z / ss; + } else { + ss = z / cc; + } + if ix < 0x48000000 { + if y1 { + ss = -ss; + } + cc = pone(x) * cc - qone(x) * ss; + } + } + if sign { + cc = -cc; + } + return INVSQRTPI * cc / sqrt(x); +} + +/* R0/S0 on [0,2] */ +const R00: f64 = -6.25000000000000000000e-02; /* 0xBFB00000, 0x00000000 */ +const R01: f64 = 1.40705666955189706048e-03; /* 0x3F570D9F, 0x98472C61 */ +const R02: f64 = -1.59955631084035597520e-05; /* 0xBEF0C5C6, 0xBA169668 */ +const R03: f64 = 4.96727999609584448412e-08; /* 0x3E6AAAFA, 0x46CA0BD9 */ +const S01: f64 = 1.91537599538363460805e-02; /* 0x3F939D0B, 0x12637E53 */ +const S02: f64 = 1.85946785588630915560e-04; /* 0x3F285F56, 0xB9CDF664 */ +const S03: f64 = 1.17718464042623683263e-06; /* 0x3EB3BFF8, 0x333F8498 */ +const S04: f64 = 5.04636257076217042715e-09; /* 0x3E35AC88, 0xC97DFF2C */ +const S05: f64 = 1.23542274426137913908e-11; /* 0x3DAB2ACF, 0xCFB97ED8 */ + +/// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the 
first kind (f64). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn j1(x: f64) -> f64 { + let mut z: f64; + let r: f64; + let s: f64; + let mut ix: u32; + let sign: bool; + + ix = get_high_word(x); + sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + if ix >= 0x7ff00000 { + return 1.0 / (x * x); + } + if ix >= 0x40000000 { + /* |x| >= 2 */ + return common(ix, fabs(x), false, sign); + } + if ix >= 0x38000000 { + /* |x| >= 2**-127 */ + z = x * x; + r = z * (R00 + z * (R01 + z * (R02 + z * R03))); + s = 1.0 + z * (S01 + z * (S02 + z * (S03 + z * (S04 + z * S05)))); + z = r / s; + } else { + /* avoid underflow, raise inexact if x!=0 */ + z = x; + } + return (0.5 + z) * x; +} + +const U0: [f64; 5] = [ + -1.96057090646238940668e-01, /* 0xBFC91866, 0x143CBC8A */ + 5.04438716639811282616e-02, /* 0x3FA9D3C7, 0x76292CD1 */ + -1.91256895875763547298e-03, /* 0xBF5F55E5, 0x4844F50F */ + 2.35252600561610495928e-05, /* 0x3EF8AB03, 0x8FA6B88E */ + -9.19099158039878874504e-08, /* 0xBE78AC00, 0x569105B8 */ +]; +const V0: [f64; 5] = [ + 1.99167318236649903973e-02, /* 0x3F94650D, 0x3F4DA9F0 */ + 2.02552581025135171496e-04, /* 0x3F2A8C89, 0x6C257764 */ + 1.35608801097516229404e-06, /* 0x3EB6C05A, 0x894E8CA6 */ + 6.22741452364621501295e-09, /* 0x3E3ABF1D, 0x5BA69A86 */ + 1.66559246207992079114e-11, /* 0x3DB25039, 0xDACA772A */ +]; + +/// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f64). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn y1(x: f64) -> f64 { + let z: f64; + let u: f64; + let v: f64; + let ix: u32; + let lx: u32; + + ix = get_high_word(x); + lx = get_low_word(x); + + /* y1(nan)=nan, y1(<0)=nan, y1(0)=-inf, y1(inf)=0 */ + if (ix << 1) | lx == 0 { + return -1.0 / 0.0; + } + if ix >> 31 != 0 { + return 0.0 / 0.0; + } + if ix >= 0x7ff00000 { + return 1.0 / x; + } + + if ix >= 0x40000000 { + /* x >= 2 */ + return common(ix, x, true, false); + } + if ix < 0x3c900000 { + /* x < 2**-54 */ + return -TPI / x; + } + z = x * x; + u = U0[0] + z * (U0[1] + z * (U0[2] + z * (U0[3] + z * U0[4]))); + v = 1.0 + z * (V0[0] + z * (V0[1] + z * (V0[2] + z * (V0[3] + z * V0[4])))); + return x * (u / v) + TPI * (j1(x) * log(x) - 1.0 / x); +} + +/* For x >= 8, the asymptotic expansions of pone is + * 1 + 15/128 s^2 - 4725/2^15 s^4 - ..., where s = 1/x. + * We approximate pone by + * pone(x) = 1 + (R/S) + * where R = pr0 + pr1*s^2 + pr2*s^4 + ... + pr5*s^10 + * S = 1 + ps0*s^2 + ... 
+ ps4*s^10 + * and + * | pone(x)-1-R/S | <= 2 ** ( -60.06) + */ + +const PR8: [f64; 6] = [ + /* for x in [inf, 8]=1/[0,0.125] */ + 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ + 1.17187499999988647970e-01, /* 0x3FBDFFFF, 0xFFFFFCCE */ + 1.32394806593073575129e+01, /* 0x402A7A9D, 0x357F7FCE */ + 4.12051854307378562225e+02, /* 0x4079C0D4, 0x652EA590 */ + 3.87474538913960532227e+03, /* 0x40AE457D, 0xA3A532CC */ + 7.91447954031891731574e+03, /* 0x40BEEA7A, 0xC32782DD */ +]; +const PS8: [f64; 5] = [ + 1.14207370375678408436e+02, /* 0x405C8D45, 0x8E656CAC */ + 3.65093083420853463394e+03, /* 0x40AC85DC, 0x964D274F */ + 3.69562060269033463555e+04, /* 0x40E20B86, 0x97C5BB7F */ + 9.76027935934950801311e+04, /* 0x40F7D42C, 0xB28F17BB */ + 3.08042720627888811578e+04, /* 0x40DE1511, 0x697A0B2D */ +]; + +const PR5: [f64; 6] = [ + /* for x in [8,4.5454]=1/[0.125,0.22001] */ + 1.31990519556243522749e-11, /* 0x3DAD0667, 0xDAE1CA7D */ + 1.17187493190614097638e-01, /* 0x3FBDFFFF, 0xE2C10043 */ + 6.80275127868432871736e+00, /* 0x401B3604, 0x6E6315E3 */ + 1.08308182990189109773e+02, /* 0x405B13B9, 0x452602ED */ + 5.17636139533199752805e+02, /* 0x40802D16, 0xD052D649 */ + 5.28715201363337541807e+02, /* 0x408085B8, 0xBB7E0CB7 */ +]; +const PS5: [f64; 5] = [ + 5.92805987221131331921e+01, /* 0x404DA3EA, 0xA8AF633D */ + 9.91401418733614377743e+02, /* 0x408EFB36, 0x1B066701 */ + 5.35326695291487976647e+03, /* 0x40B4E944, 0x5706B6FB */ + 7.84469031749551231769e+03, /* 0x40BEA4B0, 0xB8A5BB15 */ + 1.50404688810361062679e+03, /* 0x40978030, 0x036F5E51 */ +]; + +const PR3: [f64; 6] = [ + 3.02503916137373618024e-09, /* 0x3E29FC21, 0xA7AD9EDD */ + 1.17186865567253592491e-01, /* 0x3FBDFFF5, 0x5B21D17B */ + 3.93297750033315640650e+00, /* 0x400F76BC, 0xE85EAD8A */ + 3.51194035591636932736e+01, /* 0x40418F48, 0x9DA6D129 */ + 9.10550110750781271918e+01, /* 0x4056C385, 0x4D2C1837 */ + 4.85590685197364919645e+01, /* 0x4048478F, 0x8EA83EE5 */ +]; +const PS3: [f64; 5] = [ + 3.47913095001251519989e+01, /* 0x40416549, 0xA134069C */ + 3.36762458747825746741e+02, /* 0x40750C33, 0x07F1A75F */ + 1.04687139975775130551e+03, /* 0x40905B7C, 0x5037D523 */ + 8.90811346398256432622e+02, /* 0x408BD67D, 0xA32E31E9 */ + 1.03787932439639277504e+02, /* 0x4059F26D, 0x7C2EED53 */ +]; + +const PR2: [f64; 6] = [ + /* for x in [2.8570,2]=1/[0.3499,0.5] */ + 1.07710830106873743082e-07, /* 0x3E7CE9D4, 0xF65544F4 */ + 1.17176219462683348094e-01, /* 0x3FBDFF42, 0xBE760D83 */ + 2.36851496667608785174e+00, /* 0x4002F2B7, 0xF98FAEC0 */ + 1.22426109148261232917e+01, /* 0x40287C37, 0x7F71A964 */ + 1.76939711271687727390e+01, /* 0x4031B1A8, 0x177F8EE2 */ + 5.07352312588818499250e+00, /* 0x40144B49, 0xA574C1FE */ +]; +const PS2: [f64; 5] = [ + 2.14364859363821409488e+01, /* 0x40356FBD, 0x8AD5ECDC */ + 1.25290227168402751090e+02, /* 0x405F5293, 0x14F92CD5 */ + 2.32276469057162813669e+02, /* 0x406D08D8, 0xD5A2DBD9 */ + 1.17679373287147100768e+02, /* 0x405D6B7A, 0xDA1884A9 */ + 8.36463893371618283368e+00, /* 0x4020BAB1, 0xF44E5192 */ +]; + +fn pone(x: f64) -> f64 { + let p: &[f64; 6]; + let q: &[f64; 5]; + let z: f64; + let r: f64; + let s: f64; + let mut ix: u32; + + ix = get_high_word(x); + ix &= 0x7fffffff; + if ix >= 0x40200000 { + p = &PR8; + q = &PS8; + } else if ix >= 0x40122E8B { + p = &PR5; + q = &PS5; + } else if ix >= 0x4006DB6D { + p = &PR3; + q = &PS3; + } else + /*ix >= 0x40000000*/ + { + p = &PR2; + q = &PS2; + } + z = 1.0 / (x * x); + r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5])))); + s = 1.0 + z * (q[0] + z * (q[1] 
+ z * (q[2] + z * (q[3] + z * q[4])))); + return 1.0 + r / s; +} + +/* For x >= 8, the asymptotic expansions of qone is + * 3/8 s - 105/1024 s^3 - ..., where s = 1/x. + * We approximate pone by + * qone(x) = s*(0.375 + (R/S)) + * where R = qr1*s^2 + qr2*s^4 + ... + qr5*s^10 + * S = 1 + qs1*s^2 + ... + qs6*s^12 + * and + * | qone(x)/s -0.375-R/S | <= 2 ** ( -61.13) + */ + +const QR8: [f64; 6] = [ + /* for x in [inf, 8]=1/[0,0.125] */ + 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ + -1.02539062499992714161e-01, /* 0xBFBA3FFF, 0xFFFFFDF3 */ + -1.62717534544589987888e+01, /* 0xC0304591, 0xA26779F7 */ + -7.59601722513950107896e+02, /* 0xC087BCD0, 0x53E4B576 */ + -1.18498066702429587167e+04, /* 0xC0C724E7, 0x40F87415 */ + -4.84385124285750353010e+04, /* 0xC0E7A6D0, 0x65D09C6A */ +]; +const QS8: [f64; 6] = [ + 1.61395369700722909556e+02, /* 0x40642CA6, 0xDE5BCDE5 */ + 7.82538599923348465381e+03, /* 0x40BE9162, 0xD0D88419 */ + 1.33875336287249578163e+05, /* 0x4100579A, 0xB0B75E98 */ + 7.19657723683240939863e+05, /* 0x4125F653, 0x72869C19 */ + 6.66601232617776375264e+05, /* 0x412457D2, 0x7719AD5C */ + -2.94490264303834643215e+05, /* 0xC111F969, 0x0EA5AA18 */ +]; + +const QR5: [f64; 6] = [ + /* for x in [8,4.5454]=1/[0.125,0.22001] */ + -2.08979931141764104297e-11, /* 0xBDB6FA43, 0x1AA1A098 */ + -1.02539050241375426231e-01, /* 0xBFBA3FFF, 0xCB597FEF */ + -8.05644828123936029840e+00, /* 0xC0201CE6, 0xCA03AD4B */ + -1.83669607474888380239e+02, /* 0xC066F56D, 0x6CA7B9B0 */ + -1.37319376065508163265e+03, /* 0xC09574C6, 0x6931734F */ + -2.61244440453215656817e+03, /* 0xC0A468E3, 0x88FDA79D */ +]; +const QS5: [f64; 6] = [ + 8.12765501384335777857e+01, /* 0x405451B2, 0xFF5A11B2 */ + 1.99179873460485964642e+03, /* 0x409F1F31, 0xE77BF839 */ + 1.74684851924908907677e+04, /* 0x40D10F1F, 0x0D64CE29 */ + 4.98514270910352279316e+04, /* 0x40E8576D, 0xAABAD197 */ + 2.79480751638918118260e+04, /* 0x40DB4B04, 0xCF7C364B */ + -4.71918354795128470869e+03, /* 0xC0B26F2E, 0xFCFFA004 */ +]; + +const QR3: [f64; 6] = [ + -5.07831226461766561369e-09, /* 0xBE35CFA9, 0xD38FC84F */ + -1.02537829820837089745e-01, /* 0xBFBA3FEB, 0x51AEED54 */ + -4.61011581139473403113e+00, /* 0xC01270C2, 0x3302D9FF */ + -5.78472216562783643212e+01, /* 0xC04CEC71, 0xC25D16DA */ + -2.28244540737631695038e+02, /* 0xC06C87D3, 0x4718D55F */ + -2.19210128478909325622e+02, /* 0xC06B66B9, 0x5F5C1BF6 */ +]; +const QS3: [f64; 6] = [ + 4.76651550323729509273e+01, /* 0x4047D523, 0xCCD367E4 */ + 6.73865112676699709482e+02, /* 0x40850EEB, 0xC031EE3E */ + 3.38015286679526343505e+03, /* 0x40AA684E, 0x448E7C9A */ + 5.54772909720722782367e+03, /* 0x40B5ABBA, 0xA61D54A6 */ + 1.90311919338810798763e+03, /* 0x409DBC7A, 0x0DD4DF4B */ + -1.35201191444307340817e+02, /* 0xC060E670, 0x290A311F */ +]; + +const QR2: [f64; 6] = [ + /* for x in [2.8570,2]=1/[0.3499,0.5] */ + -1.78381727510958865572e-07, /* 0xBE87F126, 0x44C626D2 */ + -1.02517042607985553460e-01, /* 0xBFBA3E8E, 0x9148B010 */ + -2.75220568278187460720e+00, /* 0xC0060484, 0x69BB4EDA */ + -1.96636162643703720221e+01, /* 0xC033A9E2, 0xC168907F */ + -4.23253133372830490089e+01, /* 0xC04529A3, 0xDE104AAA */ + -2.13719211703704061733e+01, /* 0xC0355F36, 0x39CF6E52 */ +]; +const QS2: [f64; 6] = [ + 2.95333629060523854548e+01, /* 0x403D888A, 0x78AE64FF */ + 2.52981549982190529136e+02, /* 0x406F9F68, 0xDB821CBA */ + 7.57502834868645436472e+02, /* 0x4087AC05, 0xCE49A0F7 */ + 7.39393205320467245656e+02, /* 0x40871B25, 0x48D4C029 */ + 1.55949003336666123687e+02, /* 0x40637E5E, 0x3C3ED8D4 */ + 
-4.95949898822628210127e+00, /* 0xC013D686, 0xE71BE86B */ +]; + +fn qone(x: f64) -> f64 { + let p: &[f64; 6]; + let q: &[f64; 6]; + let s: f64; + let r: f64; + let z: f64; + let mut ix: u32; + + ix = get_high_word(x); + ix &= 0x7fffffff; + if ix >= 0x40200000 { + p = &QR8; + q = &QS8; + } else if ix >= 0x40122E8B { + p = &QR5; + q = &QS5; + } else if ix >= 0x4006DB6D { + p = &QR3; + q = &QS3; + } else + /*ix >= 0x40000000*/ + { + p = &QR2; + q = &QS2; + } + z = 1.0 / (x * x); + r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5])))); + s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * (q[4] + z * q[5]))))); + return (0.375 + r / s) / x; +} diff --git a/library/compiler-builtins/libm/src/math/j1f.rs b/library/compiler-builtins/libm/src/math/j1f.rs new file mode 100644 index 0000000000000..a47472401ee27 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/j1f.rs @@ -0,0 +1,384 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_j1f.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::{cosf, fabsf, logf, sinf, sqrtf}; + +const INVSQRTPI: f32 = 5.6418961287e-01; /* 0x3f106ebb */ +const TPI: f32 = 6.3661974669e-01; /* 0x3f22f983 */ + +fn common(ix: u32, x: f32, y1: bool, sign: bool) -> f32 { + let z: f64; + let mut s: f64; + let c: f64; + let mut ss: f64; + let mut cc: f64; + + s = sinf(x) as f64; + if y1 { + s = -s; + } + c = cosf(x) as f64; + cc = s - c; + if ix < 0x7f000000 { + ss = -s - c; + z = cosf(2.0 * x) as f64; + if s * c > 0.0 { + cc = z / ss; + } else { + ss = z / cc; + } + if ix < 0x58800000 { + if y1 { + ss = -ss; + } + cc = (ponef(x) as f64) * cc - (qonef(x) as f64) * ss; + } + } + if sign { + cc = -cc; + } + return (((INVSQRTPI as f64) * cc) / (sqrtf(x) as f64)) as f32; +} + +/* R0/S0 on [0,2] */ +const R00: f32 = -6.2500000000e-02; /* 0xbd800000 */ +const R01: f32 = 1.4070566976e-03; /* 0x3ab86cfd */ +const R02: f32 = -1.5995563444e-05; /* 0xb7862e36 */ +const R03: f32 = 4.9672799207e-08; /* 0x335557d2 */ +const S01: f32 = 1.9153760746e-02; /* 0x3c9ce859 */ +const S02: f32 = 1.8594678841e-04; /* 0x3942fab6 */ +const S03: f32 = 1.1771846857e-06; /* 0x359dffc2 */ +const S04: f32 = 5.0463624390e-09; /* 0x31ad6446 */ +const S05: f32 = 1.2354227016e-11; /* 0x2d59567e */ + +/// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f32). 
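+///
+/// Follows the same method as `j1` in `j1.rs`; `common` above widens its intermediates to
+/// `f64` to limit rounding error.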
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn j1f(x: f32) -> f32 { + let mut z: f32; + let r: f32; + let s: f32; + let mut ix: u32; + let sign: bool; + + ix = x.to_bits(); + sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + if ix >= 0x7f800000 { + return 1.0 / (x * x); + } + if ix >= 0x40000000 { + /* |x| >= 2 */ + return common(ix, fabsf(x), false, sign); + } + if ix >= 0x39000000 { + /* |x| >= 2**-13 */ + z = x * x; + r = z * (R00 + z * (R01 + z * (R02 + z * R03))); + s = 1.0 + z * (S01 + z * (S02 + z * (S03 + z * (S04 + z * S05)))); + z = 0.5 + r / s; + } else { + z = 0.5; + } + return z * x; +} + +const U0: [f32; 5] = [ + -1.9605709612e-01, /* 0xbe48c331 */ + 5.0443872809e-02, /* 0x3d4e9e3c */ + -1.9125689287e-03, /* 0xbafaaf2a */ + 2.3525259166e-05, /* 0x37c5581c */ + -9.1909917899e-08, /* 0xb3c56003 */ +]; +const V0: [f32; 5] = [ + 1.9916731864e-02, /* 0x3ca3286a */ + 2.0255257550e-04, /* 0x3954644b */ + 1.3560879779e-06, /* 0x35b602d4 */ + 6.2274145840e-09, /* 0x31d5f8eb */ + 1.6655924903e-11, /* 0x2d9281cf */ +]; + +/// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f32). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn y1f(x: f32) -> f32 { + let z: f32; + let u: f32; + let v: f32; + let ix: u32; + + ix = x.to_bits(); + if (ix & 0x7fffffff) == 0 { + return -1.0 / 0.0; + } + if (ix >> 31) != 0 { + return 0.0 / 0.0; + } + if ix >= 0x7f800000 { + return 1.0 / x; + } + if ix >= 0x40000000 { + /* |x| >= 2.0 */ + return common(ix, x, true, false); + } + if ix < 0x33000000 { + /* x < 2**-25 */ + return -TPI / x; + } + z = x * x; + u = U0[0] + z * (U0[1] + z * (U0[2] + z * (U0[3] + z * U0[4]))); + v = 1.0 + z * (V0[0] + z * (V0[1] + z * (V0[2] + z * (V0[3] + z * V0[4])))); + return x * (u / v) + TPI * (j1f(x) * logf(x) - 1.0 / x); +} + +/* For x >= 8, the asymptotic expansions of pone is + * 1 + 15/128 s^2 - 4725/2^15 s^4 - ..., where s = 1/x. + * We approximate pone by + * pone(x) = 1 + (R/S) + * where R = pr0 + pr1*s^2 + pr2*s^4 + ... + pr5*s^10 + * S = 1 + ps0*s^2 + ... 
+ ps4*s^10 + * and + * | pone(x)-1-R/S | <= 2 ** ( -60.06) + */ + +const PR8: [f32; 6] = [ + /* for x in [inf, 8]=1/[0,0.125] */ + 0.0000000000e+00, /* 0x00000000 */ + 1.1718750000e-01, /* 0x3df00000 */ + 1.3239480972e+01, /* 0x4153d4ea */ + 4.1205184937e+02, /* 0x43ce06a3 */ + 3.8747453613e+03, /* 0x45722bed */ + 7.9144794922e+03, /* 0x45f753d6 */ +]; +const PS8: [f32; 5] = [ + 1.1420736694e+02, /* 0x42e46a2c */ + 3.6509309082e+03, /* 0x45642ee5 */ + 3.6956207031e+04, /* 0x47105c35 */ + 9.7602796875e+04, /* 0x47bea166 */ + 3.0804271484e+04, /* 0x46f0a88b */ +]; + +const PR5: [f32; 6] = [ + /* for x in [8,4.5454]=1/[0.125,0.22001] */ + 1.3199052094e-11, /* 0x2d68333f */ + 1.1718749255e-01, /* 0x3defffff */ + 6.8027510643e+00, /* 0x40d9b023 */ + 1.0830818176e+02, /* 0x42d89dca */ + 5.1763616943e+02, /* 0x440168b7 */ + 5.2871520996e+02, /* 0x44042dc6 */ +]; +const PS5: [f32; 5] = [ + 5.9280597687e+01, /* 0x426d1f55 */ + 9.9140142822e+02, /* 0x4477d9b1 */ + 5.3532670898e+03, /* 0x45a74a23 */ + 7.8446904297e+03, /* 0x45f52586 */ + 1.5040468750e+03, /* 0x44bc0180 */ +]; + +const PR3: [f32; 6] = [ + 3.0250391081e-09, /* 0x314fe10d */ + 1.1718686670e-01, /* 0x3defffab */ + 3.9329774380e+00, /* 0x407bb5e7 */ + 3.5119403839e+01, /* 0x420c7a45 */ + 9.1055007935e+01, /* 0x42b61c2a */ + 4.8559066772e+01, /* 0x42423c7c */ +]; +const PS3: [f32; 5] = [ + 3.4791309357e+01, /* 0x420b2a4d */ + 3.3676245117e+02, /* 0x43a86198 */ + 1.0468714600e+03, /* 0x4482dbe3 */ + 8.9081134033e+02, /* 0x445eb3ed */ + 1.0378793335e+02, /* 0x42cf936c */ +]; + +const PR2: [f32; 6] = [ + /* for x in [2.8570,2]=1/[0.3499,0.5] */ + 1.0771083225e-07, /* 0x33e74ea8 */ + 1.1717621982e-01, /* 0x3deffa16 */ + 2.3685150146e+00, /* 0x401795c0 */ + 1.2242610931e+01, /* 0x4143e1bc */ + 1.7693971634e+01, /* 0x418d8d41 */ + 5.0735230446e+00, /* 0x40a25a4d */ +]; +const PS2: [f32; 5] = [ + 2.1436485291e+01, /* 0x41ab7dec */ + 1.2529022980e+02, /* 0x42fa9499 */ + 2.3227647400e+02, /* 0x436846c7 */ + 1.1767937469e+02, /* 0x42eb5bd7 */ + 8.3646392822e+00, /* 0x4105d590 */ +]; + +fn ponef(x: f32) -> f32 { + let p: &[f32; 6]; + let q: &[f32; 5]; + let z: f32; + let r: f32; + let s: f32; + let mut ix: u32; + + ix = x.to_bits(); + ix &= 0x7fffffff; + if ix >= 0x41000000 { + p = &PR8; + q = &PS8; + } else if ix >= 0x409173eb { + p = &PR5; + q = &PS5; + } else if ix >= 0x4036d917 { + p = &PR3; + q = &PS3; + } else + /*ix >= 0x40000000*/ + { + p = &PR2; + q = &PS2; + } + z = 1.0 / (x * x); + r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5])))); + s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * q[4])))); + return 1.0 + r / s; +} + +/* For x >= 8, the asymptotic expansions of qone is + * 3/8 s - 105/1024 s^3 - ..., where s = 1/x. + * We approximate pone by + * qone(x) = s*(0.375 + (R/S)) + * where R = qr1*s^2 + qr2*s^4 + ... + qr5*s^10 + * S = 1 + qs1*s^2 + ... 
+ qs6*s^12 + * and + * | qone(x)/s -0.375-R/S | <= 2 ** ( -61.13) + */ + +const QR8: [f32; 6] = [ + /* for x in [inf, 8]=1/[0,0.125] */ + 0.0000000000e+00, /* 0x00000000 */ + -1.0253906250e-01, /* 0xbdd20000 */ + -1.6271753311e+01, /* 0xc1822c8d */ + -7.5960174561e+02, /* 0xc43de683 */ + -1.1849806641e+04, /* 0xc639273a */ + -4.8438511719e+04, /* 0xc73d3683 */ +]; +const QS8: [f32; 6] = [ + 1.6139537048e+02, /* 0x43216537 */ + 7.8253862305e+03, /* 0x45f48b17 */ + 1.3387534375e+05, /* 0x4802bcd6 */ + 7.1965775000e+05, /* 0x492fb29c */ + 6.6660125000e+05, /* 0x4922be94 */ + -2.9449025000e+05, /* 0xc88fcb48 */ +]; + +const QR5: [f32; 6] = [ + /* for x in [8,4.5454]=1/[0.125,0.22001] */ + -2.0897993405e-11, /* 0xadb7d219 */ + -1.0253904760e-01, /* 0xbdd1fffe */ + -8.0564479828e+00, /* 0xc100e736 */ + -1.8366960144e+02, /* 0xc337ab6b */ + -1.3731937256e+03, /* 0xc4aba633 */ + -2.6124443359e+03, /* 0xc523471c */ +]; +const QS5: [f32; 6] = [ + 8.1276550293e+01, /* 0x42a28d98 */ + 1.9917987061e+03, /* 0x44f8f98f */ + 1.7468484375e+04, /* 0x468878f8 */ + 4.9851425781e+04, /* 0x4742bb6d */ + 2.7948074219e+04, /* 0x46da5826 */ + -4.7191835938e+03, /* 0xc5937978 */ +]; + +const QR3: [f32; 6] = [ + -5.0783124372e-09, /* 0xb1ae7d4f */ + -1.0253783315e-01, /* 0xbdd1ff5b */ + -4.6101160049e+00, /* 0xc0938612 */ + -5.7847221375e+01, /* 0xc267638e */ + -2.2824453735e+02, /* 0xc3643e9a */ + -2.1921012878e+02, /* 0xc35b35cb */ +]; +const QS3: [f32; 6] = [ + 4.7665153503e+01, /* 0x423ea91e */ + 6.7386511230e+02, /* 0x4428775e */ + 3.3801528320e+03, /* 0x45534272 */ + 5.5477290039e+03, /* 0x45ad5dd5 */ + 1.9031191406e+03, /* 0x44ede3d0 */ + -1.3520118713e+02, /* 0xc3073381 */ +]; + +const QR2: [f32; 6] = [ + /* for x in [2.8570,2]=1/[0.3499,0.5] */ + -1.7838172539e-07, /* 0xb43f8932 */ + -1.0251704603e-01, /* 0xbdd1f475 */ + -2.7522056103e+00, /* 0xc0302423 */ + -1.9663616180e+01, /* 0xc19d4f16 */ + -4.2325313568e+01, /* 0xc2294d1f */ + -2.1371921539e+01, /* 0xc1aaf9b2 */ +]; +const QS2: [f32; 6] = [ + 2.9533363342e+01, /* 0x41ec4454 */ + 2.5298155212e+02, /* 0x437cfb47 */ + 7.5750280762e+02, /* 0x443d602e */ + 7.3939318848e+02, /* 0x4438d92a */ + 1.5594900513e+02, /* 0x431bf2f2 */ + -4.9594988823e+00, /* 0xc09eb437 */ +]; + +fn qonef(x: f32) -> f32 { + let p: &[f32; 6]; + let q: &[f32; 6]; + let s: f32; + let r: f32; + let z: f32; + let mut ix: u32; + + ix = x.to_bits(); + ix &= 0x7fffffff; + if ix >= 0x41000000 { + p = &QR8; + q = &QS8; + } else if ix >= 0x409173eb { + p = &QR5; + q = &QS5; + } else if ix >= 0x4036d917 { + p = &QR3; + q = &QS3; + } else + /*ix >= 0x40000000*/ + { + p = &QR2; + q = &QS2; + } + z = 1.0 / (x * x); + r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5])))); + s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * (q[4] + z * q[5]))))); + return (0.375 + r / s) / x; +} + +// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 +#[cfg(not(target_arch = "powerpc64"))] +#[cfg(test)] +mod tests { + use super::{j1f, y1f}; + #[test] + fn test_j1f_2488() { + // 0x401F3E49 + assert_eq!(j1f(2.4881766_f32), 0.49999475_f32); + } + #[test] + fn test_y1f_2002() { + //allow slightly different result on x87 + let res = y1f(2.0000002_f32); + if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) && (res == -0.10703231_f32) + { + return; + } + assert_eq!(res, -0.10703229_f32); + } +} diff --git a/library/compiler-builtins/libm/src/math/jn.rs b/library/compiler-builtins/libm/src/math/jn.rs new file mode 100644 index 
0000000000000..31f8d9c538290 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/jn.rs @@ -0,0 +1,339 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_jn.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* + * jn(n, x), yn(n, x) + * floating point Bessel's function of the 1st and 2nd kind + * of order n + * + * Special cases: + * y0(0)=y1(0)=yn(n,0) = -inf with division by zero signal; + * y0(-ve)=y1(-ve)=yn(n,-ve) are NaN with invalid signal. + * Note 2. About jn(n,x), yn(n,x) + * For n=0, j0(x) is called, + * for n=1, j1(x) is called, + * for n<=x, forward recursion is used starting + * from values of j0(x) and j1(x). + * for n>x, a continued fraction approximation to + * j(n,x)/j(n-1,x) is evaluated and then backward + * recursion is used starting from a supposed value + * for j(n,x). The resulting value of j(0,x) is + * compared with the actual value to correct the + * supposed value of j(n,x). + * + * yn(n,x) is similar in all respects, except + * that forward recursion is used for all + * values of n>1. + */ + +use super::{cos, fabs, get_high_word, get_low_word, j0, j1, log, sin, sqrt, y0, y1}; + +const INVSQRTPI: f64 = 5.64189583547756279280e-01; /* 0x3FE20DD7, 0x50429B6D */ + +/// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f64). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn jn(n: i32, mut x: f64) -> f64 { + let mut ix: u32; + let lx: u32; + let nm1: i32; + let mut i: i32; + let mut sign: bool; + let mut a: f64; + let mut b: f64; + let mut temp: f64; + + ix = get_high_word(x); + lx = get_low_word(x); + sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + + // -lx == !lx + 1 + if ix | ((lx | (!lx).wrapping_add(1)) >> 31) > 0x7ff00000 { + /* nan */ + return x; + } + + /* J(-n,x) = (-1)^n * J(n, x), J(n, -x) = (-1)^n * J(n, x) + * Thus, J(-n,x) = J(n,-x) + */ + /* nm1 = |n|-1 is used instead of |n| to handle n==INT_MIN */ + if n == 0 { + return j0(x); + } + if n < 0 { + nm1 = -(n + 1); + x = -x; + sign = !sign; + } else { + nm1 = n - 1; + } + if nm1 == 0 { + return j1(x); + } + + sign &= (n & 1) != 0; /* even n: 0, odd n: signbit(x) */ + x = fabs(x); + if (ix | lx) == 0 || ix == 0x7ff00000 { + /* if x is 0 or inf */ + b = 0.0; + } else if (nm1 as f64) < x { + /* Safe to use J(n+1,x)=2n/x *J(n,x)-J(n-1,x) */ + if ix >= 0x52d00000 { + /* x > 2**302 */ + /* (x >> n**2) + * Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi) + * Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi) + * Let s=sin(x), c=cos(x), + * xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then + * + * n sin(xn)*sqt2 cos(xn)*sqt2 + * ---------------------------------- + * 0 s-c c+s + * 1 -s-c -c+s + * 2 -s+c -c-s + * 3 s+c c-s + */ + temp = match nm1 & 3 { + 0 => -cos(x) + sin(x), + 1 => -cos(x) - sin(x), + 2 => cos(x) - sin(x), + // 3 + _ => cos(x) + sin(x), + }; + b = INVSQRTPI * temp / sqrt(x); + } else { + a = j0(x); + b = j1(x); + i = 0; + while i < nm1 { + i += 1; + temp = b; + b = b * (2.0 * (i as f64) / x) - a; /* avoid underflow */ + a = temp; + } + } + } else if ix < 0x3e100000 { + /* x < 2**-29 */ + /* x is tiny, return the first Taylor expansion of J(n,x) + * J(n,x) = 1/n!*(x/2)^n - ... 
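+         * For example, with n = 3 the loop below accumulates a = 3! = 6 and b = (x/2)^3,
+         * so b/a = (x/2)^3 / 6, the leading Taylor term.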
+ */ + if nm1 > 32 { + /* underflow */ + b = 0.0; + } else { + temp = x * 0.5; + b = temp; + a = 1.0; + i = 2; + while i <= nm1 + 1 { + a *= i as f64; /* a = n! */ + b *= temp; /* b = (x/2)^n */ + i += 1; + } + b = b / a; + } + } else { + /* use backward recurrence */ + /* x x^2 x^2 + * J(n,x)/J(n-1,x) = ---- ------ ------ ..... + * 2n - 2(n+1) - 2(n+2) + * + * 1 1 1 + * (for large x) = ---- ------ ------ ..... + * 2n 2(n+1) 2(n+2) + * -- - ------ - ------ - + * x x x + * + * Let w = 2n/x and h=2/x, then the above quotient + * is equal to the continued fraction: + * 1 + * = ----------------------- + * 1 + * w - ----------------- + * 1 + * w+h - --------- + * w+2h - ... + * + * To determine how many terms needed, let + * Q(0) = w, Q(1) = w(w+h) - 1, + * Q(k) = (w+k*h)*Q(k-1) - Q(k-2), + * When Q(k) > 1e4 good for single + * When Q(k) > 1e9 good for double + * When Q(k) > 1e17 good for quadruple + */ + /* determine k */ + let mut t: f64; + let mut q0: f64; + let mut q1: f64; + let mut w: f64; + let h: f64; + let mut z: f64; + let mut tmp: f64; + let nf: f64; + + let mut k: i32; + + nf = (nm1 as f64) + 1.0; + w = 2.0 * nf / x; + h = 2.0 / x; + z = w + h; + q0 = w; + q1 = w * z - 1.0; + k = 1; + while q1 < 1.0e9 { + k += 1; + z += h; + tmp = z * q1 - q0; + q0 = q1; + q1 = tmp; + } + t = 0.0; + i = k; + while i >= 0 { + t = 1.0 / (2.0 * ((i as f64) + nf) / x - t); + i -= 1; + } + a = t; + b = 1.0; + /* estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n) + * Hence, if n*(log(2n/x)) > ... + * single 8.8722839355e+01 + * double 7.09782712893383973096e+02 + * long double 1.1356523406294143949491931077970765006170e+04 + * then recurrent value may overflow and the result is + * likely underflow to zero + */ + tmp = nf * log(fabs(w)); + if tmp < 7.09782712893383973096e+02 { + i = nm1; + while i > 0 { + temp = b; + b = b * (2.0 * (i as f64)) / x - a; + a = temp; + i -= 1; + } + } else { + i = nm1; + while i > 0 { + temp = b; + b = b * (2.0 * (i as f64)) / x - a; + a = temp; + /* scale b to avoid spurious overflow */ + let x1p500 = f64::from_bits(0x5f30000000000000); // 0x1p500 == 2^500 + if b > x1p500 { + a /= b; + t /= b; + b = 1.0; + } + i -= 1; + } + } + z = j0(x); + w = j1(x); + if fabs(z) >= fabs(w) { + b = t * z / b; + } else { + b = t * w / a; + } + } + + if sign { -b } else { b } +} + +/// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f64). 
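+///
+/// As noted in the header above, `yn(n, 0.0)` is `-inf` and negative arguments return
+/// NaN; `n == 0` and `n == 1` reduce to `y0(x)` and `y1(x)` respectively.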
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn yn(n: i32, x: f64) -> f64 { + let mut ix: u32; + let lx: u32; + let mut ib: u32; + let nm1: i32; + let mut sign: bool; + let mut i: i32; + let mut a: f64; + let mut b: f64; + let mut temp: f64; + + ix = get_high_word(x); + lx = get_low_word(x); + sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + + // -lx == !lx + 1 + if ix | ((lx | (!lx).wrapping_add(1)) >> 31) > 0x7ff00000 { + /* nan */ + return x; + } + if sign && (ix | lx) != 0 { + /* x < 0 */ + return 0.0 / 0.0; + } + if ix == 0x7ff00000 { + return 0.0; + } + + if n == 0 { + return y0(x); + } + if n < 0 { + nm1 = -(n + 1); + sign = (n & 1) != 0; + } else { + nm1 = n - 1; + sign = false; + } + if nm1 == 0 { + if sign { + return -y1(x); + } else { + return y1(x); + } + } + + if ix >= 0x52d00000 { + /* x > 2**302 */ + /* (x >> n**2) + * Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi) + * Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi) + * Let s=sin(x), c=cos(x), + * xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then + * + * n sin(xn)*sqt2 cos(xn)*sqt2 + * ---------------------------------- + * 0 s-c c+s + * 1 -s-c -c+s + * 2 -s+c -c-s + * 3 s+c c-s + */ + temp = match nm1 & 3 { + 0 => -sin(x) - cos(x), + 1 => -sin(x) + cos(x), + 2 => sin(x) + cos(x), + // 3 + _ => sin(x) - cos(x), + }; + b = INVSQRTPI * temp / sqrt(x); + } else { + a = y0(x); + b = y1(x); + /* quit if b is -inf */ + ib = get_high_word(b); + i = 0; + while i < nm1 && ib != 0xfff00000 { + i += 1; + temp = b; + b = (2.0 * (i as f64) / x) * b - a; + ib = get_high_word(b); + a = temp; + } + } + + if sign { -b } else { b } +} diff --git a/library/compiler-builtins/libm/src/math/jnf.rs b/library/compiler-builtins/libm/src/math/jnf.rs new file mode 100644 index 0000000000000..52cf7d8a8bdad --- /dev/null +++ b/library/compiler-builtins/libm/src/math/jnf.rs @@ -0,0 +1,253 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_jnf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::{fabsf, j0f, j1f, logf, y0f, y1f}; + +/// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f32). 
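+///
+/// `jnf(0, x)` and `jnf(1, x)` reduce to `j0f(x)` and `j1f(x)`; negative orders are
+/// handled via J(-n, x) = J(n, -x).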
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn jnf(n: i32, mut x: f32) -> f32 { + let mut ix: u32; + let mut nm1: i32; + let mut sign: bool; + let mut i: i32; + let mut a: f32; + let mut b: f32; + let mut temp: f32; + + ix = x.to_bits(); + sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + if ix > 0x7f800000 { + /* nan */ + return x; + } + + /* J(-n,x) = J(n,-x), use |n|-1 to avoid overflow in -n */ + if n == 0 { + return j0f(x); + } + if n < 0 { + nm1 = -(n + 1); + x = -x; + sign = !sign; + } else { + nm1 = n - 1; + } + if nm1 == 0 { + return j1f(x); + } + + sign &= (n & 1) != 0; /* even n: 0, odd n: signbit(x) */ + x = fabsf(x); + if ix == 0 || ix == 0x7f800000 { + /* if x is 0 or inf */ + b = 0.0; + } else if (nm1 as f32) < x { + /* Safe to use J(n+1,x)=2n/x *J(n,x)-J(n-1,x) */ + a = j0f(x); + b = j1f(x); + i = 0; + while i < nm1 { + i += 1; + temp = b; + b = b * (2.0 * (i as f32) / x) - a; + a = temp; + } + } else if ix < 0x35800000 { + /* x < 2**-20 */ + /* x is tiny, return the first Taylor expansion of J(n,x) + * J(n,x) = 1/n!*(x/2)^n - ... + */ + if nm1 > 8 { + /* underflow */ + nm1 = 8; + } + temp = 0.5 * x; + b = temp; + a = 1.0; + i = 2; + while i <= nm1 + 1 { + a *= i as f32; /* a = n! */ + b *= temp; /* b = (x/2)^n */ + i += 1; + } + b = b / a; + } else { + /* use backward recurrence */ + /* x x^2 x^2 + * J(n,x)/J(n-1,x) = ---- ------ ------ ..... + * 2n - 2(n+1) - 2(n+2) + * + * 1 1 1 + * (for large x) = ---- ------ ------ ..... + * 2n 2(n+1) 2(n+2) + * -- - ------ - ------ - + * x x x + * + * Let w = 2n/x and h=2/x, then the above quotient + * is equal to the continued fraction: + * 1 + * = ----------------------- + * 1 + * w - ----------------- + * 1 + * w+h - --------- + * w+2h - ... + * + * To determine how many terms needed, let + * Q(0) = w, Q(1) = w(w+h) - 1, + * Q(k) = (w+k*h)*Q(k-1) - Q(k-2), + * When Q(k) > 1e4 good for single + * When Q(k) > 1e9 good for double + * When Q(k) > 1e17 good for quadruple + */ + /* determine k */ + let mut t: f32; + let mut q0: f32; + let mut q1: f32; + let mut w: f32; + let h: f32; + let mut z: f32; + let mut tmp: f32; + let nf: f32; + let mut k: i32; + + nf = (nm1 as f32) + 1.0; + w = 2.0 * nf / x; + h = 2.0 / x; + z = w + h; + q0 = w; + q1 = w * z - 1.0; + k = 1; + while q1 < 1.0e4 { + k += 1; + z += h; + tmp = z * q1 - q0; + q0 = q1; + q1 = tmp; + } + t = 0.0; + i = k; + while i >= 0 { + t = 1.0 / (2.0 * ((i as f32) + nf) / x - t); + i -= 1; + } + a = t; + b = 1.0; + /* estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n) + * Hence, if n*(log(2n/x)) > ... + * single 8.8722839355e+01 + * double 7.09782712893383973096e+02 + * long double 1.1356523406294143949491931077970765006170e+04 + * then recurrent value may overflow and the result is + * likely underflow to zero + */ + tmp = nf * logf(fabsf(w)); + if tmp < 88.721679688 { + i = nm1; + while i > 0 { + temp = b; + b = 2.0 * (i as f32) * b / x - a; + a = temp; + i -= 1; + } + } else { + i = nm1; + while i > 0 { + temp = b; + b = 2.0 * (i as f32) * b / x - a; + a = temp; + /* scale b to avoid spurious overflow */ + let x1p60 = f32::from_bits(0x5d800000); // 0x1p60 == 2^60 + if b > x1p60 { + a /= b; + t /= b; + b = 1.0; + } + i -= 1; + } + } + z = j0f(x); + w = j1f(x); + if fabsf(z) >= fabsf(w) { + b = t * z / b; + } else { + b = t * w / a; + } + } + + if sign { -b } else { b } +} + +/// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f32). 
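+///
+/// Like `yn`, this returns NaN for negative `x`, `-inf` at `x == 0.0`, and reduces to
+/// `y0f(x)` / `y1f(x)` for `n == 0` / `n == 1`.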
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ynf(n: i32, x: f32) -> f32 { + let mut ix: u32; + let mut ib: u32; + let nm1: i32; + let mut sign: bool; + let mut i: i32; + let mut a: f32; + let mut b: f32; + let mut temp: f32; + + ix = x.to_bits(); + sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + if ix > 0x7f800000 { + /* nan */ + return x; + } + if sign && ix != 0 { + /* x < 0 */ + return 0.0 / 0.0; + } + if ix == 0x7f800000 { + return 0.0; + } + + if n == 0 { + return y0f(x); + } + if n < 0 { + nm1 = -(n + 1); + sign = (n & 1) != 0; + } else { + nm1 = n - 1; + sign = false; + } + if nm1 == 0 { + if sign { + return -y1f(x); + } else { + return y1f(x); + } + } + + a = y0f(x); + b = y1f(x); + /* quit if b is -inf */ + ib = b.to_bits(); + i = 0; + while i < nm1 && ib != 0xff800000 { + i += 1; + temp = b; + b = (2.0 * (i as f32) / x) * b - a; + ib = b.to_bits(); + a = temp; + } + + if sign { -b } else { b } +} diff --git a/library/compiler-builtins/libm/src/math/k_cos.rs b/library/compiler-builtins/libm/src/math/k_cos.rs new file mode 100644 index 0000000000000..49b2fc64d8648 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/k_cos.rs @@ -0,0 +1,62 @@ +// origin: FreeBSD /usr/src/lib/msun/src/k_cos.c +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunSoft, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== + +const C1: f64 = 4.16666666666666019037e-02; /* 0x3FA55555, 0x5555554C */ +const C2: f64 = -1.38888888888741095749e-03; /* 0xBF56C16C, 0x16C15177 */ +const C3: f64 = 2.48015872894767294178e-05; /* 0x3EFA01A0, 0x19CB1590 */ +const C4: f64 = -2.75573143513906633035e-07; /* 0xBE927E4F, 0x809C52AD */ +const C5: f64 = 2.08757232129817482790e-09; /* 0x3E21EE9E, 0xBDB4B1C4 */ +const C6: f64 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */ + +// kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 +// Input x is assumed to be bounded by ~pi/4 in magnitude. +// Input y is the tail of x. +// +// Algorithm +// 1. Since cos(-x) = cos(x), we need only to consider positive x. +// 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0. +// 3. cos(x) is approximated by a polynomial of degree 14 on +// [0,pi/4] +// 4 14 +// cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x +// where the remez error is +// +// | 2 4 6 8 10 12 14 | -58 +// |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2 +// | | +// +// 4 6 8 10 12 14 +// 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then +// cos(x) ~ 1 - x*x/2 + r +// since cos(x+y) ~ cos(x) - sin(x)*y +// ~ cos(x) - x*y, +// a correction term is necessary in cos(x) and hence +// cos(x+y) = 1 - (x*x/2 - (r - x*y)) +// For better accuracy, rearrange to +// cos(x+y) ~ w + (tmp + (r-x*y)) +// where w = 1 - x*x/2 and tmp is a tiny correction term +// (1 - x*x/2 == w + tmp exactly in infinite precision). +// The exactness of w + tmp in infinite precision depends on w +// and tmp having the same precision as x. If they have extra +// precision due to compiler bugs, then the extra precision is +// only good provided it is retained in all terms of the final +// expression for cos(). Retention happens in all cases tested +// under FreeBSD, so don't pessimize things by forcibly clipping +// any extra precision in w. 
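+//
+// For example, k_cos(0.5, 0.0) should agree with cos(0.5) ~ 0.8775825619 to roughly
+// f64 precision, since the polynomial error above is far below one ulp.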
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub(crate) fn k_cos(x: f64, y: f64) -> f64 { + let z = x * x; + let w = z * z; + let r = z * (C1 + z * (C2 + z * C3)) + w * w * (C4 + z * (C5 + z * C6)); + let hz = 0.5 * z; + let w = 1.0 - hz; + w + (((1.0 - w) - hz) + (z * r - x * y)) +} diff --git a/library/compiler-builtins/libm/src/math/k_cosf.rs b/library/compiler-builtins/libm/src/math/k_cosf.rs new file mode 100644 index 0000000000000..e99f2348c00df --- /dev/null +++ b/library/compiler-builtins/libm/src/math/k_cosf.rs @@ -0,0 +1,29 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/k_cosf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + * Debugged and optimized by Bruce D. Evans. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +/* |cos(x) - c(x)| < 2**-34.1 (~[-5.37e-11, 5.295e-11]). */ +const C0: f64 = -0.499999997251031003120; /* -0x1ffffffd0c5e81.0p-54 */ +const C1: f64 = 0.0416666233237390631894; /* 0x155553e1053a42.0p-57 */ +const C2: f64 = -0.00138867637746099294692; /* -0x16c087e80f1e27.0p-62 */ +const C3: f64 = 0.0000243904487962774090654; /* 0x199342e0ee5069.0p-68 */ + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub(crate) fn k_cosf(x: f64) -> f32 { + let z = x * x; + let w = z * z; + let r = C2 + z * C3; + (((1.0 + z * C0) + w * C1) + (w * z) * r) as f32 +} diff --git a/library/compiler-builtins/libm/src/math/k_expo2.rs b/library/compiler-builtins/libm/src/math/k_expo2.rs new file mode 100644 index 0000000000000..7345075f37684 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/k_expo2.rs @@ -0,0 +1,14 @@ +use super::exp; + +/* k is such that k*ln2 has minimal relative error and x - kln2 > log(FLT_MIN) */ +const K: i32 = 2043; + +/* expf(x)/2 for x >= log(FLT_MAX), slightly better than 0.5f*expf(x/2)*expf(x/2) */ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub(crate) fn k_expo2(x: f64) -> f64 { + let k_ln2 = f64::from_bits(0x40962066151add8b); + /* note that k is odd and scale*scale overflows */ + let scale = f64::from_bits(((((0x3ff + K / 2) as u32) << 20) as u64) << 32); + /* exp(x - k ln2) * 2**(k-1) */ + exp(x - k_ln2) * scale * scale +} diff --git a/library/compiler-builtins/libm/src/math/k_expo2f.rs b/library/compiler-builtins/libm/src/math/k_expo2f.rs new file mode 100644 index 0000000000000..fbd7b27d5832f --- /dev/null +++ b/library/compiler-builtins/libm/src/math/k_expo2f.rs @@ -0,0 +1,14 @@ +use super::expf; + +/* k is such that k*ln2 has minimal relative error and x - kln2 > log(FLT_MIN) */ +const K: i32 = 235; + +/* expf(x)/2 for x >= log(FLT_MAX), slightly better than 0.5f*expf(x/2)*expf(x/2) */ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub(crate) fn k_expo2f(x: f32) -> f32 { + let k_ln2 = f32::from_bits(0x4322e3bc); + /* note that k is odd and scale*scale overflows */ + let scale = f32::from_bits(((0x7f + K / 2) as u32) << 23); + /* exp(x - k ln2) * 2**(k-1) */ + expf(x - k_ln2) * scale * scale +} diff --git a/library/compiler-builtins/libm/src/math/k_sin.rs b/library/compiler-builtins/libm/src/math/k_sin.rs new file mode 100644 index 0000000000000..9dd96c944744d --- /dev/null +++ 
b/library/compiler-builtins/libm/src/math/k_sin.rs @@ -0,0 +1,57 @@ +// origin: FreeBSD /usr/src/lib/msun/src/k_sin.c +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunSoft, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== + +const S1: f64 = -1.66666666666666324348e-01; /* 0xBFC55555, 0x55555549 */ +const S2: f64 = 8.33333333332248946124e-03; /* 0x3F811111, 0x1110F8A6 */ +const S3: f64 = -1.98412698298579493134e-04; /* 0xBF2A01A0, 0x19C161D5 */ +const S4: f64 = 2.75573137070700676789e-06; /* 0x3EC71DE3, 0x57B1FE7D */ +const S5: f64 = -2.50507602534068634195e-08; /* 0xBE5AE5E6, 0x8A2B9CEB */ +const S6: f64 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ + +// kernel sin function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854 +// Input x is assumed to be bounded by ~pi/4 in magnitude. +// Input y is the tail of x. +// Input iy indicates whether y is 0. (if iy=0, y assume to be 0). +// +// Algorithm +// 1. Since sin(-x) = -sin(x), we need only to consider positive x. +// 2. Callers must return sin(-0) = -0 without calling here since our +// odd polynomial is not evaluated in a way that preserves -0. +// Callers may do the optimization sin(x) ~ x for tiny x. +// 3. sin(x) is approximated by a polynomial of degree 13 on +// [0,pi/4] +// 3 13 +// sin(x) ~ x + S1*x + ... + S6*x +// where +// +// |sin(x) 2 4 6 8 10 12 | -58 +// |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2 +// | x | +// +// 4. sin(x+y) = sin(x) + sin'(x')*y +// ~ sin(x) + (1-x*x/2)*y +// For better accuracy, let +// 3 2 2 2 2 +// r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6)))) +// then 3 2 +// sin(x) = x + (S1*x + (x *(r-y/2)+y)) +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub(crate) fn k_sin(x: f64, y: f64, iy: i32) -> f64 { + let z = x * x; + let w = z * z; + let r = S2 + z * (S3 + z * S4) + z * w * (S5 + z * S6); + let v = z * x; + if iy == 0 { + x + v * (S1 + z * r) + } else { + x - ((z * (0.5 * y - v * r) - y) - v * S1) + } +} diff --git a/library/compiler-builtins/libm/src/math/k_sinf.rs b/library/compiler-builtins/libm/src/math/k_sinf.rs new file mode 100644 index 0000000000000..88d10cababc5d --- /dev/null +++ b/library/compiler-builtins/libm/src/math/k_sinf.rs @@ -0,0 +1,30 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/k_sinf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + * Optimized by Bruce D. Evans. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +/* |sin(x)/x - s(x)| < 2**-37.5 (~[-4.89e-12, 4.824e-12]). 
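+ * As in k_cosf, the polynomial is evaluated in f64 (note the f64 coefficients and the
+ * final cast below) and only rounded to f32 on return.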
*/ +const S1: f64 = -0.166666666416265235595; /* -0x15555554cbac77.0p-55 */ +const S2: f64 = 0.0083333293858894631756; /* 0x111110896efbb2.0p-59 */ +const S3: f64 = -0.000198393348360966317347; /* -0x1a00f9e2cae774.0p-65 */ +const S4: f64 = 0.0000027183114939898219064; /* 0x16cd878c3b46a7.0p-71 */ + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub(crate) fn k_sinf(x: f64) -> f32 { + let z = x * x; + let w = z * z; + let r = S3 + z * S4; + let s = z * x; + ((x + s * (S1 + z * S2)) + s * w * r) as f32 +} diff --git a/library/compiler-builtins/libm/src/math/k_tan.rs b/library/compiler-builtins/libm/src/math/k_tan.rs new file mode 100644 index 0000000000000..d177010bb0a0f --- /dev/null +++ b/library/compiler-builtins/libm/src/math/k_tan.rs @@ -0,0 +1,105 @@ +// origin: FreeBSD /usr/src/lib/msun/src/k_tan.c */ +// +// ==================================================== +// Copyright 2004 Sun Microsystems, Inc. All Rights Reserved. +// +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== + +// kernel tan function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854 +// Input x is assumed to be bounded by ~pi/4 in magnitude. +// Input y is the tail of x. +// Input odd indicates whether tan (if odd = 0) or -1/tan (if odd = 1) is returned. +// +// Algorithm +// 1. Since tan(-x) = -tan(x), we need only to consider positive x. +// 2. Callers must return tan(-0) = -0 without calling here since our +// odd polynomial is not evaluated in a way that preserves -0. +// Callers may do the optimization tan(x) ~ x for tiny x. +// 3. tan(x) is approximated by a odd polynomial of degree 27 on +// [0,0.67434] +// 3 27 +// tan(x) ~ x + T1*x + ... + T13*x +// where +// +// |tan(x) 2 4 26 | -59.2 +// |----- - (1+T1*x +T2*x +.... +T13*x )| <= 2 +// | x | +// +// Note: tan(x+y) = tan(x) + tan'(x)*y +// ~ tan(x) + (1+x*x)*y +// Therefore, for better accuracy in computing tan(x+y), let +// 3 2 2 2 2 +// r = x *(T2+x *(T3+x *(...+x *(T12+x *T13)))) +// then +// 3 2 +// tan(x+y) = x + (T1*x + (x *(r+y)+y)) +// +// 4. 
For x in [0.67434,pi/4], let y = pi/4 - x, then +// tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y)) +// = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y))) +static T: [f64; 13] = [ + 3.33333333333334091986e-01, /* 3FD55555, 55555563 */ + 1.33333333333201242699e-01, /* 3FC11111, 1110FE7A */ + 5.39682539762260521377e-02, /* 3FABA1BA, 1BB341FE */ + 2.18694882948595424599e-02, /* 3F9664F4, 8406D637 */ + 8.86323982359930005737e-03, /* 3F8226E3, E96E8493 */ + 3.59207910759131235356e-03, /* 3F6D6D22, C9560328 */ + 1.45620945432529025516e-03, /* 3F57DBC8, FEE08315 */ + 5.88041240820264096874e-04, /* 3F4344D8, F2F26501 */ + 2.46463134818469906812e-04, /* 3F3026F7, 1A8D1068 */ + 7.81794442939557092300e-05, /* 3F147E88, A03792A6 */ + 7.14072491382608190305e-05, /* 3F12B80F, 32F0A7E9 */ + -1.85586374855275456654e-05, /* BEF375CB, DB605373 */ + 2.59073051863633712884e-05, /* 3EFB2A70, 74BF7AD4 */ +]; +const PIO4: f64 = 7.85398163397448278999e-01; /* 3FE921FB, 54442D18 */ +const PIO4_LO: f64 = 3.06161699786838301793e-17; /* 3C81A626, 33145C07 */ + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub(crate) fn k_tan(mut x: f64, mut y: f64, odd: i32) -> f64 { + let hx = (f64::to_bits(x) >> 32) as u32; + let big = (hx & 0x7fffffff) >= 0x3FE59428; /* |x| >= 0.6744 */ + if big { + let sign = hx >> 31; + if sign != 0 { + x = -x; + y = -y; + } + x = (PIO4 - x) + (PIO4_LO - y); + y = 0.0; + } + let z = x * x; + let w = z * z; + /* + * Break x^5*(T[1]+x^2*T[2]+...) into + * x^5(T[1]+x^4*T[3]+...+x^20*T[11]) + + * x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12])) + */ + let r = T[1] + w * (T[3] + w * (T[5] + w * (T[7] + w * (T[9] + w * T[11])))); + let v = z * (T[2] + w * (T[4] + w * (T[6] + w * (T[8] + w * (T[10] + w * T[12]))))); + let s = z * x; + let r = y + z * (s * (r + v) + y) + s * T[0]; + let w = x + r; + if big { + let sign = hx >> 31; + let s = 1.0 - 2.0 * odd as f64; + let v = s - 2.0 * (x + (r - w * w / (w + s))); + return if sign != 0 { -v } else { v }; + } + if odd == 0 { + return w; + } + /* -1.0/(x+r) has up to 2ulp error, so compute it accurately */ + let w0 = zero_low_word(w); + let v = r - (w0 - x); /* w0+v = r+x */ + let a = -1.0 / w; + let a0 = zero_low_word(a); + a0 + a * (1.0 + a0 * w0 + a0 * v) +} + +fn zero_low_word(x: f64) -> f64 { + f64::from_bits(f64::to_bits(x) & 0xFFFF_FFFF_0000_0000) +} diff --git a/library/compiler-builtins/libm/src/math/k_tanf.rs b/library/compiler-builtins/libm/src/math/k_tanf.rs new file mode 100644 index 0000000000000..af8db539dad4a --- /dev/null +++ b/library/compiler-builtins/libm/src/math/k_tanf.rs @@ -0,0 +1,46 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/k_tan.c */ +/* + * ==================================================== + * Copyright 2004 Sun Microsystems, Inc. All Rights Reserved. + * + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +/* |tan(x)/x - t(x)| < 2**-25.5 (~[-2e-08, 2e-08]). 
*/ +const T: [f64; 6] = [ + 0.333331395030791399758, /* 0x15554d3418c99f.0p-54 */ + 0.133392002712976742718, /* 0x1112fd38999f72.0p-55 */ + 0.0533812378445670393523, /* 0x1b54c91d865afe.0p-57 */ + 0.0245283181166547278873, /* 0x191df3908c33ce.0p-58 */ + 0.00297435743359967304927, /* 0x185dadfcecf44e.0p-61 */ + 0.00946564784943673166728, /* 0x1362b9bf971bcd.0p-59 */ +]; + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub(crate) fn k_tanf(x: f64, odd: bool) -> f32 { + let z = x * x; + /* + * Split up the polynomial into small independent terms to give + * opportunities for parallel evaluation. The chosen splitting is + * micro-optimized for Athlons (XP, X64). It costs 2 multiplications + * relative to Horner's method on sequential machines. + * + * We add the small terms from lowest degree up for efficiency on + * non-sequential machines (the lowest degree terms tend to be ready + * earlier). Apart from this, we don't care about order of + * operations, and don't need to to care since we have precision to + * spare. However, the chosen splitting is good for accuracy too, + * and would give results as accurate as Horner's method if the + * small terms were added from highest degree down. + */ + let mut r = T[4] + z * T[5]; + let t = T[2] + z * T[3]; + let w = z * z; + let s = z * x; + let u = T[0] + z * T[1]; + r = (x + s * u) + (s * w) * (t + w * r); + (if odd { -1. / r } else { r }) as f32 +} diff --git a/library/compiler-builtins/libm/src/math/ldexp.rs b/library/compiler-builtins/libm/src/math/ldexp.rs new file mode 100644 index 0000000000000..24899ba306af2 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/ldexp.rs @@ -0,0 +1,21 @@ +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ldexpf16(x: f16, n: i32) -> f16 { + super::scalbnf16(x, n) +} + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ldexpf(x: f32, n: i32) -> f32 { + super::scalbnf(x, n) +} + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ldexp(x: f64, n: i32) -> f64 { + super::scalbn(x, n) +} + +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ldexpf128(x: f128, n: i32) -> f128 { + super::scalbnf128(x, n) +} diff --git a/library/compiler-builtins/libm/src/math/ldexpf.rs b/library/compiler-builtins/libm/src/math/ldexpf.rs new file mode 100644 index 0000000000000..95b27fc49d28e --- /dev/null +++ b/library/compiler-builtins/libm/src/math/ldexpf.rs @@ -0,0 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ldexpf(x: f32, n: i32) -> f32 { + super::scalbnf(x, n) +} diff --git a/library/compiler-builtins/libm/src/math/ldexpf128.rs b/library/compiler-builtins/libm/src/math/ldexpf128.rs new file mode 100644 index 0000000000000..b35277d15fbae --- /dev/null +++ b/library/compiler-builtins/libm/src/math/ldexpf128.rs @@ -0,0 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ldexpf128(x: f128, n: i32) -> f128 { + super::scalbnf128(x, n) +} diff --git a/library/compiler-builtins/libm/src/math/ldexpf16.rs b/library/compiler-builtins/libm/src/math/ldexpf16.rs new file mode 100644 index 0000000000000..8de6cffd69987 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/ldexpf16.rs @@ -0,0 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ldexpf16(x: f16, n: i32) -> f16 { + super::scalbnf16(x, n) +} diff --git a/library/compiler-builtins/libm/src/math/lgamma.rs b/library/compiler-builtins/libm/src/math/lgamma.rs new 
file mode 100644 index 0000000000000..8312dc18648e0 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/lgamma.rs @@ -0,0 +1,8 @@ +use super::lgamma_r; + +/// The natural logarithm of the +/// [Gamma function](https://en.wikipedia.org/wiki/Gamma_function) (f64). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn lgamma(x: f64) -> f64 { + lgamma_r(x).0 +} diff --git a/library/compiler-builtins/libm/src/math/lgamma_r.rs b/library/compiler-builtins/libm/src/math/lgamma_r.rs new file mode 100644 index 0000000000000..6becaad2ce91d --- /dev/null +++ b/library/compiler-builtins/libm/src/math/lgamma_r.rs @@ -0,0 +1,321 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_lgamma_r.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + * + */ +/* lgamma_r(x, signgamp) + * Reentrant version of the logarithm of the Gamma function + * with user provide pointer for the sign of Gamma(x). + * + * Method: + * 1. Argument Reduction for 0 < x <= 8 + * Since gamma(1+s)=s*gamma(s), for x in [0,8], we may + * reduce x to a number in [1.5,2.5] by + * lgamma(1+s) = log(s) + lgamma(s) + * for example, + * lgamma(7.3) = log(6.3) + lgamma(6.3) + * = log(6.3*5.3) + lgamma(5.3) + * = log(6.3*5.3*4.3*3.3*2.3) + lgamma(2.3) + * 2. Polynomial approximation of lgamma around its + * minimun ymin=1.461632144968362245 to maintain monotonicity. + * On [ymin-0.23, ymin+0.27] (i.e., [1.23164,1.73163]), use + * Let z = x-ymin; + * lgamma(x) = -1.214862905358496078218 + z^2*poly(z) + * where + * poly(z) is a 14 degree polynomial. + * 2. Rational approximation in the primary interval [2,3] + * We use the following approximation: + * s = x-2.0; + * lgamma(x) = 0.5*s + s*P(s)/Q(s) + * with accuracy + * |P/Q - (lgamma(x)-0.5s)| < 2**-61.71 + * Our algorithms are based on the following observation + * + * zeta(2)-1 2 zeta(3)-1 3 + * lgamma(2+s) = s*(1-Euler) + --------- * s - --------- * s + ... + * 2 3 + * + * where Euler = 0.5771... is the Euler constant, which is very + * close to 0.5. + * + * 3. For x>=8, we have + * lgamma(x)~(x-0.5)log(x)-x+0.5*log(2pi)+1/(12x)-1/(360x**3)+.... + * (better formula: + * lgamma(x)~(x-0.5)*(log(x)-1)-.5*(log(2pi)-1) + ...) + * Let z = 1/x, then we approximation + * f(z) = lgamma(x) - (x-0.5)(log(x)-1) + * by + * 3 5 11 + * w = w0 + w1*z + w2*z + w3*z + ... + w6*z + * where + * |w - f(z)| < 2**-58.74 + * + * 4. For negative x, since (G is gamma function) + * -x*G(-x)*G(x) = PI/sin(PI*x), + * we have + * G(x) = PI/(sin(PI*x)*(-x)*G(-x)) + * since G(-x) is positive, sign(G(x)) = sign(sin(PI*x)) for x<0 + * Hence, for x<0, signgam = sign(sin(PI*x)) and + * lgamma(x) = log(|Gamma(x)|) + * = log(PI/(|x*sin(PI*x)|)) - lgamma(-x); + * Note: one should avoid compute PI*(-x) directly in the + * computation of sin(PI*(-x)). + * + * 5. Special Cases + * lgamma(2+s) ~ s*(1-Euler) for tiny s + * lgamma(1) = lgamma(2) = 0 + * lgamma(x) ~ -log(|x|) for tiny x + * lgamma(0) = lgamma(neg.integer) = inf and raise divide-by-zero + * lgamma(inf) = inf + * lgamma(-inf) = inf (bug for bug compatible with C99!?) 
+ * + */ + +use super::{floor, k_cos, k_sin, log}; + +const PI: f64 = 3.14159265358979311600e+00; /* 0x400921FB, 0x54442D18 */ +const A0: f64 = 7.72156649015328655494e-02; /* 0x3FB3C467, 0xE37DB0C8 */ +const A1: f64 = 3.22467033424113591611e-01; /* 0x3FD4A34C, 0xC4A60FAD */ +const A2: f64 = 6.73523010531292681824e-02; /* 0x3FB13E00, 0x1A5562A7 */ +const A3: f64 = 2.05808084325167332806e-02; /* 0x3F951322, 0xAC92547B */ +const A4: f64 = 7.38555086081402883957e-03; /* 0x3F7E404F, 0xB68FEFE8 */ +const A5: f64 = 2.89051383673415629091e-03; /* 0x3F67ADD8, 0xCCB7926B */ +const A6: f64 = 1.19270763183362067845e-03; /* 0x3F538A94, 0x116F3F5D */ +const A7: f64 = 5.10069792153511336608e-04; /* 0x3F40B6C6, 0x89B99C00 */ +const A8: f64 = 2.20862790713908385557e-04; /* 0x3F2CF2EC, 0xED10E54D */ +const A9: f64 = 1.08011567247583939954e-04; /* 0x3F1C5088, 0x987DFB07 */ +const A10: f64 = 2.52144565451257326939e-05; /* 0x3EFA7074, 0x428CFA52 */ +const A11: f64 = 4.48640949618915160150e-05; /* 0x3F07858E, 0x90A45837 */ +const TC: f64 = 1.46163214496836224576e+00; /* 0x3FF762D8, 0x6356BE3F */ +const TF: f64 = -1.21486290535849611461e-01; /* 0xBFBF19B9, 0xBCC38A42 */ +/* tt = -(tail of TF) */ +const TT: f64 = -3.63867699703950536541e-18; /* 0xBC50C7CA, 0xA48A971F */ +const T0: f64 = 4.83836122723810047042e-01; /* 0x3FDEF72B, 0xC8EE38A2 */ +const T1: f64 = -1.47587722994593911752e-01; /* 0xBFC2E427, 0x8DC6C509 */ +const T2: f64 = 6.46249402391333854778e-02; /* 0x3FB08B42, 0x94D5419B */ +const T3: f64 = -3.27885410759859649565e-02; /* 0xBFA0C9A8, 0xDF35B713 */ +const T4: f64 = 1.79706750811820387126e-02; /* 0x3F9266E7, 0x970AF9EC */ +const T5: f64 = -1.03142241298341437450e-02; /* 0xBF851F9F, 0xBA91EC6A */ +const T6: f64 = 6.10053870246291332635e-03; /* 0x3F78FCE0, 0xE370E344 */ +const T7: f64 = -3.68452016781138256760e-03; /* 0xBF6E2EFF, 0xB3E914D7 */ +const T8: f64 = 2.25964780900612472250e-03; /* 0x3F6282D3, 0x2E15C915 */ +const T9: f64 = -1.40346469989232843813e-03; /* 0xBF56FE8E, 0xBF2D1AF1 */ +const T10: f64 = 8.81081882437654011382e-04; /* 0x3F4CDF0C, 0xEF61A8E9 */ +const T11: f64 = -5.38595305356740546715e-04; /* 0xBF41A610, 0x9C73E0EC */ +const T12: f64 = 3.15632070903625950361e-04; /* 0x3F34AF6D, 0x6C0EBBF7 */ +const T13: f64 = -3.12754168375120860518e-04; /* 0xBF347F24, 0xECC38C38 */ +const T14: f64 = 3.35529192635519073543e-04; /* 0x3F35FD3E, 0xE8C2D3F4 */ +const U0: f64 = -7.72156649015328655494e-02; /* 0xBFB3C467, 0xE37DB0C8 */ +const U1: f64 = 6.32827064025093366517e-01; /* 0x3FE4401E, 0x8B005DFF */ +const U2: f64 = 1.45492250137234768737e+00; /* 0x3FF7475C, 0xD119BD6F */ +const U3: f64 = 9.77717527963372745603e-01; /* 0x3FEF4976, 0x44EA8450 */ +const U4: f64 = 2.28963728064692451092e-01; /* 0x3FCD4EAE, 0xF6010924 */ +const U5: f64 = 1.33810918536787660377e-02; /* 0x3F8B678B, 0xBF2BAB09 */ +const V1: f64 = 2.45597793713041134822e+00; /* 0x4003A5D7, 0xC2BD619C */ +const V2: f64 = 2.12848976379893395361e+00; /* 0x40010725, 0xA42B18F5 */ +const V3: f64 = 7.69285150456672783825e-01; /* 0x3FE89DFB, 0xE45050AF */ +const V4: f64 = 1.04222645593369134254e-01; /* 0x3FBAAE55, 0xD6537C88 */ +const V5: f64 = 3.21709242282423911810e-03; /* 0x3F6A5ABB, 0x57D0CF61 */ +const S0: f64 = -7.72156649015328655494e-02; /* 0xBFB3C467, 0xE37DB0C8 */ +const S1: f64 = 2.14982415960608852501e-01; /* 0x3FCB848B, 0x36E20878 */ +const S2: f64 = 3.25778796408930981787e-01; /* 0x3FD4D98F, 0x4F139F59 */ +const S3: f64 = 1.46350472652464452805e-01; /* 0x3FC2BB9C, 0xBEE5F2F7 */ +const S4: f64 = 2.66422703033638609560e-02; /* 
0x3F9B481C, 0x7E939961 */ +const S5: f64 = 1.84028451407337715652e-03; /* 0x3F5E26B6, 0x7368F239 */ +const S6: f64 = 3.19475326584100867617e-05; /* 0x3F00BFEC, 0xDD17E945 */ +const R1: f64 = 1.39200533467621045958e+00; /* 0x3FF645A7, 0x62C4AB74 */ +const R2: f64 = 7.21935547567138069525e-01; /* 0x3FE71A18, 0x93D3DCDC */ +const R3: f64 = 1.71933865632803078993e-01; /* 0x3FC601ED, 0xCCFBDF27 */ +const R4: f64 = 1.86459191715652901344e-02; /* 0x3F9317EA, 0x742ED475 */ +const R5: f64 = 7.77942496381893596434e-04; /* 0x3F497DDA, 0xCA41A95B */ +const R6: f64 = 7.32668430744625636189e-06; /* 0x3EDEBAF7, 0xA5B38140 */ +const W0: f64 = 4.18938533204672725052e-01; /* 0x3FDACFE3, 0x90C97D69 */ +const W1: f64 = 8.33333333333329678849e-02; /* 0x3FB55555, 0x5555553B */ +const W2: f64 = -2.77777777728775536470e-03; /* 0xBF66C16C, 0x16B02E5C */ +const W3: f64 = 7.93650558643019558500e-04; /* 0x3F4A019F, 0x98CF38B6 */ +const W4: f64 = -5.95187557450339963135e-04; /* 0xBF4380CB, 0x8C0FE741 */ +const W5: f64 = 8.36339918996282139126e-04; /* 0x3F4B67BA, 0x4CDAD5D1 */ +const W6: f64 = -1.63092934096575273989e-03; /* 0xBF5AB89D, 0x0B9E43E4 */ + +/* sin(PI*x) assuming x > 2^-100, if sin(PI*x)==0 the sign is arbitrary */ +fn sin_pi(mut x: f64) -> f64 { + let mut n: i32; + + /* spurious inexact if odd int */ + x = 2.0 * (x * 0.5 - floor(x * 0.5)); /* x mod 2.0 */ + + n = (x * 4.0) as i32; + n = div!(n + 1, 2); + x -= (n as f64) * 0.5; + x *= PI; + + match n { + 1 => k_cos(x, 0.0), + 2 => k_sin(-x, 0.0, 0), + 3 => -k_cos(x, 0.0), + // 0 + _ => k_sin(x, 0.0, 0), + } +} + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn lgamma_r(mut x: f64) -> (f64, i32) { + let u: u64 = x.to_bits(); + let mut t: f64; + let y: f64; + let mut z: f64; + let nadj: f64; + let p: f64; + let p1: f64; + let p2: f64; + let p3: f64; + let q: f64; + let mut r: f64; + let w: f64; + let ix: u32; + let sign: bool; + let i: i32; + let mut signgam: i32; + + /* purge off +-inf, NaN, +-0, tiny and negative arguments */ + signgam = 1; + sign = (u >> 63) != 0; + ix = ((u >> 32) as u32) & 0x7fffffff; + if ix >= 0x7ff00000 { + return (x * x, signgam); + } + if ix < (0x3ff - 70) << 20 { + /* |x|<2**-70, return -log(|x|) */ + if sign { + x = -x; + signgam = -1; + } + return (-log(x), signgam); + } + if sign { + x = -x; + t = sin_pi(x); + if t == 0.0 { + /* -integer */ + return (1.0 / (x - x), signgam); + } + if t > 0.0 { + signgam = -1; + } else { + t = -t; + } + nadj = log(PI / (t * x)); + } else { + nadj = 0.0; + } + + /* purge off 1 and 2 */ + if (ix == 0x3ff00000 || ix == 0x40000000) && (u & 0xffffffff) == 0 { + r = 0.0; + } + /* for x < 2.0 */ + else if ix < 0x40000000 { + if ix <= 0x3feccccc { + /* lgamma(x) = lgamma(x+1)-log(x) */ + r = -log(x); + if ix >= 0x3FE76944 { + y = 1.0 - x; + i = 0; + } else if ix >= 0x3FCDA661 { + y = x - (TC - 1.0); + i = 1; + } else { + y = x; + i = 2; + } + } else { + r = 0.0; + if ix >= 0x3FFBB4C3 { + /* [1.7316,2] */ + y = 2.0 - x; + i = 0; + } else if ix >= 0x3FF3B4C4 { + /* [1.23,1.73] */ + y = x - TC; + i = 1; + } else { + y = x - 1.0; + i = 2; + } + } + match i { + 0 => { + z = y * y; + p1 = A0 + z * (A2 + z * (A4 + z * (A6 + z * (A8 + z * A10)))); + p2 = z * (A1 + z * (A3 + z * (A5 + z * (A7 + z * (A9 + z * A11))))); + p = y * p1 + p2; + r += p - 0.5 * y; + } + 1 => { + z = y * y; + w = z * y; + p1 = T0 + w * (T3 + w * (T6 + w * (T9 + w * T12))); /* parallel comp */ + p2 = T1 + w * (T4 + w * (T7 + w * (T10 + w * T13))); + p3 = T2 + w * (T5 + w * (T8 + w * (T11 + w * T14))); + p = z * p1 - (TT 
- w * (p2 + y * p3)); + r += TF + p; + } + 2 => { + p1 = y * (U0 + y * (U1 + y * (U2 + y * (U3 + y * (U4 + y * U5))))); + p2 = 1.0 + y * (V1 + y * (V2 + y * (V3 + y * (V4 + y * V5)))); + r += -0.5 * y + p1 / p2; + } + #[cfg(debug_assertions)] + _ => unreachable!(), + #[cfg(not(debug_assertions))] + _ => {} + } + } else if ix < 0x40200000 { + /* x < 8.0 */ + i = x as i32; + y = x - (i as f64); + p = y * (S0 + y * (S1 + y * (S2 + y * (S3 + y * (S4 + y * (S5 + y * S6)))))); + q = 1.0 + y * (R1 + y * (R2 + y * (R3 + y * (R4 + y * (R5 + y * R6))))); + r = 0.5 * y + p / q; + z = 1.0; /* lgamma(1+s) = log(s) + lgamma(s) */ + // TODO: In C, this was implemented using switch jumps with fallthrough. + // Does this implementation have performance problems? + if i >= 7 { + z *= y + 6.0; + } + if i >= 6 { + z *= y + 5.0; + } + if i >= 5 { + z *= y + 4.0; + } + if i >= 4 { + z *= y + 3.0; + } + if i >= 3 { + z *= y + 2.0; + r += log(z); + } + } else if ix < 0x43900000 { + /* 8.0 <= x < 2**58 */ + t = log(x); + z = 1.0 / x; + y = z * z; + w = W0 + z * (W1 + y * (W2 + y * (W3 + y * (W4 + y * (W5 + y * W6))))); + r = (x - 0.5) * (t - 1.0) + w; + } else { + /* 2**58 <= x <= inf */ + r = x * (log(x) - 1.0); + } + if sign { + r = nadj - r; + } + return (r, signgam); +} diff --git a/library/compiler-builtins/libm/src/math/lgammaf.rs b/library/compiler-builtins/libm/src/math/lgammaf.rs new file mode 100644 index 0000000000000..d37512397cb39 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/lgammaf.rs @@ -0,0 +1,8 @@ +use super::lgammaf_r; + +/// The natural logarithm of the +/// [Gamma function](https://en.wikipedia.org/wiki/Gamma_function) (f32). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn lgammaf(x: f32) -> f32 { + lgammaf_r(x).0 +} diff --git a/library/compiler-builtins/libm/src/math/lgammaf_r.rs b/library/compiler-builtins/libm/src/math/lgammaf_r.rs new file mode 100644 index 0000000000000..10cecee541ccc --- /dev/null +++ b/library/compiler-builtins/libm/src/math/lgammaf_r.rs @@ -0,0 +1,256 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_lgammaf_r.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. 
+ * ==================================================== + */ + +use super::{floorf, k_cosf, k_sinf, logf}; + +const PI: f32 = 3.1415927410e+00; /* 0x40490fdb */ +const A0: f32 = 7.7215664089e-02; /* 0x3d9e233f */ +const A1: f32 = 3.2246702909e-01; /* 0x3ea51a66 */ +const A2: f32 = 6.7352302372e-02; /* 0x3d89f001 */ +const A3: f32 = 2.0580807701e-02; /* 0x3ca89915 */ +const A4: f32 = 7.3855509982e-03; /* 0x3bf2027e */ +const A5: f32 = 2.8905137442e-03; /* 0x3b3d6ec6 */ +const A6: f32 = 1.1927076848e-03; /* 0x3a9c54a1 */ +const A7: f32 = 5.1006977446e-04; /* 0x3a05b634 */ +const A8: f32 = 2.2086278477e-04; /* 0x39679767 */ +const A9: f32 = 1.0801156895e-04; /* 0x38e28445 */ +const A10: f32 = 2.5214456400e-05; /* 0x37d383a2 */ +const A11: f32 = 4.4864096708e-05; /* 0x383c2c75 */ +const TC: f32 = 1.4616321325e+00; /* 0x3fbb16c3 */ +const TF: f32 = -1.2148628384e-01; /* 0xbdf8cdcd */ +/* TT = -(tail of TF) */ +const TT: f32 = 6.6971006518e-09; /* 0x31e61c52 */ +const T0: f32 = 4.8383611441e-01; /* 0x3ef7b95e */ +const T1: f32 = -1.4758771658e-01; /* 0xbe17213c */ +const T2: f32 = 6.4624942839e-02; /* 0x3d845a15 */ +const T3: f32 = -3.2788541168e-02; /* 0xbd064d47 */ +const T4: f32 = 1.7970675603e-02; /* 0x3c93373d */ +const T5: f32 = -1.0314224288e-02; /* 0xbc28fcfe */ +const T6: f32 = 6.1005386524e-03; /* 0x3bc7e707 */ +const T7: f32 = -3.6845202558e-03; /* 0xbb7177fe */ +const T8: f32 = 2.2596477065e-03; /* 0x3b141699 */ +const T9: f32 = -1.4034647029e-03; /* 0xbab7f476 */ +const T10: f32 = 8.8108185446e-04; /* 0x3a66f867 */ +const T11: f32 = -5.3859531181e-04; /* 0xba0d3085 */ +const T12: f32 = 3.1563205994e-04; /* 0x39a57b6b */ +const T13: f32 = -3.1275415677e-04; /* 0xb9a3f927 */ +const T14: f32 = 3.3552918467e-04; /* 0x39afe9f7 */ +const U0: f32 = -7.7215664089e-02; /* 0xbd9e233f */ +const U1: f32 = 6.3282704353e-01; /* 0x3f2200f4 */ +const U2: f32 = 1.4549225569e+00; /* 0x3fba3ae7 */ +const U3: f32 = 9.7771751881e-01; /* 0x3f7a4bb2 */ +const U4: f32 = 2.2896373272e-01; /* 0x3e6a7578 */ +const U5: f32 = 1.3381091878e-02; /* 0x3c5b3c5e */ +const V1: f32 = 2.4559779167e+00; /* 0x401d2ebe */ +const V2: f32 = 2.1284897327e+00; /* 0x4008392d */ +const V3: f32 = 7.6928514242e-01; /* 0x3f44efdf */ +const V4: f32 = 1.0422264785e-01; /* 0x3dd572af */ +const V5: f32 = 3.2170924824e-03; /* 0x3b52d5db */ +const S0: f32 = -7.7215664089e-02; /* 0xbd9e233f */ +const S1: f32 = 2.1498242021e-01; /* 0x3e5c245a */ +const S2: f32 = 3.2577878237e-01; /* 0x3ea6cc7a */ +const S3: f32 = 1.4635047317e-01; /* 0x3e15dce6 */ +const S4: f32 = 2.6642270386e-02; /* 0x3cda40e4 */ +const S5: f32 = 1.8402845599e-03; /* 0x3af135b4 */ +const S6: f32 = 3.1947532989e-05; /* 0x3805ff67 */ +const R1: f32 = 1.3920053244e+00; /* 0x3fb22d3b */ +const R2: f32 = 7.2193557024e-01; /* 0x3f38d0c5 */ +const R3: f32 = 1.7193385959e-01; /* 0x3e300f6e */ +const R4: f32 = 1.8645919859e-02; /* 0x3c98bf54 */ +const R5: f32 = 7.7794247773e-04; /* 0x3a4beed6 */ +const R6: f32 = 7.3266842264e-06; /* 0x36f5d7bd */ +const W0: f32 = 4.1893854737e-01; /* 0x3ed67f1d */ +const W1: f32 = 8.3333335817e-02; /* 0x3daaaaab */ +const W2: f32 = -2.7777778450e-03; /* 0xbb360b61 */ +const W3: f32 = 7.9365057172e-04; /* 0x3a500cfd */ +const W4: f32 = -5.9518753551e-04; /* 0xba1c065c */ +const W5: f32 = 8.3633989561e-04; /* 0x3a5b3dd2 */ +const W6: f32 = -1.6309292987e-03; /* 0xbad5c4e8 */ + +/* sin(PI*x) assuming x > 2^-100, if sin(PI*x)==0 the sign is arbitrary */ +fn sin_pi(mut x: f32) -> f32 { + let mut y: f64; + let mut n: isize; + + /* spurious inexact if 
odd int */ + x = 2.0 * (x * 0.5 - floorf(x * 0.5)); /* x mod 2.0 */ + + n = (x * 4.0) as isize; + n = div!(n + 1, 2); + y = (x as f64) - (n as f64) * 0.5; + y *= 3.14159265358979323846; + match n { + 1 => k_cosf(y), + 2 => k_sinf(-y), + 3 => -k_cosf(y), + // 0 + _ => k_sinf(y), + } +} + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn lgammaf_r(mut x: f32) -> (f32, i32) { + let u = x.to_bits(); + let mut t: f32; + let y: f32; + let mut z: f32; + let nadj: f32; + let p: f32; + let p1: f32; + let p2: f32; + let p3: f32; + let q: f32; + let mut r: f32; + let w: f32; + let ix: u32; + let i: i32; + let sign: bool; + let mut signgam: i32; + + /* purge off +-inf, NaN, +-0, tiny and negative arguments */ + signgam = 1; + sign = (u >> 31) != 0; + ix = u & 0x7fffffff; + if ix >= 0x7f800000 { + return (x * x, signgam); + } + if ix < 0x35000000 { + /* |x| < 2**-21, return -log(|x|) */ + if sign { + signgam = -1; + x = -x; + } + return (-logf(x), signgam); + } + if sign { + x = -x; + t = sin_pi(x); + if t == 0.0 { + /* -integer */ + return (1.0 / (x - x), signgam); + } + if t > 0.0 { + signgam = -1; + } else { + t = -t; + } + nadj = logf(PI / (t * x)); + } else { + nadj = 0.0; + } + + /* purge off 1 and 2 */ + if ix == 0x3f800000 || ix == 0x40000000 { + r = 0.0; + } + /* for x < 2.0 */ + else if ix < 0x40000000 { + if ix <= 0x3f666666 { + /* lgamma(x) = lgamma(x+1)-log(x) */ + r = -logf(x); + if ix >= 0x3f3b4a20 { + y = 1.0 - x; + i = 0; + } else if ix >= 0x3e6d3308 { + y = x - (TC - 1.0); + i = 1; + } else { + y = x; + i = 2; + } + } else { + r = 0.0; + if ix >= 0x3fdda618 { + /* [1.7316,2] */ + y = 2.0 - x; + i = 0; + } else if ix >= 0x3F9da620 { + /* [1.23,1.73] */ + y = x - TC; + i = 1; + } else { + y = x - 1.0; + i = 2; + } + } + match i { + 0 => { + z = y * y; + p1 = A0 + z * (A2 + z * (A4 + z * (A6 + z * (A8 + z * A10)))); + p2 = z * (A1 + z * (A3 + z * (A5 + z * (A7 + z * (A9 + z * A11))))); + p = y * p1 + p2; + r += p - 0.5 * y; + } + 1 => { + z = y * y; + w = z * y; + p1 = T0 + w * (T3 + w * (T6 + w * (T9 + w * T12))); /* parallel comp */ + p2 = T1 + w * (T4 + w * (T7 + w * (T10 + w * T13))); + p3 = T2 + w * (T5 + w * (T8 + w * (T11 + w * T14))); + p = z * p1 - (TT - w * (p2 + y * p3)); + r += TF + p; + } + 2 => { + p1 = y * (U0 + y * (U1 + y * (U2 + y * (U3 + y * (U4 + y * U5))))); + p2 = 1.0 + y * (V1 + y * (V2 + y * (V3 + y * (V4 + y * V5)))); + r += -0.5 * y + p1 / p2; + } + #[cfg(debug_assertions)] + _ => unreachable!(), + #[cfg(not(debug_assertions))] + _ => {} + } + } else if ix < 0x41000000 { + /* x < 8.0 */ + i = x as i32; + y = x - (i as f32); + p = y * (S0 + y * (S1 + y * (S2 + y * (S3 + y * (S4 + y * (S5 + y * S6)))))); + q = 1.0 + y * (R1 + y * (R2 + y * (R3 + y * (R4 + y * (R5 + y * R6))))); + r = 0.5 * y + p / q; + z = 1.0; /* lgamma(1+s) = log(s) + lgamma(s) */ + // TODO: In C, this was implemented using switch jumps with fallthrough. + // Does this implementation have performance problems? 
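+        // The cascading `if`s below emulate that fallthrough: for i >= 3 they build
+        // z = (y+2)*(y+3)*...*(y+i-1) and then add logf(z) to r.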
+ if i >= 7 { + z *= y + 6.0; + } + if i >= 6 { + z *= y + 5.0; + } + if i >= 5 { + z *= y + 4.0; + } + if i >= 4 { + z *= y + 3.0; + } + if i >= 3 { + z *= y + 2.0; + r += logf(z); + } + } else if ix < 0x5c800000 { + /* 8.0 <= x < 2**58 */ + t = logf(x); + z = 1.0 / x; + y = z * z; + w = W0 + z * (W1 + y * (W2 + y * (W3 + y * (W4 + y * (W5 + y * W6))))); + r = (x - 0.5) * (t - 1.0) + w; + } else { + /* 2**58 <= x <= inf */ + r = x * (logf(x) - 1.0); + } + if sign { + r = nadj - r; + } + return (r, signgam); +} diff --git a/library/compiler-builtins/libm/src/math/log.rs b/library/compiler-builtins/libm/src/math/log.rs new file mode 100644 index 0000000000000..f2dc47ec5ccff --- /dev/null +++ b/library/compiler-builtins/libm/src/math/log.rs @@ -0,0 +1,118 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_log.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* log(x) + * Return the logarithm of x + * + * Method : + * 1. Argument Reduction: find k and f such that + * x = 2^k * (1+f), + * where sqrt(2)/2 < 1+f < sqrt(2) . + * + * 2. Approximation of log(1+f). + * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s) + * = 2s + 2/3 s**3 + 2/5 s**5 + ....., + * = 2s + s*R + * We use a special Remez algorithm on [0,0.1716] to generate + * a polynomial of degree 14 to approximate R The maximum error + * of this polynomial approximation is bounded by 2**-58.45. In + * other words, + * 2 4 6 8 10 12 14 + * R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s +Lg6*s +Lg7*s + * (the values of Lg1 to Lg7 are listed in the program) + * and + * | 2 14 | -58.45 + * | Lg1*s +...+Lg7*s - R(z) | <= 2 + * | | + * Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2. + * In order to guarantee error in log below 1ulp, we compute log + * by + * log(1+f) = f - s*(f - R) (if f is not too large) + * log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy) + * + * 3. Finally, log(x) = k*ln2 + log(1+f). + * = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo))) + * Here ln2 is split into two floating point number: + * ln2_hi + ln2_lo, + * where n*ln2_hi is always exact for |n| < 2000. + * + * Special cases: + * log(x) is NaN with signal if x < 0 (including -INF) ; + * log(+INF) is +INF; log(0) is -INF with signal; + * log(NaN) is that NaN with no signal. + * + * Accuracy: + * according to an error analysis, the error is always less than + * 1 ulp (unit in the last place). + * + * Constants: + * The hexadecimal values are the intended ones for the following + * constants. The decimal values may be used, provided that the + * compiler will convert from decimal to binary accurately enough + * to produce the hexadecimal values shown. 
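+ *
+ * Added illustration (not part of the original Sun notes): for x = 10,
+ * x = 2^3 * 1.25 and sqrt(2)/2 < 1.25 < sqrt(2), so k = 3 and f = 0.25.
+ * Then s = f/(2+f) = 1/9, hfsq = f*f/2 = 0.03125, and
+ * log(10) = 3*ln2_hi + (f - (hfsq - (s*(hfsq+R) + 3*ln2_lo))).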
+ */ + +const LN2_HI: f64 = 6.93147180369123816490e-01; /* 3fe62e42 fee00000 */ +const LN2_LO: f64 = 1.90821492927058770002e-10; /* 3dea39ef 35793c76 */ +const LG1: f64 = 6.666666666666735130e-01; /* 3FE55555 55555593 */ +const LG2: f64 = 3.999999999940941908e-01; /* 3FD99999 9997FA04 */ +const LG3: f64 = 2.857142874366239149e-01; /* 3FD24924 94229359 */ +const LG4: f64 = 2.222219843214978396e-01; /* 3FCC71C5 1D8E78AF */ +const LG5: f64 = 1.818357216161805012e-01; /* 3FC74664 96CB03DE */ +const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */ +const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ + +/// The natural logarithm of `x` (f64). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn log(mut x: f64) -> f64 { + let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54 + + let mut ui = x.to_bits(); + let mut hx: u32 = (ui >> 32) as u32; + let mut k: i32 = 0; + + if (hx < 0x00100000) || ((hx >> 31) != 0) { + /* x < 2**-126 */ + if ui << 1 == 0 { + return -1. / (x * x); /* log(+-0)=-inf */ + } + if hx >> 31 != 0 { + return (x - x) / 0.0; /* log(-#) = NaN */ + } + /* subnormal number, scale x up */ + k -= 54; + x *= x1p54; + ui = x.to_bits(); + hx = (ui >> 32) as u32; + } else if hx >= 0x7ff00000 { + return x; + } else if hx == 0x3ff00000 && ui << 32 == 0 { + return 0.; + } + + /* reduce x into [sqrt(2)/2, sqrt(2)] */ + hx += 0x3ff00000 - 0x3fe6a09e; + k += ((hx >> 20) as i32) - 0x3ff; + hx = (hx & 0x000fffff) + 0x3fe6a09e; + ui = ((hx as u64) << 32) | (ui & 0xffffffff); + x = f64::from_bits(ui); + + let f: f64 = x - 1.0; + let hfsq: f64 = 0.5 * f * f; + let s: f64 = f / (2.0 + f); + let z: f64 = s * s; + let w: f64 = z * z; + let t1: f64 = w * (LG2 + w * (LG4 + w * LG6)); + let t2: f64 = z * (LG1 + w * (LG3 + w * (LG5 + w * LG7))); + let r: f64 = t2 + t1; + let dk: f64 = k as f64; + s * (hfsq + r) + dk * LN2_LO - hfsq + f + dk * LN2_HI +} diff --git a/library/compiler-builtins/libm/src/math/log10.rs b/library/compiler-builtins/libm/src/math/log10.rs new file mode 100644 index 0000000000000..8c9d68c492dbd --- /dev/null +++ b/library/compiler-builtins/libm/src/math/log10.rs @@ -0,0 +1,118 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_log10.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* + * Return the base 10 logarithm of x. See log.c for most comments. 
+ * + * Reduce x to 2^k (1+f) and calculate r = log(1+f) - f + f*f/2 + * as in log.c, then combine and scale in extra precision: + * log10(x) = (f - f*f/2 + r)/log(10) + k*log10(2) + */ + +use core::f64; + +const IVLN10HI: f64 = 4.34294481878168880939e-01; /* 0x3fdbcb7b, 0x15200000 */ +const IVLN10LO: f64 = 2.50829467116452752298e-11; /* 0x3dbb9438, 0xca9aadd5 */ +const LOG10_2HI: f64 = 3.01029995663611771306e-01; /* 0x3FD34413, 0x509F6000 */ +const LOG10_2LO: f64 = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */ +const LG1: f64 = 6.666666666666735130e-01; /* 3FE55555 55555593 */ +const LG2: f64 = 3.999999999940941908e-01; /* 3FD99999 9997FA04 */ +const LG3: f64 = 2.857142874366239149e-01; /* 3FD24924 94229359 */ +const LG4: f64 = 2.222219843214978396e-01; /* 3FCC71C5 1D8E78AF */ +const LG5: f64 = 1.818357216161805012e-01; /* 3FC74664 96CB03DE */ +const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */ +const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ + +/// The base 10 logarithm of `x` (f64). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn log10(mut x: f64) -> f64 { + let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54 + + let mut ui: u64 = x.to_bits(); + let hfsq: f64; + let f: f64; + let s: f64; + let z: f64; + let r: f64; + let mut w: f64; + let t1: f64; + let t2: f64; + let dk: f64; + let y: f64; + let mut hi: f64; + let lo: f64; + let mut val_hi: f64; + let mut val_lo: f64; + let mut hx: u32; + let mut k: i32; + + hx = (ui >> 32) as u32; + k = 0; + if hx < 0x00100000 || (hx >> 31) > 0 { + if ui << 1 == 0 { + return -1. / (x * x); /* log(+-0)=-inf */ + } + if (hx >> 31) > 0 { + return (x - x) / 0.0; /* log(-#) = NaN */ + } + /* subnormal number, scale x up */ + k -= 54; + x *= x1p54; + ui = x.to_bits(); + hx = (ui >> 32) as u32; + } else if hx >= 0x7ff00000 { + return x; + } else if hx == 0x3ff00000 && ui << 32 == 0 { + return 0.; + } + + /* reduce x into [sqrt(2)/2, sqrt(2)] */ + hx += 0x3ff00000 - 0x3fe6a09e; + k += (hx >> 20) as i32 - 0x3ff; + hx = (hx & 0x000fffff) + 0x3fe6a09e; + ui = ((hx as u64) << 32) | (ui & 0xffffffff); + x = f64::from_bits(ui); + + f = x - 1.0; + hfsq = 0.5 * f * f; + s = f / (2.0 + f); + z = s * s; + w = z * z; + t1 = w * (LG2 + w * (LG4 + w * LG6)); + t2 = z * (LG1 + w * (LG3 + w * (LG5 + w * LG7))); + r = t2 + t1; + + /* See log2.c for details. */ + /* hi+lo = f - hfsq + s*(hfsq+R) ~ log(1+f) */ + hi = f - hfsq; + ui = hi.to_bits(); + ui &= (-1i64 as u64) << 32; + hi = f64::from_bits(ui); + lo = f - hi - hfsq + s * (hfsq + r); + + /* val_hi+val_lo ~ log10(1+f) + k*log10(2) */ + val_hi = hi * IVLN10HI; + dk = k as f64; + y = dk * LOG10_2HI; + val_lo = dk * LOG10_2LO + (lo + hi) * IVLN10LO + lo * IVLN10HI; + + /* + * Extra precision in for adding y is not strictly needed + * since there is no very large cancellation near x = sqrt(2) or + * x = 1/sqrt(2), but we do it anyway since it costs little on CPUs + * with some parallelism and it reduces the error for many args. + */ + w = y + val_hi; + val_lo += (y - w) + val_hi; + val_hi = w; + + val_lo + val_hi +} diff --git a/library/compiler-builtins/libm/src/math/log10f.rs b/library/compiler-builtins/libm/src/math/log10f.rs new file mode 100644 index 0000000000000..18bf8fcc8320e --- /dev/null +++ b/library/compiler-builtins/libm/src/math/log10f.rs @@ -0,0 +1,92 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_log10f.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. 
All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* + * See comments in log10.c. + */ + +use core::f32; + +const IVLN10HI: f32 = 4.3432617188e-01; /* 0x3ede6000 */ +const IVLN10LO: f32 = -3.1689971365e-05; /* 0xb804ead9 */ +const LOG10_2HI: f32 = 3.0102920532e-01; /* 0x3e9a2080 */ +const LOG10_2LO: f32 = 7.9034151668e-07; /* 0x355427db */ +/* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */ +const LG1: f32 = 0.66666662693; /* 0xaaaaaa.0p-24 */ +const LG2: f32 = 0.40000972152; /* 0xccce13.0p-25 */ +const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */ +const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */ + +/// The base 10 logarithm of `x` (f32). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn log10f(mut x: f32) -> f32 { + let x1p25f = f32::from_bits(0x4c000000); // 0x1p25f === 2 ^ 25 + + let mut ui: u32 = x.to_bits(); + let hfsq: f32; + let f: f32; + let s: f32; + let z: f32; + let r: f32; + let w: f32; + let t1: f32; + let t2: f32; + let dk: f32; + let mut hi: f32; + let lo: f32; + let mut ix: u32; + let mut k: i32; + + ix = ui; + k = 0; + if ix < 0x00800000 || (ix >> 31) > 0 { + /* x < 2**-126 */ + if ix << 1 == 0 { + return -1. / (x * x); /* log(+-0)=-inf */ + } + if (ix >> 31) > 0 { + return (x - x) / 0.0; /* log(-#) = NaN */ + } + /* subnormal number, scale up x */ + k -= 25; + x *= x1p25f; + ui = x.to_bits(); + ix = ui; + } else if ix >= 0x7f800000 { + return x; + } else if ix == 0x3f800000 { + return 0.; + } + + /* reduce x into [sqrt(2)/2, sqrt(2)] */ + ix += 0x3f800000 - 0x3f3504f3; + k += (ix >> 23) as i32 - 0x7f; + ix = (ix & 0x007fffff) + 0x3f3504f3; + ui = ix; + x = f32::from_bits(ui); + + f = x - 1.0; + s = f / (2.0 + f); + z = s * s; + w = z * z; + t1 = w * (LG2 + w * LG4); + t2 = z * (LG1 + w * LG3); + r = t2 + t1; + hfsq = 0.5 * f * f; + + hi = f - hfsq; + ui = hi.to_bits(); + ui &= 0xfffff000; + hi = f32::from_bits(ui); + lo = f - hi - hfsq + s * (hfsq + r); + dk = k as f32; + dk * LOG10_2LO + (lo + hi) * IVLN10LO + lo * IVLN10HI + hi * IVLN10HI + dk * LOG10_2HI +} diff --git a/library/compiler-builtins/libm/src/math/log1p.rs b/library/compiler-builtins/libm/src/math/log1p.rs new file mode 100644 index 0000000000000..65142c0d622cc --- /dev/null +++ b/library/compiler-builtins/libm/src/math/log1p.rs @@ -0,0 +1,144 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_log1p.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* double log1p(double x) + * Return the natural logarithm of 1+x. + * + * Method : + * 1. Argument Reduction: find k and f such that + * 1+x = 2^k * (1+f), + * where sqrt(2)/2 < 1+f < sqrt(2) . + * + * Note. If k=0, then f=x is exact. However, if k!=0, then f + * may not be representable exactly. In that case, a correction + * term is need. Let u=1+x rounded. Let c = (1+x)-u, then + * log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u), + * and add back the correction term c/u. 
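+ * (Added note, not part of the original comment: the correction works
+ * because log(1+x) - log(u) = log((1+x)/u) = log(1 + c/u) ~ c/u, and
+ * c/u is tiny since |c| is at most half an ulp of u.)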
+ * (Note: when x > 2**53, one can simply return log(x)) + * + * 2. Approximation of log(1+f): See log.c + * + * 3. Finally, log1p(x) = k*ln2 + log(1+f) + c/u. See log.c + * + * Special cases: + * log1p(x) is NaN with signal if x < -1 (including -INF) ; + * log1p(+INF) is +INF; log1p(-1) is -INF with signal; + * log1p(NaN) is that NaN with no signal. + * + * Accuracy: + * according to an error analysis, the error is always less than + * 1 ulp (unit in the last place). + * + * Constants: + * The hexadecimal values are the intended ones for the following + * constants. The decimal values may be used, provided that the + * compiler will convert from decimal to binary accurately enough + * to produce the hexadecimal values shown. + * + * Note: Assuming log() return accurate answer, the following + * algorithm can be used to compute log1p(x) to within a few ULP: + * + * u = 1+x; + * if(u==1.0) return x ; else + * return log(u)*(x/(u-1.0)); + * + * See HP-15C Advanced Functions Handbook, p.193. + */ + +use core::f64; + +const LN2_HI: f64 = 6.93147180369123816490e-01; /* 3fe62e42 fee00000 */ +const LN2_LO: f64 = 1.90821492927058770002e-10; /* 3dea39ef 35793c76 */ +const LG1: f64 = 6.666666666666735130e-01; /* 3FE55555 55555593 */ +const LG2: f64 = 3.999999999940941908e-01; /* 3FD99999 9997FA04 */ +const LG3: f64 = 2.857142874366239149e-01; /* 3FD24924 94229359 */ +const LG4: f64 = 2.222219843214978396e-01; /* 3FCC71C5 1D8E78AF */ +const LG5: f64 = 1.818357216161805012e-01; /* 3FC74664 96CB03DE */ +const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */ +const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ + +/// The natural logarithm of 1+`x` (f64). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn log1p(x: f64) -> f64 { + let mut ui: u64 = x.to_bits(); + let hfsq: f64; + let mut f: f64 = 0.; + let mut c: f64 = 0.; + let s: f64; + let z: f64; + let r: f64; + let w: f64; + let t1: f64; + let t2: f64; + let dk: f64; + let hx: u32; + let mut hu: u32; + let mut k: i32; + + hx = (ui >> 32) as u32; + k = 1; + if hx < 0x3fda827a || (hx >> 31) > 0 { + /* 1+x < sqrt(2)+ */ + if hx >= 0xbff00000 { + /* x <= -1.0 */ + if x == -1. { + return x / 0.0; /* log1p(-1) = -inf */ + } + return (x - x) / 0.0; /* log1p(x<-1) = NaN */ + } + if hx << 1 < 0x3ca00000 << 1 { + /* |x| < 2**-53 */ + /* underflow if subnormal */ + if (hx & 0x7ff00000) == 0 { + force_eval!(x as f32); + } + return x; + } + if hx <= 0xbfd2bec4 { + /* sqrt(2)/2- <= 1+x < sqrt(2)+ */ + k = 0; + c = 0.; + f = x; + } + } else if hx >= 0x7ff00000 { + return x; + } + if k > 0 { + ui = (1. + x).to_bits(); + hu = (ui >> 32) as u32; + hu += 0x3ff00000 - 0x3fe6a09e; + k = (hu >> 20) as i32 - 0x3ff; + /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */ + if k < 54 { + c = if k >= 2 { + 1. - (f64::from_bits(ui) - x) + } else { + x - (f64::from_bits(ui) - 1.) 
+ }; + c /= f64::from_bits(ui); + } else { + c = 0.; + } + /* reduce u into [sqrt(2)/2, sqrt(2)] */ + hu = (hu & 0x000fffff) + 0x3fe6a09e; + ui = ((hu as u64) << 32) | (ui & 0xffffffff); + f = f64::from_bits(ui) - 1.; + } + hfsq = 0.5 * f * f; + s = f / (2.0 + f); + z = s * s; + w = z * z; + t1 = w * (LG2 + w * (LG4 + w * LG6)); + t2 = z * (LG1 + w * (LG3 + w * (LG5 + w * LG7))); + r = t2 + t1; + dk = k as f64; + s * (hfsq + r) + (dk * LN2_LO + c) - hfsq + f + dk * LN2_HI +} diff --git a/library/compiler-builtins/libm/src/math/log1pf.rs b/library/compiler-builtins/libm/src/math/log1pf.rs new file mode 100644 index 0000000000000..23978e61c3c4a --- /dev/null +++ b/library/compiler-builtins/libm/src/math/log1pf.rs @@ -0,0 +1,99 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_log1pf.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use core::f32; + +const LN2_HI: f32 = 6.9313812256e-01; /* 0x3f317180 */ +const LN2_LO: f32 = 9.0580006145e-06; /* 0x3717f7d1 */ +/* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */ +const LG1: f32 = 0.66666662693; /* 0xaaaaaa.0p-24 */ +const LG2: f32 = 0.40000972152; /* 0xccce13.0p-25 */ +const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */ +const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */ + +/// The natural logarithm of 1+`x` (f32). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn log1pf(x: f32) -> f32 { + let mut ui: u32 = x.to_bits(); + let hfsq: f32; + let mut f: f32 = 0.; + let mut c: f32 = 0.; + let s: f32; + let z: f32; + let r: f32; + let w: f32; + let t1: f32; + let t2: f32; + let dk: f32; + let ix: u32; + let mut iu: u32; + let mut k: i32; + + ix = ui; + k = 1; + if ix < 0x3ed413d0 || (ix >> 31) > 0 { + /* 1+x < sqrt(2)+ */ + if ix >= 0xbf800000 { + /* x <= -1.0 */ + if x == -1. { + return x / 0.0; /* log1p(-1)=+inf */ + } + return (x - x) / 0.0; /* log1p(x<-1)=NaN */ + } + if ix << 1 < 0x33800000 << 1 { + /* |x| < 2**-24 */ + /* underflow if subnormal */ + if (ix & 0x7f800000) == 0 { + force_eval!(x * x); + } + return x; + } + if ix <= 0xbe95f619 { + /* sqrt(2)/2- <= 1+x < sqrt(2)+ */ + k = 0; + c = 0.; + f = x; + } + } else if ix >= 0x7f800000 { + return x; + } + if k > 0 { + ui = (1. + x).to_bits(); + iu = ui; + iu += 0x3f800000 - 0x3f3504f3; + k = (iu >> 23) as i32 - 0x7f; + /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */ + if k < 25 { + c = if k >= 2 { + 1. - (f32::from_bits(ui) - x) + } else { + x - (f32::from_bits(ui) - 1.) 
+ }; + c /= f32::from_bits(ui); + } else { + c = 0.; + } + /* reduce u into [sqrt(2)/2, sqrt(2)] */ + iu = (iu & 0x007fffff) + 0x3f3504f3; + ui = iu; + f = f32::from_bits(ui) - 1.; + } + s = f / (2.0 + f); + z = s * s; + w = z * z; + t1 = w * (LG2 + w * LG4); + t2 = z * (LG1 + w * LG3); + r = t2 + t1; + hfsq = 0.5 * f * f; + dk = k as f32; + s * (hfsq + r) + (dk * LN2_LO + c) - hfsq + f + dk * LN2_HI +} diff --git a/library/compiler-builtins/libm/src/math/log2.rs b/library/compiler-builtins/libm/src/math/log2.rs new file mode 100644 index 0000000000000..701f63c25e72b --- /dev/null +++ b/library/compiler-builtins/libm/src/math/log2.rs @@ -0,0 +1,107 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_log2.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* + * Return the base 2 logarithm of x. See log.c for most comments. + * + * Reduce x to 2^k (1+f) and calculate r = log(1+f) - f + f*f/2 + * as in log.c, then combine and scale in extra precision: + * log2(x) = (f - f*f/2 + r)/log(2) + k + */ + +use core::f64; + +const IVLN2HI: f64 = 1.44269504072144627571e+00; /* 0x3ff71547, 0x65200000 */ +const IVLN2LO: f64 = 1.67517131648865118353e-10; /* 0x3de705fc, 0x2eefa200 */ +const LG1: f64 = 6.666666666666735130e-01; /* 3FE55555 55555593 */ +const LG2: f64 = 3.999999999940941908e-01; /* 3FD99999 9997FA04 */ +const LG3: f64 = 2.857142874366239149e-01; /* 3FD24924 94229359 */ +const LG4: f64 = 2.222219843214978396e-01; /* 3FCC71C5 1D8E78AF */ +const LG5: f64 = 1.818357216161805012e-01; /* 3FC74664 96CB03DE */ +const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */ +const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ + +/// The base 2 logarithm of `x` (f64). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn log2(mut x: f64) -> f64 { + let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54 + + let mut ui: u64 = x.to_bits(); + let hfsq: f64; + let f: f64; + let s: f64; + let z: f64; + let r: f64; + let mut w: f64; + let t1: f64; + let t2: f64; + let y: f64; + let mut hi: f64; + let lo: f64; + let mut val_hi: f64; + let mut val_lo: f64; + let mut hx: u32; + let mut k: i32; + + hx = (ui >> 32) as u32; + k = 0; + if hx < 0x00100000 || (hx >> 31) > 0 { + if ui << 1 == 0 { + return -1. 
/ (x * x); /* log(+-0)=-inf */ + } + if (hx >> 31) > 0 { + return (x - x) / 0.0; /* log(-#) = NaN */ + } + /* subnormal number, scale x up */ + k -= 54; + x *= x1p54; + ui = x.to_bits(); + hx = (ui >> 32) as u32; + } else if hx >= 0x7ff00000 { + return x; + } else if hx == 0x3ff00000 && ui << 32 == 0 { + return 0.; + } + + /* reduce x into [sqrt(2)/2, sqrt(2)] */ + hx += 0x3ff00000 - 0x3fe6a09e; + k += (hx >> 20) as i32 - 0x3ff; + hx = (hx & 0x000fffff) + 0x3fe6a09e; + ui = ((hx as u64) << 32) | (ui & 0xffffffff); + x = f64::from_bits(ui); + + f = x - 1.0; + hfsq = 0.5 * f * f; + s = f / (2.0 + f); + z = s * s; + w = z * z; + t1 = w * (LG2 + w * (LG4 + w * LG6)); + t2 = z * (LG1 + w * (LG3 + w * (LG5 + w * LG7))); + r = t2 + t1; + + /* hi+lo = f - hfsq + s*(hfsq+R) ~ log(1+f) */ + hi = f - hfsq; + ui = hi.to_bits(); + ui &= (-1i64 as u64) << 32; + hi = f64::from_bits(ui); + lo = f - hi - hfsq + s * (hfsq + r); + + val_hi = hi * IVLN2HI; + val_lo = (lo + hi) * IVLN2LO + lo * IVLN2HI; + + /* spadd(val_hi, val_lo, y), except for not using double_t: */ + y = k.into(); + w = y + val_hi; + val_lo += (y - w) + val_hi; + val_hi = w; + + val_lo + val_hi +} diff --git a/library/compiler-builtins/libm/src/math/log2f.rs b/library/compiler-builtins/libm/src/math/log2f.rs new file mode 100644 index 0000000000000..5ba2427d1d468 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/log2f.rs @@ -0,0 +1,88 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_log2f.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* + * See comments in log2.c. + */ + +use core::f32; + +const IVLN2HI: f32 = 1.4428710938e+00; /* 0x3fb8b000 */ +const IVLN2LO: f32 = -1.7605285393e-04; /* 0xb9389ad4 */ +/* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */ +const LG1: f32 = 0.66666662693; /* 0xaaaaaa.0p-24 */ +const LG2: f32 = 0.40000972152; /* 0xccce13.0p-25 */ +const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */ +const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */ + +/// The base 2 logarithm of `x` (f32). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn log2f(mut x: f32) -> f32 { + let x1p25f = f32::from_bits(0x4c000000); // 0x1p25f === 2 ^ 25 + + let mut ui: u32 = x.to_bits(); + let hfsq: f32; + let f: f32; + let s: f32; + let z: f32; + let r: f32; + let w: f32; + let t1: f32; + let t2: f32; + let mut hi: f32; + let lo: f32; + let mut ix: u32; + let mut k: i32; + + ix = ui; + k = 0; + if ix < 0x00800000 || (ix >> 31) > 0 { + /* x < 2**-126 */ + if ix << 1 == 0 { + return -1. 
/ (x * x); /* log(+-0)=-inf */ + } + if (ix >> 31) > 0 { + return (x - x) / 0.0; /* log(-#) = NaN */ + } + /* subnormal number, scale up x */ + k -= 25; + x *= x1p25f; + ui = x.to_bits(); + ix = ui; + } else if ix >= 0x7f800000 { + return x; + } else if ix == 0x3f800000 { + return 0.; + } + + /* reduce x into [sqrt(2)/2, sqrt(2)] */ + ix += 0x3f800000 - 0x3f3504f3; + k += (ix >> 23) as i32 - 0x7f; + ix = (ix & 0x007fffff) + 0x3f3504f3; + ui = ix; + x = f32::from_bits(ui); + + f = x - 1.0; + s = f / (2.0 + f); + z = s * s; + w = z * z; + t1 = w * (LG2 + w * LG4); + t2 = z * (LG1 + w * LG3); + r = t2 + t1; + hfsq = 0.5 * f * f; + + hi = f - hfsq; + ui = hi.to_bits(); + ui &= 0xfffff000; + hi = f32::from_bits(ui); + lo = f - hi - hfsq + s * (hfsq + r); + (lo + hi) * IVLN2LO + lo * IVLN2HI + hi * IVLN2HI + k as f32 +} diff --git a/library/compiler-builtins/libm/src/math/logf.rs b/library/compiler-builtins/libm/src/math/logf.rs new file mode 100644 index 0000000000000..68d1943025e35 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/logf.rs @@ -0,0 +1,66 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_logf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +const LN2_HI: f32 = 6.9313812256e-01; /* 0x3f317180 */ +const LN2_LO: f32 = 9.0580006145e-06; /* 0x3717f7d1 */ +/* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */ +const LG1: f32 = 0.66666662693; /* 0xaaaaaa.0p-24*/ +const LG2: f32 = 0.40000972152; /* 0xccce13.0p-25 */ +const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */ +const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */ + +/// The natural logarithm of `x` (f32). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn logf(mut x: f32) -> f32 { + let x1p25 = f32::from_bits(0x4c000000); // 0x1p25f === 2 ^ 25 + + let mut ix = x.to_bits(); + let mut k = 0i32; + + if (ix < 0x00800000) || ((ix >> 31) != 0) { + /* x < 2**-126 */ + if ix << 1 == 0 { + return -1. / (x * x); /* log(+-0)=-inf */ + } + if (ix >> 31) != 0 { + return (x - x) / 0.; /* log(-#) = NaN */ + } + /* subnormal number, scale up x */ + k -= 25; + x *= x1p25; + ix = x.to_bits(); + } else if ix >= 0x7f800000 { + return x; + } else if ix == 0x3f800000 { + return 0.; + } + + /* reduce x into [sqrt(2)/2, sqrt(2)] */ + ix += 0x3f800000 - 0x3f3504f3; + k += ((ix >> 23) as i32) - 0x7f; + ix = (ix & 0x007fffff) + 0x3f3504f3; + x = f32::from_bits(ix); + + let f = x - 1.; + let s = f / (2. + f); + let z = s * s; + let w = z * z; + let t1 = w * (LG2 + w * LG4); + let t2 = z * (LG1 + w * LG3); + let r = t2 + t1; + let hfsq = 0.5 * f * f; + let dk = k as f32; + s * (hfsq + r) + dk * LN2_LO - hfsq + f + dk * LN2_HI +} diff --git a/library/compiler-builtins/libm/src/math/mod.rs b/library/compiler-builtins/libm/src/math/mod.rs new file mode 100644 index 0000000000000..ce9b8fc58bbd4 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/mod.rs @@ -0,0 +1,394 @@ +macro_rules! force_eval { + ($e:expr) => { + unsafe { ::core::ptr::read_volatile(&$e) } + }; +} + +#[cfg(not(debug_assertions))] +macro_rules! 
i { + ($array:expr, $index:expr) => { + unsafe { *$array.get_unchecked($index) } + }; + ($array:expr, $index:expr, = , $rhs:expr) => { + unsafe { + *$array.get_unchecked_mut($index) = $rhs; + } + }; + ($array:expr, $index:expr, += , $rhs:expr) => { + unsafe { + *$array.get_unchecked_mut($index) += $rhs; + } + }; + ($array:expr, $index:expr, -= , $rhs:expr) => { + unsafe { + *$array.get_unchecked_mut($index) -= $rhs; + } + }; + ($array:expr, $index:expr, &= , $rhs:expr) => { + unsafe { + *$array.get_unchecked_mut($index) &= $rhs; + } + }; + ($array:expr, $index:expr, == , $rhs:expr) => { + unsafe { *$array.get_unchecked_mut($index) == $rhs } + }; +} + +#[cfg(debug_assertions)] +macro_rules! i { + ($array:expr, $index:expr) => { + *$array.get($index).unwrap() + }; + ($array:expr, $index:expr, = , $rhs:expr) => { + *$array.get_mut($index).unwrap() = $rhs; + }; + ($array:expr, $index:expr, -= , $rhs:expr) => { + *$array.get_mut($index).unwrap() -= $rhs; + }; + ($array:expr, $index:expr, += , $rhs:expr) => { + *$array.get_mut($index).unwrap() += $rhs; + }; + ($array:expr, $index:expr, &= , $rhs:expr) => { + *$array.get_mut($index).unwrap() &= $rhs; + }; + ($array:expr, $index:expr, == , $rhs:expr) => { + *$array.get_mut($index).unwrap() == $rhs + }; +} + +// Temporary macro to avoid panic codegen for division (in debug mode too). At +// the time of this writing this is only used in a few places, and once +// rust-lang/rust#72751 is fixed then this macro will no longer be necessary and +// the native `/` operator can be used and panics won't be codegen'd. +#[cfg(any(debug_assertions, not(intrinsics_enabled)))] +macro_rules! div { + ($a:expr, $b:expr) => { + $a / $b + }; +} + +#[cfg(all(not(debug_assertions), intrinsics_enabled))] +macro_rules! div { + ($a:expr, $b:expr) => { + unsafe { core::intrinsics::unchecked_div($a, $b) } + }; +} + +// `support` may be public for testing +#[macro_use] +#[cfg(feature = "unstable-public-internals")] +pub mod support; + +#[macro_use] +#[cfg(not(feature = "unstable-public-internals"))] +pub(crate) mod support; + +cfg_if! 
{ + if #[cfg(feature = "unstable-public-internals")] { + pub mod generic; + } else { + mod generic; + } +} + +// Private modules +mod arch; +mod expo2; +mod k_cos; +mod k_cosf; +mod k_expo2; +mod k_expo2f; +mod k_sin; +mod k_sinf; +mod k_tan; +mod k_tanf; +mod rem_pio2; +mod rem_pio2_large; +mod rem_pio2f; + +// Private re-imports +use self::expo2::expo2; +use self::k_cos::k_cos; +use self::k_cosf::k_cosf; +use self::k_expo2::k_expo2; +use self::k_expo2f::k_expo2f; +use self::k_sin::k_sin; +use self::k_sinf::k_sinf; +use self::k_tan::k_tan; +use self::k_tanf::k_tanf; +use self::rem_pio2::rem_pio2; +use self::rem_pio2_large::rem_pio2_large; +use self::rem_pio2f::rem_pio2f; +#[allow(unused_imports)] +use self::support::{CastFrom, CastInto, DFloat, DInt, Float, HFloat, HInt, Int, IntTy, MinInt}; + +// Public modules +mod acos; +mod acosf; +mod acosh; +mod acoshf; +mod asin; +mod asinf; +mod asinh; +mod asinhf; +mod atan; +mod atan2; +mod atan2f; +mod atanf; +mod atanh; +mod atanhf; +mod cbrt; +mod cbrtf; +mod ceil; +mod copysign; +mod cos; +mod cosf; +mod cosh; +mod coshf; +mod erf; +mod erff; +mod exp; +mod exp10; +mod exp10f; +mod exp2; +mod exp2f; +mod expf; +mod expm1; +mod expm1f; +mod fabs; +mod fdim; +mod floor; +mod fma; +mod fmin_fmax; +mod fminimum_fmaximum; +mod fminimum_fmaximum_num; +mod fmod; +mod frexp; +mod frexpf; +mod hypot; +mod hypotf; +mod ilogb; +mod ilogbf; +mod j0; +mod j0f; +mod j1; +mod j1f; +mod jn; +mod jnf; +mod ldexp; +mod lgamma; +mod lgamma_r; +mod lgammaf; +mod lgammaf_r; +mod log; +mod log10; +mod log10f; +mod log1p; +mod log1pf; +mod log2; +mod log2f; +mod logf; +mod modf; +mod modff; +mod nextafter; +mod nextafterf; +mod pow; +mod powf; +mod remainder; +mod remainderf; +mod remquo; +mod remquof; +mod rint; +mod round; +mod roundeven; +mod scalbn; +mod sin; +mod sincos; +mod sincosf; +mod sinf; +mod sinh; +mod sinhf; +mod sqrt; +mod tan; +mod tanf; +mod tanh; +mod tanhf; +mod tgamma; +mod tgammaf; +mod trunc; + +// Use separated imports instead of {}-grouped imports for easier merging. 
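+// Added note (not in the upstream source): these re-exports make up the flat
+// public surface of the math module, so callers write e.g. `libm::logf(x)` or
+// `libm::pow(x, y)` (assuming the crate root re-exports this module, as the
+// upstream libm crate does).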
+pub use self::acos::acos; +pub use self::acosf::acosf; +pub use self::acosh::acosh; +pub use self::acoshf::acoshf; +pub use self::asin::asin; +pub use self::asinf::asinf; +pub use self::asinh::asinh; +pub use self::asinhf::asinhf; +pub use self::atan::atan; +pub use self::atan2::atan2; +pub use self::atan2f::atan2f; +pub use self::atanf::atanf; +pub use self::atanh::atanh; +pub use self::atanhf::atanhf; +pub use self::cbrt::cbrt; +pub use self::cbrtf::cbrtf; +pub use self::ceil::{ceil, ceilf}; +pub use self::copysign::{copysign, copysignf}; +pub use self::cos::cos; +pub use self::cosf::cosf; +pub use self::cosh::cosh; +pub use self::coshf::coshf; +pub use self::erf::{erf, erfc}; +pub use self::erff::{erfcf, erff}; +pub use self::exp::exp; +pub use self::exp2::exp2; +pub use self::exp2f::exp2f; +pub use self::exp10::exp10; +pub use self::exp10f::exp10f; +pub use self::expf::expf; +pub use self::expm1::expm1; +pub use self::expm1f::expm1f; +pub use self::fabs::{fabs, fabsf}; +pub use self::fdim::{fdim, fdimf}; +pub use self::floor::{floor, floorf}; +pub use self::fma::{fma, fmaf}; +pub use self::fmin_fmax::{fmax, fmaxf, fmin, fminf}; +pub use self::fminimum_fmaximum::{fmaximum, fmaximumf, fminimum, fminimumf}; +pub use self::fminimum_fmaximum_num::{fmaximum_num, fmaximum_numf, fminimum_num, fminimum_numf}; +pub use self::fmod::{fmod, fmodf}; +pub use self::frexp::frexp; +pub use self::frexpf::frexpf; +pub use self::hypot::hypot; +pub use self::hypotf::hypotf; +pub use self::ilogb::ilogb; +pub use self::ilogbf::ilogbf; +pub use self::j0::{j0, y0}; +pub use self::j0f::{j0f, y0f}; +pub use self::j1::{j1, y1}; +pub use self::j1f::{j1f, y1f}; +pub use self::jn::{jn, yn}; +pub use self::jnf::{jnf, ynf}; +pub use self::ldexp::{ldexp, ldexpf}; +pub use self::lgamma::lgamma; +pub use self::lgamma_r::lgamma_r; +pub use self::lgammaf::lgammaf; +pub use self::lgammaf_r::lgammaf_r; +pub use self::log::log; +pub use self::log1p::log1p; +pub use self::log1pf::log1pf; +pub use self::log2::log2; +pub use self::log2f::log2f; +pub use self::log10::log10; +pub use self::log10f::log10f; +pub use self::logf::logf; +pub use self::modf::modf; +pub use self::modff::modff; +pub use self::nextafter::nextafter; +pub use self::nextafterf::nextafterf; +pub use self::pow::pow; +pub use self::powf::powf; +pub use self::remainder::remainder; +pub use self::remainderf::remainderf; +pub use self::remquo::remquo; +pub use self::remquof::remquof; +pub use self::rint::{rint, rintf}; +pub use self::round::{round, roundf}; +pub use self::roundeven::{roundeven, roundevenf}; +pub use self::scalbn::{scalbn, scalbnf}; +pub use self::sin::sin; +pub use self::sincos::sincos; +pub use self::sincosf::sincosf; +pub use self::sinf::sinf; +pub use self::sinh::sinh; +pub use self::sinhf::sinhf; +pub use self::sqrt::{sqrt, sqrtf}; +pub use self::tan::tan; +pub use self::tanf::tanf; +pub use self::tanh::tanh; +pub use self::tanhf::tanhf; +pub use self::tgamma::tgamma; +pub use self::tgammaf::tgammaf; +pub use self::trunc::{trunc, truncf}; + +cfg_if! 
{ + if #[cfg(f16_enabled)] { + // verify-sorted-start + pub use self::ceil::ceilf16; + pub use self::copysign::copysignf16; + pub use self::fabs::fabsf16; + pub use self::fdim::fdimf16; + pub use self::floor::floorf16; + pub use self::fmin_fmax::{fmaxf16, fminf16}; + pub use self::fminimum_fmaximum::{fmaximumf16, fminimumf16}; + pub use self::fminimum_fmaximum_num::{fmaximum_numf16, fminimum_numf16}; + pub use self::fmod::fmodf16; + pub use self::ldexp::ldexpf16; + pub use self::rint::rintf16; + pub use self::round::roundf16; + pub use self::roundeven::roundevenf16; + pub use self::scalbn::scalbnf16; + pub use self::sqrt::sqrtf16; + pub use self::trunc::truncf16; + // verify-sorted-end + + #[allow(unused_imports)] + pub(crate) use self::fma::fmaf16; + } +} + +cfg_if! { + if #[cfg(f128_enabled)] { + // verify-sorted-start + pub use self::ceil::ceilf128; + pub use self::copysign::copysignf128; + pub use self::fabs::fabsf128; + pub use self::fdim::fdimf128; + pub use self::floor::floorf128; + pub use self::fma::fmaf128; + pub use self::fmin_fmax::{fmaxf128, fminf128}; + pub use self::fminimum_fmaximum::{fmaximumf128, fminimumf128}; + pub use self::fminimum_fmaximum_num::{fmaximum_numf128, fminimum_numf128}; + pub use self::fmod::fmodf128; + pub use self::ldexp::ldexpf128; + pub use self::rint::rintf128; + pub use self::round::roundf128; + pub use self::roundeven::roundevenf128; + pub use self::scalbn::scalbnf128; + pub use self::sqrt::sqrtf128; + pub use self::trunc::truncf128; + // verify-sorted-end + } +} + +#[inline] +fn get_high_word(x: f64) -> u32 { + (x.to_bits() >> 32) as u32 +} + +#[inline] +fn get_low_word(x: f64) -> u32 { + x.to_bits() as u32 +} + +#[inline] +fn with_set_high_word(f: f64, hi: u32) -> f64 { + let mut tmp = f.to_bits(); + tmp &= 0x00000000_ffffffff; + tmp |= (hi as u64) << 32; + f64::from_bits(tmp) +} + +#[inline] +fn with_set_low_word(f: f64, lo: u32) -> f64 { + let mut tmp = f.to_bits(); + tmp &= 0xffffffff_00000000; + tmp |= lo as u64; + f64::from_bits(tmp) +} + +#[inline] +fn combine_words(hi: u32, lo: u32) -> f64 { + f64::from_bits(((hi as u64) << 32) | lo as u64) +} diff --git a/library/compiler-builtins/libm/src/math/modf.rs b/library/compiler-builtins/libm/src/math/modf.rs new file mode 100644 index 0000000000000..6541862cdd98e --- /dev/null +++ b/library/compiler-builtins/libm/src/math/modf.rs @@ -0,0 +1,35 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn modf(x: f64) -> (f64, f64) { + let rv2: f64; + let mut u = x.to_bits(); + let mask: u64; + let e = (((u >> 52) & 0x7ff) as i32) - 0x3ff; + + /* no fractional part */ + if e >= 52 { + rv2 = x; + if e == 0x400 && (u << 12) != 0 { + /* nan */ + return (x, rv2); + } + u &= 1 << 63; + return (f64::from_bits(u), rv2); + } + + /* no integral part*/ + if e < 0 { + u &= 1 << 63; + rv2 = f64::from_bits(u); + return (x, rv2); + } + + mask = ((!0) >> 12) >> e; + if (u & mask) == 0 { + rv2 = x; + u &= 1 << 63; + return (f64::from_bits(u), rv2); + } + u &= !mask; + rv2 = f64::from_bits(u); + return (x - rv2, rv2); +} diff --git a/library/compiler-builtins/libm/src/math/modff.rs b/library/compiler-builtins/libm/src/math/modff.rs new file mode 100644 index 0000000000000..90c6bca7d8da9 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/modff.rs @@ -0,0 +1,34 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn modff(x: f32) -> (f32, f32) { + let rv2: f32; + let mut u: u32 = x.to_bits(); + let mask: u32; + let e = (((u >> 23) & 0xff) as i32) - 0x7f; + + /* no fractional part 
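(e >= 23 means |x| >= 2**23, where every representable f32 is already an integer)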
*/ + if e >= 23 { + rv2 = x; + if e == 0x80 && (u << 9) != 0 { + /* nan */ + return (x, rv2); + } + u &= 0x80000000; + return (f32::from_bits(u), rv2); + } + /* no integral part */ + if e < 0 { + u &= 0x80000000; + rv2 = f32::from_bits(u); + return (x, rv2); + } + + mask = 0x007fffff >> e; + if (u & mask) == 0 { + rv2 = x; + u &= 0x80000000; + return (f32::from_bits(u), rv2); + } + u &= !mask; + rv2 = f32::from_bits(u); + return (x - rv2, rv2); +} diff --git a/library/compiler-builtins/libm/src/math/nextafter.rs b/library/compiler-builtins/libm/src/math/nextafter.rs new file mode 100644 index 0000000000000..c991ff6f2330d --- /dev/null +++ b/library/compiler-builtins/libm/src/math/nextafter.rs @@ -0,0 +1,37 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn nextafter(x: f64, y: f64) -> f64 { + if x.is_nan() || y.is_nan() { + return x + y; + } + + let mut ux_i = x.to_bits(); + let uy_i = y.to_bits(); + if ux_i == uy_i { + return y; + } + + let ax = ux_i & (!1_u64 / 2); + let ay = uy_i & (!1_u64 / 2); + if ax == 0 { + if ay == 0 { + return y; + } + ux_i = (uy_i & (1_u64 << 63)) | 1; + } else if ax > ay || ((ux_i ^ uy_i) & (1_u64 << 63)) != 0 { + ux_i -= 1; + } else { + ux_i += 1; + } + + let e = (ux_i >> 52) & 0x7ff; + // raise overflow if ux.f is infinite and x is finite + if e == 0x7ff { + force_eval!(x + x); + } + let ux_f = f64::from_bits(ux_i); + // raise underflow if ux.f is subnormal or zero + if e == 0 { + force_eval!(x * x + ux_f * ux_f); + } + ux_f +} diff --git a/library/compiler-builtins/libm/src/math/nextafterf.rs b/library/compiler-builtins/libm/src/math/nextafterf.rs new file mode 100644 index 0000000000000..8ba3833562fbe --- /dev/null +++ b/library/compiler-builtins/libm/src/math/nextafterf.rs @@ -0,0 +1,37 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn nextafterf(x: f32, y: f32) -> f32 { + if x.is_nan() || y.is_nan() { + return x + y; + } + + let mut ux_i = x.to_bits(); + let uy_i = y.to_bits(); + if ux_i == uy_i { + return y; + } + + let ax = ux_i & 0x7fff_ffff_u32; + let ay = uy_i & 0x7fff_ffff_u32; + if ax == 0 { + if ay == 0 { + return y; + } + ux_i = (uy_i & 0x8000_0000_u32) | 1; + } else if ax > ay || ((ux_i ^ uy_i) & 0x8000_0000_u32) != 0 { + ux_i -= 1; + } else { + ux_i += 1; + } + + let e = ux_i & 0x7f80_0000_u32; + // raise overflow if ux_f is infinite and x is finite + if e == 0x7f80_0000_u32 { + force_eval!(x + x); + } + let ux_f = f32::from_bits(ux_i); + // raise underflow if ux_f is subnormal or zero + if e == 0 { + force_eval!(x * x + ux_f * ux_f); + } + ux_f +} diff --git a/library/compiler-builtins/libm/src/math/pow.rs b/library/compiler-builtins/libm/src/math/pow.rs new file mode 100644 index 0000000000000..94ae31cf0dab2 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/pow.rs @@ -0,0 +1,624 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_pow.c */ +/* + * ==================================================== + * Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved. + * + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +// pow(x,y) return x**y +// +// n +// Method: Let x = 2 * (1+f) +// 1. Compute and return log2(x) in two pieces: +// log2(x) = w1 + w2, +// where w1 has 53-24 = 29 bit trailing zeros. +// 2. Perform y*log2(x) = n+y' by simulating multi-precision +// arithmetic, where |y'|<=0.5. +// 3. 
Return x**y = 2**n*exp(y'*log2) +// +// Special cases: +// 1. (anything) ** 0 is 1 +// 2. 1 ** (anything) is 1 +// 3. (anything except 1) ** NAN is NAN +// 4. NAN ** (anything except 0) is NAN +// 5. +-(|x| > 1) ** +INF is +INF +// 6. +-(|x| > 1) ** -INF is +0 +// 7. +-(|x| < 1) ** +INF is +0 +// 8. +-(|x| < 1) ** -INF is +INF +// 9. -1 ** +-INF is 1 +// 10. +0 ** (+anything except 0, NAN) is +0 +// 11. -0 ** (+anything except 0, NAN, odd integer) is +0 +// 12. +0 ** (-anything except 0, NAN) is +INF, raise divbyzero +// 13. -0 ** (-anything except 0, NAN, odd integer) is +INF, raise divbyzero +// 14. -0 ** (+odd integer) is -0 +// 15. -0 ** (-odd integer) is -INF, raise divbyzero +// 16. +INF ** (+anything except 0,NAN) is +INF +// 17. +INF ** (-anything except 0,NAN) is +0 +// 18. -INF ** (+odd integer) is -INF +// 19. -INF ** (anything) = -0 ** (-anything), (anything except odd integer) +// 20. (anything) ** 1 is (anything) +// 21. (anything) ** -1 is 1/(anything) +// 22. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer) +// 23. (-anything except 0 and inf) ** (non-integer) is NAN +// +// Accuracy: +// pow(x,y) returns x**y nearly rounded. In particular +// pow(integer,integer) +// always returns the correct integer provided it is +// representable. +// +// Constants : +// The hexadecimal values are the intended ones for the following +// constants. The decimal values may be used, provided that the +// compiler will convert from decimal to binary accurately enough +// to produce the hexadecimal values shown. +// +use super::{fabs, get_high_word, scalbn, sqrt, with_set_high_word, with_set_low_word}; + +const BP: [f64; 2] = [1.0, 1.5]; +const DP_H: [f64; 2] = [0.0, 5.84962487220764160156e-01]; /* 0x3fe2b803_40000000 */ +const DP_L: [f64; 2] = [0.0, 1.35003920212974897128e-08]; /* 0x3E4CFDEB, 0x43CFD006 */ +const TWO53: f64 = 9007199254740992.0; /* 0x43400000_00000000 */ +const HUGE: f64 = 1.0e300; +const TINY: f64 = 1.0e-300; + +// poly coefs for (3/2)*(log(x)-2s-2/3*s**3: +const L1: f64 = 5.99999999999994648725e-01; /* 0x3fe33333_33333303 */ +const L2: f64 = 4.28571428578550184252e-01; /* 0x3fdb6db6_db6fabff */ +const L3: f64 = 3.33333329818377432918e-01; /* 0x3fd55555_518f264d */ +const L4: f64 = 2.72728123808534006489e-01; /* 0x3fd17460_a91d4101 */ +const L5: f64 = 2.30660745775561754067e-01; /* 0x3fcd864a_93c9db65 */ +const L6: f64 = 2.06975017800338417784e-01; /* 0x3fca7e28_4a454eef */ +const P1: f64 = 1.66666666666666019037e-01; /* 0x3fc55555_5555553e */ +const P2: f64 = -2.77777777770155933842e-03; /* 0xbf66c16c_16bebd93 */ +const P3: f64 = 6.61375632143793436117e-05; /* 0x3f11566a_af25de2c */ +const P4: f64 = -1.65339022054652515390e-06; /* 0xbebbbd41_c5d26bf1 */ +const P5: f64 = 4.13813679705723846039e-08; /* 0x3e663769_72bea4d0 */ +const LG2: f64 = 6.93147180559945286227e-01; /* 0x3fe62e42_fefa39ef */ +const LG2_H: f64 = 6.93147182464599609375e-01; /* 0x3fe62e43_00000000 */ +const LG2_L: f64 = -1.90465429995776804525e-09; /* 0xbe205c61_0ca86c39 */ +const OVT: f64 = 8.0085662595372944372e-017; /* -(1024-log2(ovfl+.5ulp)) */ +const CP: f64 = 9.61796693925975554329e-01; /* 0x3feec709_dc3a03fd =2/(3ln2) */ +const CP_H: f64 = 9.61796700954437255859e-01; /* 0x3feec709_e0000000 =(float)cp */ +const CP_L: f64 = -7.02846165095275826516e-09; /* 0xbe3e2fe0_145b01f5 =tail of cp_h*/ +const IVLN2: f64 = 1.44269504088896338700e+00; /* 0x3ff71547_652b82fe =1/ln2 */ +const IVLN2_H: f64 = 1.44269502162933349609e+00; /* 0x3ff71547_60000000 =24b 1/ln2*/ +const IVLN2_L: f64 = 
1.92596299112661746887e-08; /* 0x3e54ae0b_f85ddf44 =1/ln2 tail*/ + +/// Returns `x` to the power of `y` (f64). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn pow(x: f64, y: f64) -> f64 { + let t1: f64; + let t2: f64; + + let (hx, lx): (i32, u32) = ((x.to_bits() >> 32) as i32, x.to_bits() as u32); + let (hy, ly): (i32, u32) = ((y.to_bits() >> 32) as i32, y.to_bits() as u32); + + let mut ix: i32 = hx & 0x7fffffff_i32; + let iy: i32 = hy & 0x7fffffff_i32; + + /* x**0 = 1, even if x is NaN */ + if ((iy as u32) | ly) == 0 { + return 1.0; + } + + /* 1**y = 1, even if y is NaN */ + if hx == 0x3ff00000 && lx == 0 { + return 1.0; + } + + /* NaN if either arg is NaN */ + if ix > 0x7ff00000 + || (ix == 0x7ff00000 && lx != 0) + || iy > 0x7ff00000 + || (iy == 0x7ff00000 && ly != 0) + { + return x + y; + } + + /* determine if y is an odd int when x < 0 + * yisint = 0 ... y is not an integer + * yisint = 1 ... y is an odd int + * yisint = 2 ... y is an even int + */ + let mut yisint: i32 = 0; + let mut k: i32; + let mut j: i32; + if hx < 0 { + if iy >= 0x43400000 { + yisint = 2; /* even integer y */ + } else if iy >= 0x3ff00000 { + k = (iy >> 20) - 0x3ff; /* exponent */ + + if k > 20 { + j = (ly >> (52 - k)) as i32; + + if (j << (52 - k)) == (ly as i32) { + yisint = 2 - (j & 1); + } + } else if ly == 0 { + j = iy >> (20 - k); + + if (j << (20 - k)) == iy { + yisint = 2 - (j & 1); + } + } + } + } + + if ly == 0 { + /* special value of y */ + if iy == 0x7ff00000 { + /* y is +-inf */ + + return if ((ix - 0x3ff00000) | (lx as i32)) == 0 { + /* (-1)**+-inf is 1 */ + 1.0 + } else if ix >= 0x3ff00000 { + /* (|x|>1)**+-inf = inf,0 */ + if hy >= 0 { y } else { 0.0 } + } else { + /* (|x|<1)**+-inf = 0,inf */ + if hy >= 0 { 0.0 } else { -y } + }; + } + + if iy == 0x3ff00000 { + /* y is +-1 */ + return if hy >= 0 { x } else { 1.0 / x }; + } + + if hy == 0x40000000 { + /* y is 2 */ + return x * x; + } + + if hy == 0x3fe00000 { + /* y is 0.5 */ + if hx >= 0 { + /* x >= +0 */ + return sqrt(x); + } + } + } + + let mut ax: f64 = fabs(x); + if lx == 0 { + /* special value of x */ + if ix == 0x7ff00000 || ix == 0 || ix == 0x3ff00000 { + /* x is +-0,+-inf,+-1 */ + let mut z: f64 = ax; + + if hy < 0 { + /* z = (1/|x|) */ + z = 1.0 / z; + } + + if hx < 0 { + if ((ix - 0x3ff00000) | yisint) == 0 { + z = (z - z) / (z - z); /* (-1)**non-int is NaN */ + } else if yisint == 1 { + z = -z; /* (x<0)**odd = -(|x|**odd) */ + } + } + + return z; + } + } + + let mut s: f64 = 1.0; /* sign of result */ + if hx < 0 { + if yisint == 0 { + /* (x<0)**(non-int) is NaN */ + return (x - x) / (x - x); + } + + if yisint == 1 { + /* (x<0)**(odd int) */ + s = -1.0; + } + } + + /* |y| is HUGE */ + if iy > 0x41e00000 { + /* if |y| > 2**31 */ + if iy > 0x43f00000 { + /* if |y| > 2**64, must o/uflow */ + if ix <= 0x3fefffff { + return if hy < 0 { HUGE * HUGE } else { TINY * TINY }; + } + + if ix >= 0x3ff00000 { + return if hy > 0 { HUGE * HUGE } else { TINY * TINY }; + } + } + + /* over/underflow if x is not close to one */ + if ix < 0x3fefffff { + return if hy < 0 { + s * HUGE * HUGE + } else { + s * TINY * TINY + }; + } + if ix > 0x3ff00000 { + return if hy > 0 { + s * HUGE * HUGE + } else { + s * TINY * TINY + }; + } + + /* now |1-x| is TINY <= 2**-20, suffice to compute + log(x) by x-x^2/2+x^3/3-x^4/4 */ + let t: f64 = ax - 1.0; /* t has 20 trailing zeros */ + let w: f64 = (t * t) * (0.5 - t * (0.3333333333333333333333 - t * 0.25)); + let u: f64 = IVLN2_H * t; /* ivln2_h has 21 sig. 
bits */
+        let v: f64 = t * IVLN2_L - w * IVLN2;
+        t1 = with_set_low_word(u + v, 0);
+        t2 = v - (t1 - u);
+    } else {
+        // double ss,s2,s_h,s_l,t_h,t_l;
+        let mut n: i32 = 0;
+
+        if ix < 0x00100000 {
+            /* take care subnormal number */
+            ax *= TWO53;
+            n -= 53;
+            ix = get_high_word(ax) as i32;
+        }
+
+        n += (ix >> 20) - 0x3ff;
+        j = ix & 0x000fffff;
+
+        /* determine interval */
+        let k: i32;
+        ix = j | 0x3ff00000; /* normalize ix */
+        if j <= 0x3988E {
+            /* |x|<sqrt(3/2) */
+            k = 0;
+        } else if j < 0xBB67A {
+            /* |x|<sqrt(3)   */
+            k = 1;
+        } else {
+            k = 0;
+            n += 1;
+            ix -= 0x00100000;
+        }
+        ax = with_set_high_word(ax, ix as u32);
+
+        /* compute ss = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
+        let u: f64 = ax - i!(BP, k as usize); /* bp[0]=1.0, bp[1]=1.5 */
+        let v: f64 = 1.0 / (ax + i!(BP, k as usize));
+        let ss: f64 = u * v;
+        let s_h = with_set_low_word(ss, 0);
+
+        /* t_h=ax+bp[i] High */
+        let t_h: f64 = with_set_high_word(
+            0.0,
+            ((ix as u32 >> 1) | 0x20000000) + 0x00080000 + ((k as u32) << 18),
+        );
+        let t_l: f64 = ax - (t_h - i!(BP, k as usize));
+        let s_l: f64 = v * ((u - s_h * t_h) - s_h * t_l);
+
+        /* compute log(ax) */
+        let s2: f64 = ss * ss;
+        let mut r: f64 = s2 * s2 * (L1 + s2 * (L2 + s2 * (L3 + s2 * (L4 + s2 * (L5 + s2 * L6)))));
+        r += s_l * (s_h + ss);
+        let s2: f64 = s_h * s_h;
+        let t_h: f64 = with_set_low_word(3.0 + s2 + r, 0);
+        let t_l: f64 = r - ((t_h - 3.0) - s2);
+
+        /* u+v = ss*(1+...) */
+        let u: f64 = s_h * t_h;
+        let v: f64 = s_l * t_h + t_l * ss;
+
+        /* 2/(3log2)*(ss+...) */
+        let p_h: f64 = with_set_low_word(u + v, 0);
+        let p_l = v - (p_h - u);
+        let z_h: f64 = CP_H * p_h; /* cp_h+cp_l = 2/(3*log2) */
+        let z_l: f64 = CP_L * p_h + p_l * CP + i!(DP_L, k as usize);
+
+        /* log2(ax) = (ss+..)*2/(3*log2) = n + dp_h + z_h + z_l */
+        let t: f64 = n as f64;
+        t1 = with_set_low_word(((z_h + z_l) + i!(DP_H, k as usize)) + t, 0);
+        t2 = z_l - (((t1 - t) - i!(DP_H, k as usize)) - z_h);
+    }
+
+    /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
+    let y1: f64 = with_set_low_word(y, 0);
+    let p_l: f64 = (y - y1) * t1 + y * t2;
+    let mut p_h: f64 = y1 * t1;
+    let z: f64 = p_l + p_h;
+    let mut j: i32 = (z.to_bits() >> 32) as i32;
+    let i: i32 = z.to_bits() as i32;
+    // let (j, i): (i32, i32) = ((z.to_bits() >> 32) as i32, z.to_bits() as i32);
+
+    if j >= 0x40900000 {
+        /* z >= 1024 */
+        if (j - 0x40900000) | i != 0 {
+            /* if z > 1024 */
+            return s * HUGE * HUGE; /* overflow */
+        }
+
+        if p_l + OVT > z - p_h {
+            return s * HUGE * HUGE; /* overflow */
+        }
+    } else if (j & 0x7fffffff) >= 0x4090cc00 {
+        /* z <= -1075 */
+        // FIXME: instead of abs(j) use unsigned j
+
+        if (((j as u32) - 0xc090cc00) | (i as u32)) != 0 {
+            /* z < -1075 */
+            return s * TINY * TINY; /* underflow */
+        }
+
+        if p_l <= z - p_h {
+            return s * TINY * TINY; /* underflow */
+        }
+    }
+
+    /* compute 2**(p_h+p_l) */
+    let i: i32 = j & 0x7fffffff_i32;
+    k = (i >> 20) - 0x3ff;
+    let mut n: i32 = 0;
+
+    if i > 0x3fe00000 {
+        /* if |z| > 0.5, set n = [z+0.5] */
+        n = j + (0x00100000 >> (k + 1));
+        k = ((n & 0x7fffffff) >> 20) - 0x3ff; /* new k for n */
+        let t: f64 = with_set_high_word(0.0, (n & !(0x000fffff >> k)) as u32);
+        n = ((n & 0x000fffff) | 0x00100000) >> (20 - k);
+        if j < 0 {
+            n = -n;
+        }
+        p_h -= t;
+    }
+
+    let t: f64 = with_set_low_word(p_l + p_h, 0);
+    let u: f64 = t * LG2_H;
+    let v: f64 = (p_l - (t - p_h)) * LG2 + t * LG2_L;
+    let mut z: f64 = u + v;
+    let w: f64 = v - (z - u);
+    let t: f64 = z * z;
+    let t1: f64 = z - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5))));
+    let r: f64 = (z * t1) / (t1 - 2.0) - (w + z * w);
+    z = 1.0 - (r - z);
+    j = get_high_word(z) as i32;
+    j += n << 20;
+
+    if (j >> 20) <= 0 {
+        /* subnormal output */
+        z = scalbn(z, n);
+    } else {
+        z = with_set_high_word(z, j as u32);
+    }
+
+    s * z
+}
+
+#[cfg(test)]
+mod tests {
+    extern crate core;
+
+    use self::core::f64::consts::{E, PI};
+    use super::pow;
+
+    const POS_ZERO: &[f64] = &[0.0];
+    const NEG_ZERO: &[f64] = &[-0.0];
+    const POS_ONE: &[f64] = &[1.0];
+    const NEG_ONE: &[f64] = &[-1.0];
+    const 
POS_FLOATS: &[f64] = &[99.0 / 70.0, E, PI]; + const NEG_FLOATS: &[f64] = &[-99.0 / 70.0, -E, -PI]; + const POS_SMALL_FLOATS: &[f64] = &[(1.0 / 2.0), f64::MIN_POSITIVE, f64::EPSILON]; + const NEG_SMALL_FLOATS: &[f64] = &[-(1.0 / 2.0), -f64::MIN_POSITIVE, -f64::EPSILON]; + const POS_EVENS: &[f64] = &[2.0, 6.0, 8.0, 10.0, 22.0, 100.0, f64::MAX]; + const NEG_EVENS: &[f64] = &[f64::MIN, -100.0, -22.0, -10.0, -8.0, -6.0, -2.0]; + const POS_ODDS: &[f64] = &[3.0, 7.0]; + const NEG_ODDS: &[f64] = &[-7.0, -3.0]; + const NANS: &[f64] = &[f64::NAN]; + const POS_INF: &[f64] = &[f64::INFINITY]; + const NEG_INF: &[f64] = &[f64::NEG_INFINITY]; + + const ALL: &[&[f64]] = &[ + POS_ZERO, + NEG_ZERO, + NANS, + NEG_SMALL_FLOATS, + POS_SMALL_FLOATS, + NEG_FLOATS, + POS_FLOATS, + NEG_EVENS, + POS_EVENS, + NEG_ODDS, + POS_ODDS, + NEG_INF, + POS_INF, + NEG_ONE, + POS_ONE, + ]; + const POS: &[&[f64]] = &[POS_ZERO, POS_ODDS, POS_ONE, POS_FLOATS, POS_EVENS, POS_INF]; + const NEG: &[&[f64]] = &[NEG_ZERO, NEG_ODDS, NEG_ONE, NEG_FLOATS, NEG_EVENS, NEG_INF]; + + fn pow_test(base: f64, exponent: f64, expected: f64) { + let res = pow(base, exponent); + assert!( + if expected.is_nan() { + res.is_nan() + } else { + pow(base, exponent) == expected + }, + "{base} ** {exponent} was {res} instead of {expected}", + ); + } + + fn test_sets_as_base(sets: &[&[f64]], exponent: f64, expected: f64) { + sets.iter() + .for_each(|s| s.iter().for_each(|val| pow_test(*val, exponent, expected))); + } + + fn test_sets_as_exponent(base: f64, sets: &[&[f64]], expected: f64) { + sets.iter() + .for_each(|s| s.iter().for_each(|val| pow_test(base, *val, expected))); + } + + fn test_sets(sets: &[&[f64]], computed: &dyn Fn(f64) -> f64, expected: &dyn Fn(f64) -> f64) { + sets.iter().for_each(|s| { + s.iter().for_each(|val| { + let exp = expected(*val); + let res = computed(*val); + + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let exp = force_eval!(exp); + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let res = force_eval!(res); + assert!( + if exp.is_nan() { + res.is_nan() + } else { + exp == res + }, + "test for {val} was {res} instead of {exp}", + ); + }) + }); + } + + #[test] + fn zero_as_exponent() { + test_sets_as_base(ALL, 0.0, 1.0); + test_sets_as_base(ALL, -0.0, 1.0); + } + + #[test] + fn one_as_base() { + test_sets_as_exponent(1.0, ALL, 1.0); + } + + #[test] + fn nan_inputs() { + // NAN as the base: + // (f64::NAN ^ anything *but 0* should be f64::NAN) + test_sets_as_exponent(f64::NAN, &ALL[2..], f64::NAN); + + // f64::NAN as the exponent: + // (anything *but 1* ^ f64::NAN should be f64::NAN) + test_sets_as_base(&ALL[..(ALL.len() - 2)], f64::NAN, f64::NAN); + } + + #[test] + fn infinity_as_base() { + // Positive Infinity as the base: + // (+Infinity ^ positive anything but 0 and f64::NAN should be +Infinity) + test_sets_as_exponent(f64::INFINITY, &POS[1..], f64::INFINITY); + + // (+Infinity ^ negative anything except 0 and f64::NAN should be 0.0) + test_sets_as_exponent(f64::INFINITY, &NEG[1..], 0.0); + + // Negative Infinity as the base: + // (-Infinity ^ positive odd ints should be -Infinity) + test_sets_as_exponent(f64::NEG_INFINITY, &[POS_ODDS], f64::NEG_INFINITY); + + // (-Infinity ^ anything but odd ints should be == -0 ^ (-anything)) + // We can lump in pos/neg odd ints here because they don't seem to + // cause panics (div by zero) in release mode (I think). 
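+        // Added note: this exercises special case 19 from the table at the top of
+        // the file, (-INF) ** y == (-0) ** (-y), by computing both sides directly.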
+ test_sets(ALL, &|v: f64| pow(f64::NEG_INFINITY, v), &|v: f64| { + pow(-0.0, -v) + }); + } + + #[test] + fn infinity_as_exponent() { + // Positive/Negative base greater than 1: + // (pos/neg > 1 ^ Infinity should be Infinity - note this excludes f64::NAN as the base) + test_sets_as_base(&ALL[5..(ALL.len() - 2)], f64::INFINITY, f64::INFINITY); + + // (pos/neg > 1 ^ -Infinity should be 0.0) + test_sets_as_base(&ALL[5..ALL.len() - 2], f64::NEG_INFINITY, 0.0); + + // Positive/Negative base less than 1: + let base_below_one = &[POS_ZERO, NEG_ZERO, NEG_SMALL_FLOATS, POS_SMALL_FLOATS]; + + // (pos/neg < 1 ^ Infinity should be 0.0 - this also excludes f64::NAN as the base) + test_sets_as_base(base_below_one, f64::INFINITY, 0.0); + + // (pos/neg < 1 ^ -Infinity should be Infinity) + test_sets_as_base(base_below_one, f64::NEG_INFINITY, f64::INFINITY); + + // Positive/Negative 1 as the base: + // (pos/neg 1 ^ Infinity should be 1) + test_sets_as_base(&[NEG_ONE, POS_ONE], f64::INFINITY, 1.0); + + // (pos/neg 1 ^ -Infinity should be 1) + test_sets_as_base(&[NEG_ONE, POS_ONE], f64::NEG_INFINITY, 1.0); + } + + #[test] + fn zero_as_base() { + // Positive Zero as the base: + // (+0 ^ anything positive but 0 and f64::NAN should be +0) + test_sets_as_exponent(0.0, &POS[1..], 0.0); + + // (+0 ^ anything negative but 0 and f64::NAN should be Infinity) + // (this should panic because we're dividing by zero) + test_sets_as_exponent(0.0, &NEG[1..], f64::INFINITY); + + // Negative Zero as the base: + // (-0 ^ anything positive but 0, f64::NAN, and odd ints should be +0) + test_sets_as_exponent(-0.0, &POS[3..], 0.0); + + // (-0 ^ anything negative but 0, f64::NAN, and odd ints should be Infinity) + // (should panic because of divide by zero) + test_sets_as_exponent(-0.0, &NEG[3..], f64::INFINITY); + + // (-0 ^ positive odd ints should be -0) + test_sets_as_exponent(-0.0, &[POS_ODDS], -0.0); + + // (-0 ^ negative odd ints should be -Infinity) + // (should panic because of divide by zero) + test_sets_as_exponent(-0.0, &[NEG_ODDS], f64::NEG_INFINITY); + } + + #[test] + fn special_cases() { + // One as the exponent: + // (anything ^ 1 should be anything - i.e. 
the base) + test_sets(ALL, &|v: f64| pow(v, 1.0), &|v: f64| v); + + // Negative One as the exponent: + // (anything ^ -1 should be 1/anything) + test_sets(ALL, &|v: f64| pow(v, -1.0), &|v: f64| 1.0 / v); + + // Factoring -1 out: + // (negative anything ^ integer should be (-1 ^ integer) * (positive anything ^ integer)) + [POS_ZERO, NEG_ZERO, POS_ONE, NEG_ONE, POS_EVENS, NEG_EVENS] + .iter() + .for_each(|int_set| { + int_set.iter().for_each(|int| { + test_sets(ALL, &|v: f64| pow(-v, *int), &|v: f64| { + pow(-1.0, *int) * pow(v, *int) + }); + }) + }); + + // Negative base (imaginary results): + // (-anything except 0 and Infinity ^ non-integer should be NAN) + NEG[1..(NEG.len() - 1)].iter().for_each(|set| { + set.iter().for_each(|val| { + test_sets(&ALL[3..7], &|v: f64| pow(*val, v), &|_| f64::NAN); + }) + }); + } + + #[test] + fn normal_cases() { + assert_eq!(pow(2.0, 20.0), (1 << 20) as f64); + assert_eq!(pow(-1.0, 9.0), -1.0); + assert!(pow(-1.0, 2.2).is_nan()); + assert!(pow(-1.0, -1.14).is_nan()); + } +} diff --git a/library/compiler-builtins/libm/src/math/powf.rs b/library/compiler-builtins/libm/src/math/powf.rs new file mode 100644 index 0000000000000..11c7a7cbd94d1 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/powf.rs @@ -0,0 +1,343 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_powf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use core::cmp::Ordering; + +use super::{fabsf, scalbnf, sqrtf}; + +const BP: [f32; 2] = [1.0, 1.5]; +const DP_H: [f32; 2] = [0.0, 5.84960938e-01]; /* 0x3f15c000 */ +const DP_L: [f32; 2] = [0.0, 1.56322085e-06]; /* 0x35d1cfdc */ +const TWO24: f32 = 16777216.0; /* 0x4b800000 */ +const HUGE: f32 = 1.0e30; +const TINY: f32 = 1.0e-30; +const L1: f32 = 6.0000002384e-01; /* 0x3f19999a */ +const L2: f32 = 4.2857143283e-01; /* 0x3edb6db7 */ +const L3: f32 = 3.3333334327e-01; /* 0x3eaaaaab */ +const L4: f32 = 2.7272811532e-01; /* 0x3e8ba305 */ +const L5: f32 = 2.3066075146e-01; /* 0x3e6c3255 */ +const L6: f32 = 2.0697501302e-01; /* 0x3e53f142 */ +const P1: f32 = 1.6666667163e-01; /* 0x3e2aaaab */ +const P2: f32 = -2.7777778450e-03; /* 0xbb360b61 */ +const P3: f32 = 6.6137559770e-05; /* 0x388ab355 */ +const P4: f32 = -1.6533901999e-06; /* 0xb5ddea0e */ +const P5: f32 = 4.1381369442e-08; /* 0x3331bb4c */ +const LG2: f32 = 6.9314718246e-01; /* 0x3f317218 */ +const LG2_H: f32 = 6.93145752e-01; /* 0x3f317200 */ +const LG2_L: f32 = 1.42860654e-06; /* 0x35bfbe8c */ +const OVT: f32 = 4.2995665694e-08; /* -(128-log2(ovfl+.5ulp)) */ +const CP: f32 = 9.6179670095e-01; /* 0x3f76384f =2/(3ln2) */ +const CP_H: f32 = 9.6191406250e-01; /* 0x3f764000 =12b cp */ +const CP_L: f32 = -1.1736857402e-04; /* 0xb8f623c6 =tail of cp_h */ +const IVLN2: f32 = 1.4426950216e+00; +const IVLN2_H: f32 = 1.4426879883e+00; +const IVLN2_L: f32 = 7.0526075433e-06; + +/// Returns `x` to the power of `y` (f32). 
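+/// +/// Per the FreeBSD `e_powf.c` origin noted above, the result is computed as `2^(y * log2(|x|))`, carrying the logarithm and the product in split high/low parts for extra precision; the odd/even-integer classification of `y` below fixes the sign when `x` is negative.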
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn powf(x: f32, y: f32) -> f32 { + let mut z: f32; + let mut ax: f32; + let z_h: f32; + let z_l: f32; + let mut p_h: f32; + let mut p_l: f32; + let y1: f32; + let mut t1: f32; + let t2: f32; + let mut r: f32; + let s: f32; + let mut sn: f32; + let mut t: f32; + let mut u: f32; + let mut v: f32; + let mut w: f32; + let i: i32; + let mut j: i32; + let mut k: i32; + let mut yisint: i32; + let mut n: i32; + let hx: i32; + let hy: i32; + let mut ix: i32; + let iy: i32; + let mut is: i32; + + hx = x.to_bits() as i32; + hy = y.to_bits() as i32; + + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + + /* x**0 = 1, even if x is NaN */ + if iy == 0 { + return 1.0; + } + + /* 1**y = 1, even if y is NaN */ + if hx == 0x3f800000 { + return 1.0; + } + + /* NaN if either arg is NaN */ + if ix > 0x7f800000 || iy > 0x7f800000 { + return x + y; + } + + /* determine if y is an odd int when x < 0 + * yisint = 0 ... y is not an integer + * yisint = 1 ... y is an odd int + * yisint = 2 ... y is an even int + */ + yisint = 0; + if hx < 0 { + if iy >= 0x4b800000 { + yisint = 2; /* even integer y */ + } else if iy >= 0x3f800000 { + k = (iy >> 23) - 0x7f; /* exponent */ + j = iy >> (23 - k); + if (j << (23 - k)) == iy { + yisint = 2 - (j & 1); + } + } + } + + /* special value of y */ + if iy == 0x7f800000 { + /* y is +-inf */ + match ix.cmp(&0x3f800000) { + /* (-1)**+-inf is 1 */ + Ordering::Equal => return 1.0, + /* (|x|>1)**+-inf = inf,0 */ + Ordering::Greater => return if hy >= 0 { y } else { 0.0 }, + /* (|x|<1)**+-inf = 0,inf */ + Ordering::Less => return if hy >= 0 { 0.0 } else { -y }, + } + } + if iy == 0x3f800000 { + /* y is +-1 */ + return if hy >= 0 { x } else { 1.0 / x }; + } + + if hy == 0x40000000 { + /* y is 2 */ + return x * x; + } + + if hy == 0x3f000000 + /* y is 0.5 */ + && hx >= 0 + { + /* x >= +0 */ + return sqrtf(x); + } + + ax = fabsf(x); + /* special value of x */ + if ix == 0x7f800000 || ix == 0 || ix == 0x3f800000 { + /* x is +-0,+-inf,+-1 */ + z = ax; + if hy < 0 { + /* z = (1/|x|) */ + z = 1.0 / z; + } + + if hx < 0 { + if ((ix - 0x3f800000) | yisint) == 0 { + z = (z - z) / (z - z); /* (-1)**non-int is NaN */ + } else if yisint == 1 { + z = -z; /* (x<0)**odd = -(|x|**odd) */ + } + } + return z; + } + + sn = 1.0; /* sign of result */ + if hx < 0 { + if yisint == 0 { + /* (x<0)**(non-int) is NaN */ + return (x - x) / (x - x); + } + + if yisint == 1 { + /* (x<0)**(odd int) */ + sn = -1.0; + } + } + + /* |y| is HUGE */ + if iy > 0x4d000000 { + /* if |y| > 2**27 */ + /* over/underflow if x is not close to one */ + if ix < 0x3f7ffff8 { + return if hy < 0 { + sn * HUGE * HUGE + } else { + sn * TINY * TINY + }; + } + + if ix > 0x3f800007 { + return if hy > 0 { + sn * HUGE * HUGE + } else { + sn * TINY * TINY + }; + } + + /* now |1-x| is TINY <= 2**-20, suffice to compute + log(x) by x-x^2/2+x^3/3-x^4/4 */ + t = ax - 1.; /* t has 20 trailing zeros */ + w = (t * t) * (0.5 - t * (0.333333333333 - t * 0.25)); + u = IVLN2_H * t; /* IVLN2_H has 16 sig. 
bits */ + v = t * IVLN2_L - w * IVLN2; + t1 = u + v; + is = t1.to_bits() as i32; + t1 = f32::from_bits(is as u32 & 0xfffff000); + t2 = v - (t1 - u); + } else { + let mut s2: f32; + let mut s_h: f32; + let s_l: f32; + let mut t_h: f32; + let mut t_l: f32; + + n = 0; + /* take care subnormal number */ + if ix < 0x00800000 { + ax *= TWO24; + n -= 24; + ix = ax.to_bits() as i32; + } + n += ((ix) >> 23) - 0x7f; + j = ix & 0x007fffff; + /* determine interval */ + ix = j | 0x3f800000; /* normalize ix */ + if j <= 0x1cc471 { + /* |x|<sqrt(3/2) */ + k = 0; + } else if j < 0x5db3d7 { + /* |x|<sqrt(3) */ + k = 1; + } else { + k = 0; + n += 1; + ix -= 0x00800000; + } + ax = f32::from_bits(ix as u32); + + /* compute ss = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */ + u = ax - i!(BP, k as usize); /* bp[0]=1.0, bp[1]=1.5 */ + v = 1.0 / (ax + i!(BP, k as usize)); + s = u * v; + s_h = s; + is = s_h.to_bits() as i32; + s_h = f32::from_bits(is as u32 & 0xfffff000); + /* t_h=ax+bp[k] La */ + is = (((ix as u32 >> 1) & 0xfffff000) | 0x20000000) as i32; + t_h = f32::from_bits(is as u32 + 0x00400000 + ((k as u32) << 21)); + t_l = ax - (t_h - i!(BP, k as usize)); + s_l = v * ((u - s_h * t_h) - s_h * t_l); + /* compute log(ax) */ + s2 = s * s; + r = s2 * s2 * (L1 + s2 * (L2 + s2 * (L3 + s2 * (L4 + s2 * (L5 + s2 * L6))))); + r += s_l * (s_h + s); + s2 = s_h * s_h; + t_h = 3.0 + s2 + r; + is = t_h.to_bits() as i32; + t_h = f32::from_bits(is as u32 & 0xfffff000); + t_l = r - ((t_h - 3.0) - s2); + /* u+v = s*(1+...) */ + u = s_h * t_h; + v = s_l * t_h + t_l * s; + /* 2/(3log2)*(s+...) */ + p_h = u + v; + is = p_h.to_bits() as i32; + p_h = f32::from_bits(is as u32 & 0xfffff000); + p_l = v - (p_h - u); + z_h = CP_H * p_h; /* cp_h+cp_l = 2/(3*log2) */ + z_l = CP_L * p_h + p_l * CP + i!(DP_L, k as usize); + /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */ + t = n as f32; + t1 = ((z_h + z_l) + i!(DP_H, k as usize)) + t; + is = t1.to_bits() as i32; + t1 = f32::from_bits(is as u32 & 0xfffff000); + t2 = z_l - (((t1 - t) - i!(DP_H, k as usize)) - z_h); + }; + + /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */ + is = y.to_bits() as i32; + y1 = f32::from_bits(is as u32 & 0xfffff000); + p_l = (y - y1) * t1 + y * t2; + p_h = y1 * t1; + z = p_l + p_h; + j = z.to_bits() as i32; + if j > 0x43000000 { + /* if z > 128 */ + return sn * HUGE * HUGE; /* overflow */ + } else if j == 0x43000000 { + /* if z == 128 */ + if p_l + OVT > z - p_h { + return sn * HUGE * HUGE; /* overflow */ + } + } else if (j & 0x7fffffff) > 0x43160000 { + /* z < -150 */ + // FIXME: check should be (uint32_t)j > 0xc3160000 + return sn * TINY * TINY; /* underflow */ + } else if j as u32 == 0xc3160000 + /* z == -150 */ + && p_l <= z - p_h + { + return sn * TINY * TINY; /* underflow */ + } + + /* + * compute 2**(p_h+p_l) + */ + i = j & 0x7fffffff; + k = (i >> 23) - 0x7f; + n = 0; + if i > 0x3f000000 { + /* if |z| > 0.5, set n = [z+0.5] */ + n = j + (0x00800000 >> (k + 1)); + k = ((n & 0x7fffffff) >> 23) - 0x7f; /* new k for n */ + t = f32::from_bits(n as u32 & !(0x007fffff >> k)); + n = ((n & 0x007fffff) | 0x00800000) >> (23 - k); + if j < 0 { + n = -n; + } + p_h -= t; + } + t = p_l + p_h; + is = t.to_bits() as i32; + t = f32::from_bits(is as u32 & 0xffff8000); + u = t * LG2_H; + v = (p_l - (t - p_h)) * LG2 + t * LG2_L; + z = u + v; + w = v - (z - u); + t = z * z; + t1 = z - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5)))); + r = (z * t1) / (t1 - 2.0) - (w + z * w); + z = 1.0 - (r - z); + j = z.to_bits() as i32; + j += n << 23; + if (j >> 23) <= 0 { + /* subnormal output */ + z = scalbnf(z, n); + } else { + z = f32::from_bits(j as u32); + } + sn * z +} diff --git a/library/compiler-builtins/libm/src/math/rem_pio2.rs b/library/compiler-builtins/libm/src/math/rem_pio2.rs new file mode 100644 index 0000000000000..d677fd9dcb308 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/rem_pio2.rs @@ -0,0 +1,235 @@ +// origin: FreeBSD /usr/src/lib/msun/src/e_rem_pio2.c +// +//
==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== +// +// Optimized by Bruce D. Evans. */ +use super::rem_pio2_large; + +// #if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1 +// #define EPS DBL_EPSILON +const EPS: f64 = 2.2204460492503131e-16; +// #elif FLT_EVAL_METHOD==2 +// #define EPS LDBL_EPSILON +// #endif + +// TODO: Support FLT_EVAL_METHOD? + +const TO_INT: f64 = 1.5 / EPS; +/// 53 bits of 2/pi +const INV_PIO2: f64 = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */ +/// first 33 bits of pi/2 +const PIO2_1: f64 = 1.57079632673412561417e+00; /* 0x3FF921FB, 0x54400000 */ +/// pi/2 - PIO2_1 +const PIO2_1T: f64 = 6.07710050650619224932e-11; /* 0x3DD0B461, 0x1A626331 */ +/// second 33 bits of pi/2 +const PIO2_2: f64 = 6.07710050630396597660e-11; /* 0x3DD0B461, 0x1A600000 */ +/// pi/2 - (PIO2_1+PIO2_2) +const PIO2_2T: f64 = 2.02226624879595063154e-21; /* 0x3BA3198A, 0x2E037073 */ +/// third 33 bits of pi/2 +const PIO2_3: f64 = 2.02226624871116645580e-21; /* 0x3BA3198A, 0x2E000000 */ +/// pi/2 - (PIO2_1+PIO2_2+PIO2_3) +const PIO2_3T: f64 = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */ + +// return the remainder of x rem pi/2 in y[0]+y[1] +// use rem_pio2_large() for large x +// +// caller must handle the case when reduction is not needed: |x| ~<= pi/4 */ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub(crate) fn rem_pio2(x: f64) -> (i32, f64, f64) { + let x1p24 = f64::from_bits(0x4170000000000000); + + let sign = (f64::to_bits(x) >> 63) as i32; + let ix = (f64::to_bits(x) >> 32) as u32 & 0x7fffffff; + + fn medium(x: f64, ix: u32) -> (i32, f64, f64) { + /* rint(x/(pi/2)), Assume round-to-nearest. */ + let tmp = x * INV_PIO2 + TO_INT; + // force rounding of tmp to it's storage format on x87 to avoid + // excess precision issues. 
+ #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let tmp = force_eval!(tmp); + let f_n = tmp - TO_INT; + let n = f_n as i32; + let mut r = x - f_n * PIO2_1; + let mut w = f_n * PIO2_1T; /* 1st round, good to 85 bits */ + let mut y0 = r - w; + let ui = f64::to_bits(y0); + let ey = (ui >> 52) as i32 & 0x7ff; + let ex = (ix >> 20) as i32; + if ex - ey > 16 { + /* 2nd round, good to 118 bits */ + let t = r; + w = f_n * PIO2_2; + r = t - w; + w = f_n * PIO2_2T - ((t - r) - w); + y0 = r - w; + let ey = (f64::to_bits(y0) >> 52) as i32 & 0x7ff; + if ex - ey > 49 { + /* 3rd round, good to 151 bits, covers all cases */ + let t = r; + w = f_n * PIO2_3; + r = t - w; + w = f_n * PIO2_3T - ((t - r) - w); + y0 = r - w; + } + } + let y1 = (r - y0) - w; + (n, y0, y1) + } + + if ix <= 0x400f6a7a { + /* |x| ~<= 5pi/4 */ + if (ix & 0xfffff) == 0x921fb { + /* |x| ~= pi/2 or 2pi/2 */ + return medium(x, ix); /* cancellation -- use medium case */ + } + if ix <= 0x4002d97c { + /* |x| ~<= 3pi/4 */ + if sign == 0 { + let z = x - PIO2_1; /* one round good to 85 bits */ + let y0 = z - PIO2_1T; + let y1 = (z - y0) - PIO2_1T; + return (1, y0, y1); + } else { + let z = x + PIO2_1; + let y0 = z + PIO2_1T; + let y1 = (z - y0) + PIO2_1T; + return (-1, y0, y1); + } + } else if sign == 0 { + let z = x - 2.0 * PIO2_1; + let y0 = z - 2.0 * PIO2_1T; + let y1 = (z - y0) - 2.0 * PIO2_1T; + return (2, y0, y1); + } else { + let z = x + 2.0 * PIO2_1; + let y0 = z + 2.0 * PIO2_1T; + let y1 = (z - y0) + 2.0 * PIO2_1T; + return (-2, y0, y1); + } + } + if ix <= 0x401c463b { + /* |x| ~<= 9pi/4 */ + if ix <= 0x4015fdbc { + /* |x| ~<= 7pi/4 */ + if ix == 0x4012d97c { + /* |x| ~= 3pi/2 */ + return medium(x, ix); + } + if sign == 0 { + let z = x - 3.0 * PIO2_1; + let y0 = z - 3.0 * PIO2_1T; + let y1 = (z - y0) - 3.0 * PIO2_1T; + return (3, y0, y1); + } else { + let z = x + 3.0 * PIO2_1; + let y0 = z + 3.0 * PIO2_1T; + let y1 = (z - y0) + 3.0 * PIO2_1T; + return (-3, y0, y1); + } + } else { + if ix == 0x401921fb { + /* |x| ~= 4pi/2 */ + return medium(x, ix); + } + if sign == 0 { + let z = x - 4.0 * PIO2_1; + let y0 = z - 4.0 * PIO2_1T; + let y1 = (z - y0) - 4.0 * PIO2_1T; + return (4, y0, y1); + } else { + let z = x + 4.0 * PIO2_1; + let y0 = z + 4.0 * PIO2_1T; + let y1 = (z - y0) + 4.0 * PIO2_1T; + return (-4, y0, y1); + } + } + } + if ix < 0x413921fb { + /* |x| ~< 2^20*(pi/2), medium size */ + return medium(x, ix); + } + /* + * all other (large) arguments + */ + if ix >= 0x7ff00000 { + /* x is inf or NaN */ + let y0 = x - x; + let y1 = y0; + return (0, y0, y1); + } + /* set z = scalbn(|x|,-ilogb(x)+23) */ + let mut ui = f64::to_bits(x); + ui &= (!1) >> 12; + ui |= (0x3ff + 23) << 52; + let mut z = f64::from_bits(ui); + let mut tx = [0.0; 3]; + for i in 0..2 { + i!(tx,i, =, z as i32 as f64); + z = (z - i!(tx, i)) * x1p24; + } + i!(tx,2, =, z); + /* skip zero terms, first term is non-zero */ + let mut i = 2; + while i != 0 && i!(tx, i) == 0.0 { + i -= 1; + } + let mut ty = [0.0; 3]; + let n = rem_pio2_large(&tx[..=i], &mut ty, ((ix as i32) >> 20) - (0x3ff + 23), 1); + if sign != 0 { + return (-n, -i!(ty, 0), -i!(ty, 1)); + } + (n, i!(ty, 0), i!(ty, 1)) +} + +#[cfg(test)] +mod tests { + use super::rem_pio2; + + #[test] + // FIXME(correctness): inaccurate results on i586 + #[cfg_attr(all(target_arch = "x86", not(target_feature = "sse")), ignore)] + fn test_near_pi() { + let arg = 3.141592025756836; + let arg = force_eval!(arg); + assert_eq!( + rem_pio2(arg), + (2, -6.278329573009626e-7, -2.1125998133974653e-23) + ); + let arg = 
3.141592033207416; + let arg = force_eval!(arg); + assert_eq!( + rem_pio2(arg), + (2, -6.20382377148128e-7, -2.1125998133974653e-23) + ); + let arg = 3.141592144966125; + let arg = force_eval!(arg); + assert_eq!( + rem_pio2(arg), + (2, -5.086236681942706e-7, -2.1125998133974653e-23) + ); + let arg = 3.141592979431152; + let arg = force_eval!(arg); + assert_eq!( + rem_pio2(arg), + (2, 3.2584135866119817e-7, -2.1125998133974653e-23) + ); + } + + #[test] + fn test_overflow_b9b847() { + let _ = rem_pio2(-3054214.5490637687); + } + + #[test] + fn test_overflow_4747b9() { + let _ = rem_pio2(917340800458.2274); + } +} diff --git a/library/compiler-builtins/libm/src/math/rem_pio2_large.rs b/library/compiler-builtins/libm/src/math/rem_pio2_large.rs new file mode 100644 index 0000000000000..6d679bbe98c48 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/rem_pio2_large.rs @@ -0,0 +1,468 @@ +#![allow(unused_unsafe)] +/* origin: FreeBSD /usr/src/lib/msun/src/k_rem_pio2.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::{floor, scalbn}; + +// initial value for jk +const INIT_JK: [usize; 4] = [3, 4, 4, 6]; + +// Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi +// +// integer array, contains the (24*i)-th to (24*i+23)-th +// bit of 2/pi after binary point. The corresponding +// floating value is +// +// ipio2[i] * 2^(-24(i+1)). +// +// NB: This table must have at least (e0-3)/24 + jk terms. +// For quad precision (e0 <= 16360, jk = 6), this is 686. 
+#[cfg(any(target_pointer_width = "32", target_pointer_width = "16"))] +const IPIO2: [i32; 66] = [ + 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0x95993C, 0x439041, 0xFE5163, + 0xABDEBB, 0xC561B7, 0x246E3A, 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, + 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41, 0x3991D6, 0x398353, 0x39F49C, + 0x845F8B, 0xBDF928, 0x3B1FF8, 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF, + 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, 0xF17B3D, 0x0739F7, 0x8A5292, + 0xEA6BFB, 0x5FB11F, 0x8D5D08, 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, + 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, 0x4D7327, 0x310606, 0x1556CA, + 0x73A8C9, 0x60E27B, 0xC08C6B, +]; + +#[cfg(target_pointer_width = "64")] +const IPIO2: [i32; 690] = [ + 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0x95993C, 0x439041, 0xFE5163, + 0xABDEBB, 0xC561B7, 0x246E3A, 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, + 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41, 0x3991D6, 0x398353, 0x39F49C, + 0x845F8B, 0xBDF928, 0x3B1FF8, 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF, + 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, 0xF17B3D, 0x0739F7, 0x8A5292, + 0xEA6BFB, 0x5FB11F, 0x8D5D08, 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, + 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, 0x4D7327, 0x310606, 0x1556CA, + 0x73A8C9, 0x60E27B, 0xC08C6B, 0x47C419, 0xC367CD, 0xDCE809, 0x2A8359, 0xC4768B, 0x961CA6, + 0xDDAF44, 0xD15719, 0x053EA5, 0xFF0705, 0x3F7E33, 0xE832C2, 0xDE4F98, 0x327DBB, 0xC33D26, + 0xEF6B1E, 0x5EF89F, 0x3A1F35, 0xCAF27F, 0x1D87F1, 0x21907C, 0x7C246A, 0xFA6ED5, 0x772D30, + 0x433B15, 0xC614B5, 0x9D19C3, 0xC2C4AD, 0x414D2C, 0x5D000C, 0x467D86, 0x2D71E3, 0x9AC69B, + 0x006233, 0x7CD2B4, 0x97A7B4, 0xD55537, 0xF63ED7, 0x1810A3, 0xFC764D, 0x2A9D64, 0xABD770, + 0xF87C63, 0x57B07A, 0xE71517, 0x5649C0, 0xD9D63B, 0x3884A7, 0xCB2324, 0x778AD6, 0x23545A, + 0xB91F00, 0x1B0AF1, 0xDFCE19, 0xFF319F, 0x6A1E66, 0x615799, 0x47FBAC, 0xD87F7E, 0xB76522, + 0x89E832, 0x60BFE6, 0xCDC4EF, 0x09366C, 0xD43F5D, 0xD7DE16, 0xDE3B58, 0x929BDE, 0x2822D2, + 0xE88628, 0x4D58E2, 0x32CAC6, 0x16E308, 0xCB7DE0, 0x50C017, 0xA71DF3, 0x5BE018, 0x34132E, + 0x621283, 0x014883, 0x5B8EF5, 0x7FB0AD, 0xF2E91E, 0x434A48, 0xD36710, 0xD8DDAA, 0x425FAE, + 0xCE616A, 0xA4280A, 0xB499D3, 0xF2A606, 0x7F775C, 0x83C2A3, 0x883C61, 0x78738A, 0x5A8CAF, + 0xBDD76F, 0x63A62D, 0xCBBFF4, 0xEF818D, 0x67C126, 0x45CA55, 0x36D9CA, 0xD2A828, 0x8D61C2, + 0x77C912, 0x142604, 0x9B4612, 0xC459C4, 0x44C5C8, 0x91B24D, 0xF31700, 0xAD43D4, 0xE54929, + 0x10D5FD, 0xFCBE00, 0xCC941E, 0xEECE70, 0xF53E13, 0x80F1EC, 0xC3E7B3, 0x28F8C7, 0x940593, + 0x3E71C1, 0xB3092E, 0xF3450B, 0x9C1288, 0x7B20AB, 0x9FB52E, 0xC29247, 0x2F327B, 0x6D550C, + 0x90A772, 0x1FE76B, 0x96CB31, 0x4A1679, 0xE27941, 0x89DFF4, 0x9794E8, 0x84E6E2, 0x973199, + 0x6BED88, 0x365F5F, 0x0EFDBB, 0xB49A48, 0x6CA467, 0x427271, 0x325D8D, 0xB8159F, 0x09E5BC, + 0x25318D, 0x3974F7, 0x1C0530, 0x010C0D, 0x68084B, 0x58EE2C, 0x90AA47, 0x02E774, 0x24D6BD, + 0xA67DF7, 0x72486E, 0xEF169F, 0xA6948E, 0xF691B4, 0x5153D1, 0xF20ACF, 0x339820, 0x7E4BF5, + 0x6863B2, 0x5F3EDD, 0x035D40, 0x7F8985, 0x295255, 0xC06437, 0x10D86D, 0x324832, 0x754C5B, + 0xD4714E, 0x6E5445, 0xC1090B, 0x69F52A, 0xD56614, 0x9D0727, 0x50045D, 0xDB3BB4, 0xC576EA, + 0x17F987, 0x7D6B49, 0xBA271D, 0x296996, 0xACCCC6, 0x5414AD, 0x6AE290, 0x89D988, 0x50722C, + 0xBEA404, 0x940777, 0x7030F3, 
0x27FC00, 0xA871EA, 0x49C266, 0x3DE064, 0x83DD97, 0x973FA3, + 0xFD9443, 0x8C860D, 0xDE4131, 0x9D3992, 0x8C70DD, 0xE7B717, 0x3BDF08, 0x2B3715, 0xA0805C, + 0x93805A, 0x921110, 0xD8E80F, 0xAF806C, 0x4BFFDB, 0x0F9038, 0x761859, 0x15A562, 0xBBCB61, + 0xB989C7, 0xBD4010, 0x04F2D2, 0x277549, 0xF6B6EB, 0xBB22DB, 0xAA140A, 0x2F2689, 0x768364, + 0x333B09, 0x1A940E, 0xAA3A51, 0xC2A31D, 0xAEEDAF, 0x12265C, 0x4DC26D, 0x9C7A2D, 0x9756C0, + 0x833F03, 0xF6F009, 0x8C402B, 0x99316D, 0x07B439, 0x15200C, 0x5BC3D8, 0xC492F5, 0x4BADC6, + 0xA5CA4E, 0xCD37A7, 0x36A9E6, 0x9492AB, 0x6842DD, 0xDE6319, 0xEF8C76, 0x528B68, 0x37DBFC, + 0xABA1AE, 0x3115DF, 0xA1AE00, 0xDAFB0C, 0x664D64, 0xB705ED, 0x306529, 0xBF5657, 0x3AFF47, + 0xB9F96A, 0xF3BE75, 0xDF9328, 0x3080AB, 0xF68C66, 0x15CB04, 0x0622FA, 0x1DE4D9, 0xA4B33D, + 0x8F1B57, 0x09CD36, 0xE9424E, 0xA4BE13, 0xB52333, 0x1AAAF0, 0xA8654F, 0xA5C1D2, 0x0F3F0B, + 0xCD785B, 0x76F923, 0x048B7B, 0x721789, 0x53A6C6, 0xE26E6F, 0x00EBEF, 0x584A9B, 0xB7DAC4, + 0xBA66AA, 0xCFCF76, 0x1D02D1, 0x2DF1B1, 0xC1998C, 0x77ADC3, 0xDA4886, 0xA05DF7, 0xF480C6, + 0x2FF0AC, 0x9AECDD, 0xBC5C3F, 0x6DDED0, 0x1FC790, 0xB6DB2A, 0x3A25A3, 0x9AAF00, 0x9353AD, + 0x0457B6, 0xB42D29, 0x7E804B, 0xA707DA, 0x0EAA76, 0xA1597B, 0x2A1216, 0x2DB7DC, 0xFDE5FA, + 0xFEDB89, 0xFDBE89, 0x6C76E4, 0xFCA906, 0x70803E, 0x156E85, 0xFF87FD, 0x073E28, 0x336761, + 0x86182A, 0xEABD4D, 0xAFE7B3, 0x6E6D8F, 0x396795, 0x5BBF31, 0x48D784, 0x16DF30, 0x432DC7, + 0x356125, 0xCE70C9, 0xB8CB30, 0xFD6CBF, 0xA200A4, 0xE46C05, 0xA0DD5A, 0x476F21, 0xD21262, + 0x845CB9, 0x496170, 0xE0566B, 0x015299, 0x375550, 0xB7D51E, 0xC4F133, 0x5F6E13, 0xE4305D, + 0xA92E85, 0xC3B21D, 0x3632A1, 0xA4B708, 0xD4B1EA, 0x21F716, 0xE4698F, 0x77FF27, 0x80030C, + 0x2D408D, 0xA0CD4F, 0x99A520, 0xD3A2B3, 0x0A5D2F, 0x42F9B4, 0xCBDA11, 0xD0BE7D, 0xC1DB9B, + 0xBD17AB, 0x81A2CA, 0x5C6A08, 0x17552E, 0x550027, 0xF0147F, 0x8607E1, 0x640B14, 0x8D4196, + 0xDEBE87, 0x2AFDDA, 0xB6256B, 0x34897B, 0xFEF305, 0x9EBFB9, 0x4F6A68, 0xA82A4A, 0x5AC44F, + 0xBCF82D, 0x985AD7, 0x95C7F4, 0x8D4D0D, 0xA63A20, 0x5F57A4, 0xB13F14, 0x953880, 0x0120CC, + 0x86DD71, 0xB6DEC9, 0xF560BF, 0x11654D, 0x6B0701, 0xACB08C, 0xD0C0B2, 0x485551, 0x0EFB1E, + 0xC37295, 0x3B06A3, 0x3540C0, 0x7BDC06, 0xCC45E0, 0xFA294E, 0xC8CAD6, 0x41F3E8, 0xDE647C, + 0xD8649B, 0x31BED9, 0xC397A4, 0xD45877, 0xC5E369, 0x13DAF0, 0x3C3ABA, 0x461846, 0x5F7555, + 0xF5BDD2, 0xC6926E, 0x5D2EAC, 0xED440E, 0x423E1C, 0x87C461, 0xE9FD29, 0xF3D6E7, 0xCA7C22, + 0x35916F, 0xC5E008, 0x8DD7FF, 0xE26A6E, 0xC6FDB0, 0xC10893, 0x745D7C, 0xB2AD6B, 0x9D6ECD, + 0x7B723E, 0x6A11C6, 0xA9CFF7, 0xDF7329, 0xBAC9B5, 0x5100B7, 0x0DB2E2, 0x24BA74, 0x607DE5, + 0x8AD874, 0x2C150D, 0x0C1881, 0x94667E, 0x162901, 0x767A9F, 0xBEFDFD, 0xEF4556, 0x367ED9, + 0x13D9EC, 0xB9BA8B, 0xFC97C4, 0x27A831, 0xC36EF1, 0x36C594, 0x56A8D8, 0xB5A8B4, 0x0ECCCF, + 0x2D8912, 0x34576F, 0x89562C, 0xE3CE99, 0xB920D6, 0xAA5E6B, 0x9C2A3E, 0xCC5F11, 0x4A0BFD, + 0xFBF4E1, 0x6D3B8E, 0x2C86E2, 0x84D4E9, 0xA9B4FC, 0xD1EEEF, 0xC9352E, 0x61392F, 0x442138, + 0xC8D91B, 0x0AFC81, 0x6A4AFB, 0xD81C2F, 0x84B453, 0x8C994E, 0xCC2254, 0xDC552A, 0xD6C6C0, + 0x96190B, 0xB8701A, 0x649569, 0x605A26, 0xEE523F, 0x0F117F, 0x11B5F4, 0xF5CBFC, 0x2DBC34, + 0xEEBC34, 0xCC5DE8, 0x605EDD, 0x9B8E67, 0xEF3392, 0xB817C9, 0x9B5861, 0xBC57E1, 0xC68351, + 0x103ED8, 0x4871DD, 0xDD1C2D, 0xA118AF, 0x462C21, 0xD7F359, 0x987AD9, 0xC0549E, 0xFA864F, + 0xFC0656, 0xAE79E5, 0x362289, 0x22AD38, 0xDC9367, 0xAAE855, 0x382682, 0x9BE7CA, 0xA40D51, + 0xB13399, 0x0ED7A9, 0x480569, 0xF0B265, 0xA7887F, 0x974C88, 0x36D1F9, 0xB39221, 
0x4A827B, + 0x21CF98, 0xDC9F40, 0x5547DC, 0x3A74E1, 0x42EB67, 0xDF9DFE, 0x5FD45E, 0xA4677B, 0x7AACBA, + 0xA2F655, 0x23882B, 0x55BA41, 0x086E59, 0x862A21, 0x834739, 0xE6E389, 0xD49EE5, 0x40FB49, + 0xE956FF, 0xCA0F1C, 0x8A59C5, 0x2BFA94, 0xC5C1D3, 0xCFC50F, 0xAE5ADB, 0x86C547, 0x624385, + 0x3B8621, 0x94792C, 0x876110, 0x7B4C2A, 0x1A2C80, 0x12BF43, 0x902688, 0x893C78, 0xE4C4A8, + 0x7BDBE5, 0xC23AC4, 0xEAF426, 0x8A67F7, 0xBF920D, 0x2BA365, 0xB1933D, 0x0B7CBD, 0xDC51A4, + 0x63DD27, 0xDDE169, 0x19949A, 0x9529A8, 0x28CE68, 0xB4ED09, 0x209F44, 0xCA984E, 0x638270, + 0x237C7E, 0x32B90F, 0x8EF5A7, 0xE75614, 0x08F121, 0x2A9DB5, 0x4D7E6F, 0x5119A5, 0xABF9B5, + 0xD6DF82, 0x61DD96, 0x023616, 0x9F3AC4, 0xA1A283, 0x6DED72, 0x7A8D39, 0xA9B882, 0x5C326B, + 0x5B2746, 0xED3400, 0x7700D2, 0x55F4FC, 0x4D5901, 0x8071E0, +]; + +const PIO2: [f64; 8] = [ + 1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */ + 7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */ + 5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */ + 3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */ + 1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */ + 1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */ + 2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */ + 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */ +]; + +// fn rem_pio2_large(x : &[f64], y : &mut [f64], e0 : i32, prec : usize) -> i32 +// +// Input parameters: +// x[] The input value (must be positive) is broken into nx +// pieces of 24-bit integers in double precision format. +// x[i] will be the i-th 24 bit of x. The scaled exponent +// of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 +// match x's up to 24 bits. +// +// Example of breaking a double positive z into x[0]+x[1]+x[2]: +// e0 = ilogb(z)-23 +// z = scalbn(z,-e0) +// for i = 0,1,2 +// x[i] = floor(z) +// z = (z-x[i])*2**24 +// +// y[] ouput result in an array of double precision numbers. +// The dimension of y[] is: +// 24-bit precision 1 +// 53-bit precision 2 +// 64-bit precision 2 +// 113-bit precision 3 +// The actual value is the sum of them. Thus for 113-bit +// precison, one may have to do something like: +// +// long double t,w,r_head, r_tail; +// t = (long double)y[2] + (long double)y[1]; +// w = (long double)y[0]; +// r_head = t+w; +// r_tail = w - (r_head - t); +// +// e0 The exponent of x[0]. Must be <= 16360 or you need to +// expand the ipio2 table. +// +// prec an integer indicating the precision: +// 0 24 bits (single) +// 1 53 bits (double) +// 2 64 bits (extended) +// 3 113 bits (quad) +// +// Here is the description of some local variables: +// +// jk jk+1 is the initial number of terms of ipio2[] needed +// in the computation. The minimum and recommended value +// for jk is 3,4,4,6 for single, double, extended, and quad. +// jk+1 must be 2 larger than you might expect so that our +// recomputation test works. (Up to 24 bits in the integer +// part (the 24 bits of it that we compute) and 23 bits in +// the fraction part may be lost to cancelation before we +// recompute.) +// +// jz local integer variable indicating the number of +// terms of ipio2[] used. +// +// jx nx - 1 +// +// jv index for pointing to the suitable ipio2[] for the +// computation. In general, we want +// ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8 +// is an integer. Thus +// e0-3-24*jv >= 0 or (e0-3)/24 >= jv +// Hence jv = max(0,(e0-3)/24). +// +// jp jp+1 is the number of terms in PIo2[] needed, jp = jk. 
+// +// q[] double array with integral value, representing the +// 24-bits chunk of the product of x and 2/pi. +// +// q0 the corresponding exponent of q[0]. Note that the +// exponent for q[i] would be q0-24*i. +// +// PIo2[] double precision array, obtained by cutting pi/2 +// into 24 bits chunks. +// +// f[] ipio2[] in floating point +// +// iq[] integer array by breaking up q[] in 24-bits chunk. +// +// fq[] final product of x*(2/pi) in fq[0],..,fq[jk] +// +// ih integer. If >0 it indicates q[] is >= 0.5, hence +// it also indicates the *sign* of the result. + +/// Return the last three digits of N with y = x - N*pi/2 +/// so that |y| < pi/2. +/// +/// The method is to compute the integer (mod 8) and fraction parts of +/// (2/pi)*x without doing the full multiplication. In general we +/// skip the part of the product that are known to be a huge integer ( +/// more accurately, = 0 mod 8 ). Thus the number of operations are +/// independent of the exponent of the input. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub(crate) fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) -> i32 { + let x1p24 = f64::from_bits(0x4170000000000000); // 0x1p24 === 2 ^ 24 + let x1p_24 = f64::from_bits(0x3e70000000000000); // 0x1p_24 === 2 ^ (-24) + + if cfg!(target_pointer_width = "64") { + debug_assert!(e0 <= 16360); + } + + let nx = x.len(); + + let mut fw: f64; + let mut n: i32; + let mut ih: i32; + let mut z: f64; + let mut f: [f64; 20] = [0.; 20]; + let mut fq: [f64; 20] = [0.; 20]; + let mut q: [f64; 20] = [0.; 20]; + let mut iq: [i32; 20] = [0; 20]; + + /* initialize jk*/ + let jk = i!(INIT_JK, prec); + let jp = jk; + + /* determine jx,jv,q0, note that 3>q0 */ + let jx = nx - 1; + let mut jv = div!(e0 - 3, 24); + if jv < 0 { + jv = 0; + } + let mut q0 = e0 - 24 * (jv + 1); + let jv = jv as usize; + + /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */ + let mut j = (jv as i32) - (jx as i32); + let m = jx + jk; + for i in 0..=m { + i!(f, i, =, if j < 0 { + 0. + } else { + i!(IPIO2, j as usize) as f64 + }); + j += 1; + } + + /* compute q[0],q[1],...q[jk] */ + for i in 0..=jk { + fw = 0f64; + for j in 0..=jx { + fw += i!(x, j) * i!(f, jx + i - j); + } + i!(q, i, =, fw); + } + + let mut jz = jk; + + 'recompute: loop { + /* distill q[] into iq[] reversingly */ + let mut i = 0i32; + z = i!(q, jz); + for j in (1..=jz).rev() { + fw = (x1p_24 * z) as i32 as f64; + i!(iq, i as usize, =, (z - x1p24 * fw) as i32); + z = i!(q, j - 1) + fw; + i += 1; + } + + /* compute n */ + z = scalbn(z, q0); /* actual value of z */ + z -= 8.0 * floor(z * 0.125); /* trim off integer >= 8 */ + n = z as i32; + z -= n as f64; + ih = 0; + if q0 > 0 { + /* need iq[jz-1] to determine n */ + i = i!(iq, jz - 1) >> (24 - q0); + n += i; + i!(iq, jz - 1, -=, i << (24 - q0)); + ih = i!(iq, jz - 1) >> (23 - q0); + } else if q0 == 0 { + ih = i!(iq, jz - 1) >> 23; + } else if z >= 0.5 { + ih = 2; + } + + if ih > 0 { + /* q > 0.5 */ + n += 1; + let mut carry = 0i32; + for i in 0..jz { + /* compute 1-q */ + let j = i!(iq, i); + if carry == 0 { + if j != 0 { + carry = 1; + i!(iq, i, =, 0x1000000 - j); + } + } else { + i!(iq, i, =, 0xffffff - j); + } + } + if q0 > 0 { + /* rare case: chance is 1 in 12 */ + match q0 { + 1 => { + i!(iq, jz - 1, &=, 0x7fffff); + } + 2 => { + i!(iq, jz - 1, &=, 0x3fffff); + } + _ => {} + } + } + if ih == 2 { + z = 1. - z; + if carry != 0 { + z -= scalbn(1., q0); + } + } + } + + /* check if recomputation is needed */ + if z == 0. 
{ + let mut j = 0; + for i in (jk..=jz - 1).rev() { + j |= i!(iq, i); + } + if j == 0 { + /* need recomputation */ + let mut k = 1; + while i!(iq, jk - k, ==, 0) { + k += 1; /* k = no. of terms needed */ + } + + for i in (jz + 1)..=(jz + k) { + /* add q[jz+1] to q[jz+k] */ + i!(f, jx + i, =, i!(IPIO2, jv + i) as f64); + fw = 0f64; + for j in 0..=jx { + fw += i!(x, j) * i!(f, jx + i - j); + } + i!(q, i, =, fw); + } + jz += k; + continue 'recompute; + } + } + + break; + } + + /* chop off zero terms */ + if z == 0. { + jz -= 1; + q0 -= 24; + while i!(iq, jz) == 0 { + jz -= 1; + q0 -= 24; + } + } else { + /* break z into 24-bit if necessary */ + z = scalbn(z, -q0); + if z >= x1p24 { + fw = (x1p_24 * z) as i32 as f64; + i!(iq, jz, =, (z - x1p24 * fw) as i32); + jz += 1; + q0 += 24; + i!(iq, jz, =, fw as i32); + } else { + i!(iq, jz, =, z as i32); + } + } + + /* convert integer "bit" chunk to floating-point value */ + fw = scalbn(1., q0); + for i in (0..=jz).rev() { + i!(q, i, =, fw * (i!(iq, i) as f64)); + fw *= x1p_24; + } + + /* compute PIo2[0,...,jp]*q[jz,...,0] */ + for i in (0..=jz).rev() { + fw = 0f64; + let mut k = 0; + while (k <= jp) && (k <= jz - i) { + fw += i!(PIO2, k) * i!(q, i + k); + k += 1; + } + i!(fq, jz - i, =, fw); + } + + /* compress fq[] into y[] */ + match prec { + 0 => { + fw = 0f64; + for i in (0..=jz).rev() { + fw += i!(fq, i); + } + i!(y, 0, =, if ih == 0 { fw } else { -fw }); + } + 1 | 2 => { + fw = 0f64; + for i in (0..=jz).rev() { + fw += i!(fq, i); + } + i!(y, 0, =, if ih == 0 { fw } else { -fw }); + fw = i!(fq, 0) - fw; + for i in 1..=jz { + fw += i!(fq, i); + } + i!(y, 1, =, if ih == 0 { fw } else { -fw }); + } + 3 => { + /* painful */ + for i in (1..=jz).rev() { + fw = i!(fq, i - 1) + i!(fq, i); + i!(fq, i, +=, i!(fq, i - 1) - fw); + i!(fq, i - 1, =, fw); + } + for i in (2..=jz).rev() { + fw = i!(fq, i - 1) + i!(fq, i); + i!(fq, i, +=, i!(fq, i - 1) - fw); + i!(fq, i - 1, =, fw); + } + fw = 0f64; + for i in (2..=jz).rev() { + fw += i!(fq, i); + } + if ih == 0 { + i!(y, 0, =, i!(fq, 0)); + i!(y, 1, =, i!(fq, 1)); + i!(y, 2, =, fw); + } else { + i!(y, 0, =, -i!(fq, 0)); + i!(y, 1, =, -i!(fq, 1)); + i!(y, 2, =, -fw); + } + } + #[cfg(debug_assertions)] + _ => unreachable!(), + #[cfg(not(debug_assertions))] + _ => {} + } + n & 7 +} diff --git a/library/compiler-builtins/libm/src/math/rem_pio2f.rs b/library/compiler-builtins/libm/src/math/rem_pio2f.rs new file mode 100644 index 0000000000000..3c658fe3dbce2 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/rem_pio2f.rs @@ -0,0 +1,67 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_rem_pio2f.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + * Debugged and optimized by Bruce D. Evans. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. 
+ * ==================================================== + */ + +use core::f64; + +use super::rem_pio2_large; + +const TOINT: f64 = 1.5 / f64::EPSILON; + +/// 53 bits of 2/pi +const INV_PIO2: f64 = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */ +/// first 25 bits of pi/2 +const PIO2_1: f64 = 1.57079631090164184570e+00; /* 0x3FF921FB, 0x50000000 */ +/// pi/2 - pio2_1 +const PIO2_1T: f64 = 1.58932547735281966916e-08; /* 0x3E5110b4, 0x611A6263 */ + +/// Return the remainder of x rem pi/2 in *y +/// +/// use double precision for everything except passing x +/// use __rem_pio2_large() for large x +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub(crate) fn rem_pio2f(x: f32) -> (i32, f64) { + let x64 = x as f64; + + let mut tx: [f64; 1] = [0.]; + let mut ty: [f64; 1] = [0.]; + + let ix = x.to_bits() & 0x7fffffff; + /* 25+53 bit pi is good enough for medium size */ + if ix < 0x4dc90fdb { + /* |x| ~< 2^28*(pi/2), medium size */ + /* Use a specialized rint() to get fn. Assume round-to-nearest. */ + let tmp = x64 * INV_PIO2 + TOINT; + // force rounding of tmp to it's storage format on x87 to avoid + // excess precision issues. + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let tmp = force_eval!(tmp); + let f_n = tmp - TOINT; + return (f_n as i32, x64 - f_n * PIO2_1 - f_n * PIO2_1T); + } + if ix >= 0x7f800000 { + /* x is inf or NaN */ + return (0, x64 - x64); + } + /* scale x into [2^23, 2^24-1] */ + let sign = (x.to_bits() >> 31) != 0; + let e0 = ((ix >> 23) - (0x7f + 23)) as i32; /* e0 = ilogb(|x|)-23, positive */ + tx[0] = f32::from_bits(ix - (e0 << 23) as u32) as f64; + let n = rem_pio2_large(&tx, &mut ty, e0, 0); + if sign { + return (-n, -ty[0]); + } + (n, ty[0]) +} diff --git a/library/compiler-builtins/libm/src/math/remainder.rs b/library/compiler-builtins/libm/src/math/remainder.rs new file mode 100644 index 0000000000000..9e966c9ed7f4a --- /dev/null +++ b/library/compiler-builtins/libm/src/math/remainder.rs @@ -0,0 +1,5 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn remainder(x: f64, y: f64) -> f64 { + let (result, _) = super::remquo(x, y); + result +} diff --git a/library/compiler-builtins/libm/src/math/remainderf.rs b/library/compiler-builtins/libm/src/math/remainderf.rs new file mode 100644 index 0000000000000..b1407cf2ace36 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/remainderf.rs @@ -0,0 +1,5 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn remainderf(x: f32, y: f32) -> f32 { + let (result, _) = super::remquof(x, y); + result +} diff --git a/library/compiler-builtins/libm/src/math/remquo.rs b/library/compiler-builtins/libm/src/math/remquo.rs new file mode 100644 index 0000000000000..4c11e848746bc --- /dev/null +++ b/library/compiler-builtins/libm/src/math/remquo.rs @@ -0,0 +1,106 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn remquo(mut x: f64, mut y: f64) -> (f64, i32) { + let ux: u64 = x.to_bits(); + let mut uy: u64 = y.to_bits(); + let mut ex = ((ux >> 52) & 0x7ff) as i32; + let mut ey = ((uy >> 52) & 0x7ff) as i32; + let sx = (ux >> 63) != 0; + let sy = (uy >> 63) != 0; + let mut q: u32; + let mut i: u64; + let mut uxi: u64 = ux; + + if (uy << 1) == 0 || y.is_nan() || ex == 0x7ff { + return ((x * y) / (x * y), 0); + } + if (ux << 1) == 0 { + return (x, 0); + } + + /* normalize x and y */ + if ex == 0 { + i = uxi << 12; + while (i >> 63) == 0 { + ex -= 1; + i <<= 1; + } + uxi <<= -ex + 1; + } else { + uxi &= (!0) >> 12; + uxi |= 1 << 52; + } + if ey 
== 0 { + i = uy << 12; + while (i >> 63) == 0 { + ey -= 1; + i <<= 1; + } + uy <<= -ey + 1; + } else { + uy &= (!0) >> 12; + uy |= 1 << 52; + } + + q = 0; + + if ex + 1 != ey { + if ex < ey { + return (x, 0); + } + /* x mod y */ + while ex > ey { + i = uxi.wrapping_sub(uy); + if (i >> 63) == 0 { + uxi = i; + q += 1; + } + uxi <<= 1; + q <<= 1; + ex -= 1; + } + i = uxi.wrapping_sub(uy); + if (i >> 63) == 0 { + uxi = i; + q += 1; + } + if uxi == 0 { + ex = -60; + } else { + while (uxi >> 52) == 0 { + uxi <<= 1; + ex -= 1; + } + } + } + + /* scale result and decide between |x| and |x|-|y| */ + if ex > 0 { + uxi -= 1 << 52; + uxi |= (ex as u64) << 52; + } else { + uxi >>= -ex + 1; + } + x = f64::from_bits(uxi); + if sy { + y = -y; + } + if ex == ey || (ex + 1 == ey && (2.0 * x > y || (2.0 * x == y && (q % 2) != 0))) { + x -= y; + // TODO: this matches musl behavior, but it is incorrect + q = q.wrapping_add(1); + } + q &= 0x7fffffff; + let quo = if sx ^ sy { -(q as i32) } else { q as i32 }; + if sx { (-x, quo) } else { (x, quo) } +} + +#[cfg(test)] +mod tests { + use super::remquo; + + #[test] + fn test_q_overflow() { + // 0xc000000000000001, 0x04c0000000000004 + let _ = remquo(-2.0000000000000004, 8.406091369059082e-286); + } +} diff --git a/library/compiler-builtins/libm/src/math/remquof.rs b/library/compiler-builtins/libm/src/math/remquof.rs new file mode 100644 index 0000000000000..b0e85ca661160 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/remquof.rs @@ -0,0 +1,93 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn remquof(mut x: f32, mut y: f32) -> (f32, i32) { + let ux: u32 = x.to_bits(); + let mut uy: u32 = y.to_bits(); + let mut ex = ((ux >> 23) & 0xff) as i32; + let mut ey = ((uy >> 23) & 0xff) as i32; + let sx = (ux >> 31) != 0; + let sy = (uy >> 31) != 0; + let mut q: u32; + let mut i: u32; + let mut uxi: u32 = ux; + + if (uy << 1) == 0 || y.is_nan() || ex == 0xff { + return ((x * y) / (x * y), 0); + } + if (ux << 1) == 0 { + return (x, 0); + } + + /* normalize x and y */ + if ex == 0 { + i = uxi << 9; + while (i >> 31) == 0 { + ex -= 1; + i <<= 1; + } + uxi <<= -ex + 1; + } else { + uxi &= (!0) >> 9; + uxi |= 1 << 23; + } + if ey == 0 { + i = uy << 9; + while (i >> 31) == 0 { + ey -= 1; + i <<= 1; + } + uy <<= -ey + 1; + } else { + uy &= (!0) >> 9; + uy |= 1 << 23; + } + + q = 0; + if ex + 1 != ey { + if ex < ey { + return (x, 0); + } + /* x mod y */ + while ex > ey { + i = uxi.wrapping_sub(uy); + if (i >> 31) == 0 { + uxi = i; + q += 1; + } + uxi <<= 1; + q <<= 1; + ex -= 1; + } + i = uxi.wrapping_sub(uy); + if (i >> 31) == 0 { + uxi = i; + q += 1; + } + if uxi == 0 { + ex = -30; + } else { + while (uxi >> 23) == 0 { + uxi <<= 1; + ex -= 1; + } + } + } + + /* scale result and decide between |x| and |x|-|y| */ + if ex > 0 { + uxi -= 1 << 23; + uxi |= (ex as u32) << 23; + } else { + uxi >>= -ex + 1; + } + x = f32::from_bits(uxi); + if sy { + y = -y; + } + if ex == ey || (ex + 1 == ey && (2.0 * x > y || (2.0 * x == y && (q % 2) != 0))) { + x -= y; + q += 1; + } + q &= 0x7fffffff; + let quo = if sx ^ sy { -(q as i32) } else { q as i32 }; + if sx { (-x, quo) } else { (x, quo) } +} diff --git a/library/compiler-builtins/libm/src/math/rint.rs b/library/compiler-builtins/libm/src/math/rint.rs new file mode 100644 index 0000000000000..e1c32c9435522 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/rint.rs @@ -0,0 +1,51 @@ +use super::support::Round; + +/// Round `x` to the nearest integer, breaking ties toward even. 
+#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn rintf16(x: f16) -> f16 { + select_implementation! { + name: rintf16, + use_arch: all(target_arch = "aarch64", target_feature = "fp16"), + args: x, + } + + super::generic::rint_round(x, Round::Nearest).val +} + +/// Round `x` to the nearest integer, breaking ties toward even. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn rintf(x: f32) -> f32 { + select_implementation! { + name: rintf, + use_arch: any( + all(target_arch = "aarch64", target_feature = "neon"), + all(target_arch = "wasm32", intrinsics_enabled), + ), + args: x, + } + + super::generic::rint_round(x, Round::Nearest).val +} + +/// Round `x` to the nearest integer, breaking ties toward even. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn rint(x: f64) -> f64 { + select_implementation! { + name: rint, + use_arch: any( + all(target_arch = "aarch64", target_feature = "neon"), + all(target_arch = "wasm32", intrinsics_enabled), + ), + args: x, + } + + super::generic::rint_round(x, Round::Nearest).val +} + +/// Round `x` to the nearest integer, breaking ties toward even. +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn rintf128(x: f128) -> f128 { + super::generic::rint_round(x, Round::Nearest).val +} diff --git a/library/compiler-builtins/libm/src/math/round.rs b/library/compiler-builtins/libm/src/math/round.rs new file mode 100644 index 0000000000000..6cd091cd73cdc --- /dev/null +++ b/library/compiler-builtins/libm/src/math/round.rs @@ -0,0 +1,25 @@ +/// Round `x` to the nearest integer, breaking ties away from zero. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn roundf16(x: f16) -> f16 { + super::generic::round(x) +} + +/// Round `x` to the nearest integer, breaking ties away from zero. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn roundf(x: f32) -> f32 { + super::generic::round(x) +} + +/// Round `x` to the nearest integer, breaking ties away from zero. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn round(x: f64) -> f64 { + super::generic::round(x) +} + +/// Round `x` to the nearest integer, breaking ties away from zero. +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn roundf128(x: f128) -> f128 { + super::generic::round(x) +} diff --git a/library/compiler-builtins/libm/src/math/roundeven.rs b/library/compiler-builtins/libm/src/math/roundeven.rs new file mode 100644 index 0000000000000..6e621d7628f2a --- /dev/null +++ b/library/compiler-builtins/libm/src/math/roundeven.rs @@ -0,0 +1,36 @@ +use super::support::{Float, Round}; + +/// Round `x` to the nearest integer, breaking ties toward even. This is IEEE 754 +/// `roundToIntegralTiesToEven`. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn roundevenf16(x: f16) -> f16 { + roundeven_impl(x) +} + +/// Round `x` to the nearest integer, breaking ties toward even. This is IEEE 754 +/// `roundToIntegralTiesToEven`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn roundevenf(x: f32) -> f32 { + roundeven_impl(x) +} + +/// Round `x` to the nearest integer, breaking ties toward even. This is IEEE 754 +/// `roundToIntegralTiesToEven`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn roundeven(x: f64) -> f64 { + roundeven_impl(x) +} + +/// Round `x` to the nearest integer, breaking ties toward even. 
This is IEEE 754 +/// `roundToIntegralTiesToEven`. +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn roundevenf128(x: f128) -> f128 { + roundeven_impl(x) +} + +#[inline] +pub fn roundeven_impl<F: Float>(x: F) -> F { + super::generic::rint_round(x, Round::Nearest).val +} diff --git a/library/compiler-builtins/libm/src/math/roundf.rs b/library/compiler-builtins/libm/src/math/roundf.rs new file mode 100644 index 0000000000000..b5d7c9d693e71 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/roundf.rs @@ -0,0 +1,5 @@ +/// Round `x` to the nearest integer, breaking ties away from zero. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn roundf(x: f32) -> f32 { + super::generic::round(x) +} diff --git a/library/compiler-builtins/libm/src/math/roundf128.rs b/library/compiler-builtins/libm/src/math/roundf128.rs new file mode 100644 index 0000000000000..fc3164929fe4f --- /dev/null +++ b/library/compiler-builtins/libm/src/math/roundf128.rs @@ -0,0 +1,5 @@ +/// Round `x` to the nearest integer, breaking ties away from zero. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn roundf128(x: f128) -> f128 { + super::generic::round(x) +} diff --git a/library/compiler-builtins/libm/src/math/roundf16.rs b/library/compiler-builtins/libm/src/math/roundf16.rs new file mode 100644 index 0000000000000..8b356eaabeecd --- /dev/null +++ b/library/compiler-builtins/libm/src/math/roundf16.rs @@ -0,0 +1,5 @@ +/// Round `x` to the nearest integer, breaking ties away from zero. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn roundf16(x: f16) -> f16 { + super::generic::round(x) +} diff --git a/library/compiler-builtins/libm/src/math/scalbn.rs b/library/compiler-builtins/libm/src/math/scalbn.rs new file mode 100644 index 0000000000000..ed73c3f94f000 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/scalbn.rs @@ -0,0 +1,87 @@ +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn scalbnf16(x: f16, n: i32) -> f16 { + super::generic::scalbn(x, n) +} + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn scalbnf(x: f32, n: i32) -> f32 { + super::generic::scalbn(x, n) +} + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn scalbn(x: f64, n: i32) -> f64 { + super::generic::scalbn(x, n) +} + +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn scalbnf128(x: f128, n: i32) -> f128 { + super::generic::scalbn(x, n) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::support::{CastFrom, CastInto, Float}; + + // Tests against N3220 + fn spec_test<F: Float>(f: impl Fn(F, i32) -> F) + where + u32: CastInto<F::Int>, + F::Int: CastFrom<i32>, + F::Int: CastFrom<u32>, + { + // `scalbn(±0, n)` returns `±0`. + assert_biteq!(f(F::NEG_ZERO, 10), F::NEG_ZERO); + assert_biteq!(f(F::NEG_ZERO, 0), F::NEG_ZERO); + assert_biteq!(f(F::NEG_ZERO, -10), F::NEG_ZERO); + assert_biteq!(f(F::ZERO, 10), F::ZERO); + assert_biteq!(f(F::ZERO, 0), F::ZERO); + assert_biteq!(f(F::ZERO, -10), F::ZERO); + + // `scalbn(x, 0)` returns `x`. + assert_biteq!(f(F::MIN, 0), F::MIN); + assert_biteq!(f(F::MAX, 0), F::MAX); + assert_biteq!(f(F::INFINITY, 0), F::INFINITY); + assert_biteq!(f(F::NEG_INFINITY, 0), F::NEG_INFINITY); + assert_biteq!(f(F::ZERO, 0), F::ZERO); + assert_biteq!(f(F::NEG_ZERO, 0), F::NEG_ZERO); + + // `scalbn(±∞, n)` returns `±∞`.
+ assert_biteq!(f(F::INFINITY, 10), F::INFINITY); + assert_biteq!(f(F::INFINITY, -10), F::INFINITY); + assert_biteq!(f(F::NEG_INFINITY, 10), F::NEG_INFINITY); + assert_biteq!(f(F::NEG_INFINITY, -10), F::NEG_INFINITY); + + // NaN should remain NaNs. + assert!(f(F::NAN, 10).is_nan()); + assert!(f(F::NAN, 0).is_nan()); + assert!(f(F::NAN, -10).is_nan()); + assert!(f(-F::NAN, 10).is_nan()); + assert!(f(-F::NAN, 0).is_nan()); + assert!(f(-F::NAN, -10).is_nan()); + } + + #[test] + #[cfg(f16_enabled)] + fn spec_test_f16() { + spec_test::<f16>(scalbnf16); + } + + #[test] + fn spec_test_f32() { + spec_test::<f32>(scalbnf); + } + + #[test] + fn spec_test_f64() { + spec_test::<f64>(scalbn); + } + + #[test] + #[cfg(f128_enabled)] + fn spec_test_f128() { + spec_test::<f128>(scalbnf128); + } +} diff --git a/library/compiler-builtins/libm/src/math/scalbnf.rs b/library/compiler-builtins/libm/src/math/scalbnf.rs new file mode 100644 index 0000000000000..57e7ba76f60b5 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/scalbnf.rs @@ -0,0 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn scalbnf(x: f32, n: i32) -> f32 { + super::generic::scalbn(x, n) +} diff --git a/library/compiler-builtins/libm/src/math/scalbnf128.rs b/library/compiler-builtins/libm/src/math/scalbnf128.rs new file mode 100644 index 0000000000000..c1d2b48558568 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/scalbnf128.rs @@ -0,0 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn scalbnf128(x: f128, n: i32) -> f128 { + super::generic::scalbn(x, n) +} diff --git a/library/compiler-builtins/libm/src/math/scalbnf16.rs b/library/compiler-builtins/libm/src/math/scalbnf16.rs new file mode 100644 index 0000000000000..2209e1a179566 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/scalbnf16.rs @@ -0,0 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn scalbnf16(x: f16, n: i32) -> f16 { + super::generic::scalbn(x, n) +} diff --git a/library/compiler-builtins/libm/src/math/sin.rs b/library/compiler-builtins/libm/src/math/sin.rs new file mode 100644 index 0000000000000..229fa4bef0830 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/sin.rs @@ -0,0 +1,95 @@ +// origin: FreeBSD /usr/src/lib/msun/src/s_sin.c */ +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== + +use super::{k_cos, k_sin, rem_pio2}; + +// sin(x) +// Return sine function of x. +// +// kernel function: +// k_sin ... sine function on [-pi/4,pi/4] +// k_cos ... cosine function on [-pi/4,pi/4] +// rem_pio2 ... argument reduction routine +// +// Method. +// Let S,C and T denote the sin, cos and tan respectively on +// [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2 +// in [-pi/4 , +pi/4], and let n = k mod 4. +// We have +// +// n sin(x) cos(x) tan(x) +// ---------------------------------------------------------- +// 0 S C T +// 1 C -S -1/T +// 2 -S -C T +// 3 -C S -1/T +// ---------------------------------------------------------- +// +// Special cases: +// Let trig be any of sin, cos, or tan. +// trig(+-INF) is NaN, with signals; +// trig(NaN) is that NaN; +// +// Accuracy: +// TRIG(x) returns trig(x) nearly rounded + +/// The sine of `x` (f64).
+/// +/// `x` is specified in radians. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sin(x: f64) -> f64 { + let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120f === 2 ^ 120 + + /* High word of x. */ + let ix = (f64::to_bits(x) >> 32) as u32 & 0x7fffffff; + + /* |x| ~< pi/4 */ + if ix <= 0x3fe921fb { + if ix < 0x3e500000 { + /* |x| < 2**-26 */ + /* raise inexact if x != 0 and underflow if subnormal*/ + if ix < 0x00100000 { + force_eval!(x / x1p120); + } else { + force_eval!(x + x1p120); + } + return x; + } + return k_sin(x, 0.0, 0); + } + + /* sin(Inf or NaN) is NaN */ + if ix >= 0x7ff00000 { + return x - x; + } + + /* argument reduction needed */ + let (n, y0, y1) = rem_pio2(x); + match n & 3 { + 0 => k_sin(y0, y1, 1), + 1 => k_cos(y0, y1), + 2 => -k_sin(y0, y1, 1), + _ => -k_cos(y0, y1), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + #[cfg_attr(x86_no_sse, ignore = "FIXME(i586): possible incorrect rounding")] + fn test_near_pi() { + let x = f64::from_bits(0x400921fb000FD5DD); // 3.141592026217707 + let sx = f64::from_bits(0x3ea50d15ced1a4a2); // 6.273720864039205e-7 + assert_eq!(sin(x), sx); + } +} diff --git a/library/compiler-builtins/libm/src/math/sincos.rs b/library/compiler-builtins/libm/src/math/sincos.rs new file mode 100644 index 0000000000000..ebf482f2df350 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/sincos.rs @@ -0,0 +1,137 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_sin.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::{get_high_word, k_cos, k_sin, rem_pio2}; + +/// Both the sine and cosine of `x` (f64). +/// +/// `x` is specified in radians and the return value is (sin(x), cos(x)). 
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sincos(x: f64) -> (f64, f64) { + let s: f64; + let c: f64; + let mut ix: u32; + + ix = get_high_word(x); + ix &= 0x7fffffff; + + /* |x| ~< pi/4 */ + if ix <= 0x3fe921fb { + /* if |x| < 2**-27 * sqrt(2) */ + if ix < 0x3e46a09e { + /* raise inexact if x!=0 and underflow if subnormal */ + let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120 == 2^120 + if ix < 0x00100000 { + force_eval!(x / x1p120); + } else { + force_eval!(x + x1p120); + } + return (x, 1.0); + } + return (k_sin(x, 0.0, 0), k_cos(x, 0.0)); + } + + /* sincos(Inf or NaN) is NaN */ + if ix >= 0x7ff00000 { + let rv = x - x; + return (rv, rv); + } + + /* argument reduction needed */ + let (n, y0, y1) = rem_pio2(x); + s = k_sin(y0, y1, 1); + c = k_cos(y0, y1); + match n & 3 { + 0 => (s, c), + 1 => (c, -s), + 2 => (-s, -c), + 3 => (-c, s), + #[cfg(debug_assertions)] + _ => unreachable!(), + #[cfg(not(debug_assertions))] + _ => (0.0, 1.0), + } +} + +// These tests are based on those from sincosf.rs +#[cfg(test)] +mod tests { + use super::sincos; + + const TOLERANCE: f64 = 1e-6; + + #[test] + fn with_pi() { + let (s, c) = sincos(core::f64::consts::PI); + assert!( + (s - 0.0).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + s, + 0.0, + (s - 0.0).abs(), + TOLERANCE + ); + assert!( + (c + 1.0).abs() < TOLERANCE, + "|{} + {}| = {} >= {}", + c, + 1.0, + (s + 1.0).abs(), + TOLERANCE + ); + } + + #[test] + fn rotational_symmetry() { + use core::f64::consts::PI; + const N: usize = 24; + for n in 0..N { + let theta = 2. * PI * (n as f64) / (N as f64); + let (s, c) = sincos(theta); + let (s_plus, c_plus) = sincos(theta + 2. * PI); + let (s_minus, c_minus) = sincos(theta - 2. * PI); + + assert!( + (s - s_plus).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + s, + s_plus, + (s - s_plus).abs(), + TOLERANCE + ); + assert!( + (s - s_minus).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + s, + s_minus, + (s - s_minus).abs(), + TOLERANCE + ); + assert!( + (c - c_plus).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + c, + c_plus, + (c - c_plus).abs(), + TOLERANCE + ); + assert!( + (c - c_minus).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + c, + c_minus, + (c - c_minus).abs(), + TOLERANCE + ); + } + } +} diff --git a/library/compiler-builtins/libm/src/math/sincosf.rs b/library/compiler-builtins/libm/src/math/sincosf.rs new file mode 100644 index 0000000000000..f3360767683eb --- /dev/null +++ b/library/compiler-builtins/libm/src/math/sincosf.rs @@ -0,0 +1,176 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_sinf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + * Optimized by Bruce D. Evans. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::{k_cosf, k_sinf, rem_pio2f}; + +/* Small multiples of pi/2 rounded to double precision. */ +const PI_2: f64 = 0.5 * 3.1415926535897931160E+00; +const S1PIO2: f64 = 1.0 * PI_2; /* 0x3FF921FB, 0x54442D18 */ +const S2PIO2: f64 = 2.0 * PI_2; /* 0x400921FB, 0x54442D18 */ +const S3PIO2: f64 = 3.0 * PI_2; /* 0x4012D97C, 0x7F3321D2 */ +const S4PIO2: f64 = 4.0 * PI_2; /* 0x401921FB, 0x54442D18 */ + +/// Both the sine and cosine of `x` (f32). 
+/// +/// `x` is specified in radians and the return value is (sin(x), cos(x)). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sincosf(x: f32) -> (f32, f32) { + let s: f32; + let c: f32; + let mut ix: u32; + let sign: bool; + + ix = x.to_bits(); + sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + + /* |x| ~<= pi/4 */ + if ix <= 0x3f490fda { + /* |x| < 2**-12 */ + if ix < 0x39800000 { + /* raise inexact if x!=0 and underflow if subnormal */ + + let x1p120 = f32::from_bits(0x7b800000); // 0x1p120 == 2^120 + if ix < 0x00100000 { + force_eval!(x / x1p120); + } else { + force_eval!(x + x1p120); + } + return (x, 1.0); + } + return (k_sinf(x as f64), k_cosf(x as f64)); + } + + /* |x| ~<= 5*pi/4 */ + if ix <= 0x407b53d1 { + if ix <= 0x4016cbe3 { + /* |x| ~<= 3pi/4 */ + if sign { + s = -k_cosf(x as f64 + S1PIO2); + c = k_sinf(x as f64 + S1PIO2); + } else { + s = k_cosf(S1PIO2 - x as f64); + c = k_sinf(S1PIO2 - x as f64); + } + } + /* -sin(x+c) is not correct if x+c could be 0: -0 vs +0 */ + else if sign { + s = -k_sinf(x as f64 + S2PIO2); + c = -k_cosf(x as f64 + S2PIO2); + } else { + s = -k_sinf(x as f64 - S2PIO2); + c = -k_cosf(x as f64 - S2PIO2); + } + + return (s, c); + } + + /* |x| ~<= 9*pi/4 */ + if ix <= 0x40e231d5 { + if ix <= 0x40afeddf { + /* |x| ~<= 7*pi/4 */ + if sign { + s = k_cosf(x as f64 + S3PIO2); + c = -k_sinf(x as f64 + S3PIO2); + } else { + s = -k_cosf(x as f64 - S3PIO2); + c = k_sinf(x as f64 - S3PIO2); + } + } else if sign { + s = k_sinf(x as f64 + S4PIO2); + c = k_cosf(x as f64 + S4PIO2); + } else { + s = k_sinf(x as f64 - S4PIO2); + c = k_cosf(x as f64 - S4PIO2); + } + + return (s, c); + } + + /* sin(Inf or NaN) is NaN */ + if ix >= 0x7f800000 { + let rv = x - x; + return (rv, rv); + } + + /* general argument reduction needed */ + let (n, y) = rem_pio2f(x); + s = k_sinf(y); + c = k_cosf(y); + match n & 3 { + 0 => (s, c), + 1 => (c, -s), + 2 => (-s, -c), + 3 => (-c, s), + #[cfg(debug_assertions)] + _ => unreachable!(), + #[cfg(not(debug_assertions))] + _ => (0.0, 1.0), + } +} + +// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 +#[cfg(not(target_arch = "powerpc64"))] +#[cfg(test)] +mod tests { + use super::sincosf; + + #[test] + fn rotational_symmetry() { + use core::f32::consts::PI; + const N: usize = 24; + for n in 0..N { + let theta = 2. * PI * (n as f32) / (N as f32); + let (s, c) = sincosf(theta); + let (s_plus, c_plus) = sincosf(theta + 2. * PI); + let (s_minus, c_minus) = sincosf(theta - 2. * PI); + + const TOLERANCE: f32 = 1e-6; + assert!( + (s - s_plus).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + s, + s_plus, + (s - s_plus).abs(), + TOLERANCE + ); + assert!( + (s - s_minus).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + s, + s_minus, + (s - s_minus).abs(), + TOLERANCE + ); + assert!( + (c - c_plus).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + c, + c_plus, + (c - c_plus).abs(), + TOLERANCE + ); + assert!( + (c - c_minus).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + c, + c_minus, + (c - c_minus).abs(), + TOLERANCE + ); + } + } +} diff --git a/library/compiler-builtins/libm/src/math/sinf.rs b/library/compiler-builtins/libm/src/math/sinf.rs new file mode 100644 index 0000000000000..709b63fcf297d --- /dev/null +++ b/library/compiler-builtins/libm/src/math/sinf.rs @@ -0,0 +1,96 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_sinf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + * Optimized by Bruce D. Evans. 
+ */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use core::f64::consts::FRAC_PI_2; + +use super::{k_cosf, k_sinf, rem_pio2f}; + +/* Small multiples of pi/2 rounded to double precision. */ +const S1_PIO2: f64 = 1. * FRAC_PI_2; /* 0x3FF921FB, 0x54442D18 */ +const S2_PIO2: f64 = 2. * FRAC_PI_2; /* 0x400921FB, 0x54442D18 */ +const S3_PIO2: f64 = 3. * FRAC_PI_2; /* 0x4012D97C, 0x7F3321D2 */ +const S4_PIO2: f64 = 4. * FRAC_PI_2; /* 0x401921FB, 0x54442D18 */ + +/// The sine of `x` (f32). +/// +/// `x` is specified in radians. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sinf(x: f32) -> f32 { + let x64 = x as f64; + + let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 + + let mut ix = x.to_bits(); + let sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + + if ix <= 0x3f490fda { + /* |x| ~<= pi/4 */ + if ix < 0x39800000 { + /* |x| < 2**-12 */ + /* raise inexact if x!=0 and underflow if subnormal */ + force_eval!(if ix < 0x00800000 { + x / x1p120 + } else { + x + x1p120 + }); + return x; + } + return k_sinf(x64); + } + if ix <= 0x407b53d1 { + /* |x| ~<= 5*pi/4 */ + if ix <= 0x4016cbe3 { + /* |x| ~<= 3pi/4 */ + if sign { + return -k_cosf(x64 + S1_PIO2); + } else { + return k_cosf(x64 - S1_PIO2); + } + } + return k_sinf(if sign { + -(x64 + S2_PIO2) + } else { + -(x64 - S2_PIO2) + }); + } + if ix <= 0x40e231d5 { + /* |x| ~<= 9*pi/4 */ + if ix <= 0x40afeddf { + /* |x| ~<= 7*pi/4 */ + if sign { + return k_cosf(x64 + S3_PIO2); + } else { + return -k_cosf(x64 - S3_PIO2); + } + } + return k_sinf(if sign { x64 + S4_PIO2 } else { x64 - S4_PIO2 }); + } + + /* sin(Inf or NaN) is NaN */ + if ix >= 0x7f800000 { + return x - x; + } + + /* general argument reduction needed */ + let (n, y) = rem_pio2f(x); + match n & 3 { + 0 => k_sinf(y), + 1 => k_cosf(y), + 2 => k_sinf(-y), + _ => -k_cosf(y), + } +} diff --git a/library/compiler-builtins/libm/src/math/sinh.rs b/library/compiler-builtins/libm/src/math/sinh.rs new file mode 100644 index 0000000000000..79184198263c6 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/sinh.rs @@ -0,0 +1,51 @@ +use super::{expm1, expo2}; + +// sinh(x) = (exp(x) - 1/exp(x))/2 +// = (exp(x)-1 + (exp(x)-1)/exp(x))/2 +// = x + x^3/6 + o(x^5) +// + +/// The hyperbolic sine of `x` (f64). 
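// A worked step for the identity above, writing t = expm1(|x|) = e^|x| - 1:
//   (e^|x| - e^-|x|)/2 = ((1 + t) - 1/(1 + t))/2 = (t + t/(t + 1))/2,
// so with the sign of x folded into h = +-0.5 this is the `h * (t + t / (t + 1.0))`
// expression in the body below; the `2.0 * t - t * t / (t + 1.0)` form used for
// smaller |x| is the same quantity rearranged, so the two branches differ only in
// their rounding behaviour.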
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sinh(x: f64) -> f64 { + // union {double f; uint64_t i;} u = {.f = x}; + // uint32_t w; + // double t, h, absx; + + let mut uf: f64 = x; + let mut ui: u64 = f64::to_bits(uf); + let w: u32; + let t: f64; + let mut h: f64; + let absx: f64; + + h = 0.5; + if ui >> 63 != 0 { + h = -h; + } + /* |x| */ + ui &= !1 / 2; + uf = f64::from_bits(ui); + absx = uf; + w = (ui >> 32) as u32; + + /* |x| < log(DBL_MAX) */ + if w < 0x40862e42 { + t = expm1(absx); + if w < 0x3ff00000 { + if w < 0x3ff00000 - (26 << 20) { + /* note: inexact and underflow are raised by expm1 */ + /* note: this branch avoids spurious underflow */ + return x; + } + return h * (2.0 * t - t * t / (t + 1.0)); + } + /* note: |x|>log(0x1p26)+eps could be just h*exp(x) */ + return h * (t + t / (t + 1.0)); + } + + /* |x| > log(DBL_MAX) or nan */ + /* note: the result is stored to handle overflow */ + t = 2.0 * h * expo2(absx); + t +} diff --git a/library/compiler-builtins/libm/src/math/sinhf.rs b/library/compiler-builtins/libm/src/math/sinhf.rs new file mode 100644 index 0000000000000..44d2e3560d579 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/sinhf.rs @@ -0,0 +1,30 @@ +use super::{expm1f, k_expo2f}; + +/// The hyperbolic sine of `x` (f32). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sinhf(x: f32) -> f32 { + let mut h = 0.5f32; + let mut ix = x.to_bits(); + if (ix >> 31) != 0 { + h = -h; + } + /* |x| */ + ix &= 0x7fffffff; + let absx = f32::from_bits(ix); + let w = ix; + + /* |x| < log(FLT_MAX) */ + if w < 0x42b17217 { + let t = expm1f(absx); + if w < 0x3f800000 { + if w < (0x3f800000 - (12 << 23)) { + return x; + } + return h * (2. * t - t * t / (t + 1.)); + } + return h * (t + t / (t + 1.)); + } + + /* |x| > logf(FLT_MAX) or nan */ + 2. * h * k_expo2f(absx) +} diff --git a/library/compiler-builtins/libm/src/math/sqrt.rs b/library/compiler-builtins/libm/src/math/sqrt.rs new file mode 100644 index 0000000000000..76bc240cf01cb --- /dev/null +++ b/library/compiler-builtins/libm/src/math/sqrt.rs @@ -0,0 +1,51 @@ +/// The square root of `x` (f16). +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sqrtf16(x: f16) -> f16 { + select_implementation! { + name: sqrtf16, + use_arch: all(target_arch = "aarch64", target_feature = "fp16"), + args: x, + } + + return super::generic::sqrt(x); +} + +/// The square root of `x` (f32). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sqrtf(x: f32) -> f32 { + select_implementation! { + name: sqrtf, + use_arch: any( + all(target_arch = "aarch64", target_feature = "neon"), + all(target_arch = "wasm32", intrinsics_enabled), + target_feature = "sse2" + ), + args: x, + } + + super::generic::sqrt(x) +} + +/// The square root of `x` (f64). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sqrt(x: f64) -> f64 { + select_implementation! { + name: sqrt, + use_arch: any( + all(target_arch = "aarch64", target_feature = "neon"), + all(target_arch = "wasm32", intrinsics_enabled), + target_feature = "sse2" + ), + args: x, + } + + super::generic::sqrt(x) +} + +/// The square root of `x` (f128). 
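// Reading of the dispatch pattern in this file (an assumption about
// `select_implementation!`, which is defined elsewhere in this crate): when one of
// the listed `use_arch` configurations matches the target, the macro presumably
// returns the architecture-specific result early, and the unconditional
// `super::generic::sqrt(x)` that follows is the portable software fallback.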
+#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sqrtf128(x: f128) -> f128 { + return super::generic::sqrt(x); +} diff --git a/library/compiler-builtins/libm/src/math/sqrtf.rs b/library/compiler-builtins/libm/src/math/sqrtf.rs new file mode 100644 index 0000000000000..c28a705e378e6 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/sqrtf.rs @@ -0,0 +1,15 @@ +/// The square root of `x` (f32). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sqrtf(x: f32) -> f32 { + select_implementation! { + name: sqrtf, + use_arch: any( + all(target_arch = "aarch64", target_feature = "neon"), + all(target_arch = "wasm32", intrinsics_enabled), + target_feature = "sse2" + ), + args: x, + } + + super::generic::sqrt(x) +} diff --git a/library/compiler-builtins/libm/src/math/sqrtf128.rs b/library/compiler-builtins/libm/src/math/sqrtf128.rs new file mode 100644 index 0000000000000..eaef6ae0c1c85 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/sqrtf128.rs @@ -0,0 +1,5 @@ +/// The square root of `x` (f128). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sqrtf128(x: f128) -> f128 { + return super::generic::sqrt(x); +} diff --git a/library/compiler-builtins/libm/src/math/sqrtf16.rs b/library/compiler-builtins/libm/src/math/sqrtf16.rs new file mode 100644 index 0000000000000..7bedb7f8bbb6b --- /dev/null +++ b/library/compiler-builtins/libm/src/math/sqrtf16.rs @@ -0,0 +1,11 @@ +/// The square root of `x` (f16). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sqrtf16(x: f16) -> f16 { + select_implementation! { + name: sqrtf16, + use_arch: all(target_arch = "aarch64", target_feature = "fp16"), + args: x, + } + + return super::generic::sqrt(x); +} diff --git a/library/compiler-builtins/libm/src/math/support/big.rs b/library/compiler-builtins/libm/src/math/support/big.rs new file mode 100644 index 0000000000000..8a52d86cc9827 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/support/big.rs @@ -0,0 +1,257 @@ +//! Integers used for wide operations, larger than `u128`. + +#[cfg(test)] +mod tests; + +use core::ops; + +use super::{DInt, HInt, Int, MinInt}; + +const U128_LO_MASK: u128 = u64::MAX as u128; + +/// A 256-bit unsigned integer represented as two 128-bit native-endian limbs. +#[allow(non_camel_case_types)] +#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] +pub struct u256 { + pub lo: u128, + pub hi: u128, +} + +impl u256 { + #[cfg(any(test, feature = "unstable-public-internals"))] + pub const MAX: Self = Self { + lo: u128::MAX, + hi: u128::MAX, + }; + + /// Reinterpret as a signed integer + pub fn signed(self) -> i256 { + i256 { + lo: self.lo, + hi: self.hi, + } + } +} + +/// A 256-bit signed integer represented as two 128-bit native-endian limbs. 
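// Limb-layout sketch: `lo` holds bits 0..128 and `hi` holds bits 128..256, so for
// example `u256 { lo: 7, hi: 1 }` represents 2^128 + 7. The `signed()`/`unsigned()`
// helpers are plain bit reinterpretations, analogous to `u128 as i128` casts.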
+#[allow(non_camel_case_types)] +#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] +pub struct i256 { + pub lo: u128, + pub hi: u128, +} + +impl i256 { + /// Reinterpret as an unsigned integer + #[cfg(any(test, feature = "unstable-public-internals"))] + pub fn unsigned(self) -> u256 { + u256 { + lo: self.lo, + hi: self.hi, + } + } +} + +impl MinInt for u256 { + type OtherSign = i256; + + type Unsigned = u256; + + const SIGNED: bool = false; + const BITS: u32 = 256; + const ZERO: Self = Self { lo: 0, hi: 0 }; + const ONE: Self = Self { lo: 1, hi: 0 }; + const MIN: Self = Self { lo: 0, hi: 0 }; + const MAX: Self = Self { + lo: u128::MAX, + hi: u128::MAX, + }; +} + +impl MinInt for i256 { + type OtherSign = u256; + + type Unsigned = u256; + + const SIGNED: bool = false; + const BITS: u32 = 256; + const ZERO: Self = Self { lo: 0, hi: 0 }; + const ONE: Self = Self { lo: 1, hi: 0 }; + const MIN: Self = Self { + lo: 0, + hi: 1 << 127, + }; + const MAX: Self = Self { + lo: u128::MAX, + hi: u128::MAX >> 1, + }; +} + +macro_rules! impl_common { + ($ty:ty) => { + impl ops::BitOr for $ty { + type Output = Self; + + fn bitor(mut self, rhs: Self) -> Self::Output { + self.lo |= rhs.lo; + self.hi |= rhs.hi; + self + } + } + + impl ops::Not for $ty { + type Output = Self; + + fn not(mut self) -> Self::Output { + self.lo = !self.lo; + self.hi = !self.hi; + self + } + } + + impl ops::Shl for $ty { + type Output = Self; + + fn shl(self, _rhs: u32) -> Self::Output { + unimplemented!("only used to meet trait bounds") + } + } + }; +} + +impl_common!(i256); +impl_common!(u256); + +impl ops::Add for u256 { + type Output = Self; + + fn add(self, rhs: Self) -> Self::Output { + let (lo, carry) = self.lo.overflowing_add(rhs.lo); + let hi = self.hi.wrapping_add(carry as u128).wrapping_add(rhs.hi); + + Self { lo, hi } + } +} + +impl ops::Shr for u256 { + type Output = Self; + + fn shr(mut self, rhs: u32) -> Self::Output { + debug_assert!(rhs < Self::BITS, "attempted to shift right with overflow"); + if rhs >= Self::BITS { + return Self::ZERO; + } + + if rhs == 0 { + return self; + } + + if rhs < 128 { + self.lo >>= rhs; + self.lo |= self.hi << (128 - rhs); + } else { + self.lo = self.hi >> (rhs - 128); + } + + if rhs < 128 { + self.hi >>= rhs; + } else { + self.hi = 0; + } + + self + } +} + +impl HInt for u128 { + type D = u256; + + fn widen(self) -> Self::D { + u256 { lo: self, hi: 0 } + } + + fn zero_widen(self) -> Self::D { + self.widen() + } + + fn zero_widen_mul(self, rhs: Self) -> Self::D { + let l0 = self & U128_LO_MASK; + let l1 = rhs & U128_LO_MASK; + let h0 = self >> 64; + let h1 = rhs >> 64; + + let p_ll: u128 = l0.overflowing_mul(l1).0; + let p_lh: u128 = l0.overflowing_mul(h1).0; + let p_hl: u128 = h0.overflowing_mul(l1).0; + let p_hh: u128 = h0.overflowing_mul(h1).0; + + let s0 = p_hl + (p_ll >> 64); + let s1 = (p_ll & U128_LO_MASK) + (s0 << 64); + let s2 = p_lh + (s1 >> 64); + + let lo = (p_ll & U128_LO_MASK) + (s2 << 64); + let hi = p_hh + (s0 >> 64) + (s2 >> 64); + + u256 { lo, hi } + } + + fn widen_mul(self, rhs: Self) -> Self::D { + self.zero_widen_mul(rhs) + } + + fn widen_hi(self) -> Self::D { + self.widen() << ::BITS + } +} + +impl HInt for i128 { + type D = i256; + + fn widen(self) -> Self::D { + let mut ret = self.unsigned().zero_widen().signed(); + if self.is_negative() { + ret.hi = u128::MAX; + } + ret + } + + fn zero_widen(self) -> Self::D { + self.unsigned().zero_widen().signed() + } + + fn zero_widen_mul(self, rhs: Self) -> Self::D { + 
self.unsigned().zero_widen_mul(rhs.unsigned()).signed() + } + + fn widen_mul(self, _rhs: Self) -> Self::D { + unimplemented!("signed i128 widening multiply is not used") + } + + fn widen_hi(self) -> Self::D { + self.widen() << ::BITS + } +} + +impl DInt for u256 { + type H = u128; + + fn lo(self) -> Self::H { + self.lo + } + + fn hi(self) -> Self::H { + self.hi + } +} + +impl DInt for i256 { + type H = i128; + + fn lo(self) -> Self::H { + self.lo as i128 + } + + fn hi(self) -> Self::H { + self.hi as i128 + } +} diff --git a/library/compiler-builtins/libm/src/math/support/big/tests.rs b/library/compiler-builtins/libm/src/math/support/big/tests.rs new file mode 100644 index 0000000000000..d2010f0216e35 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/support/big/tests.rs @@ -0,0 +1,277 @@ +extern crate std; +use std::string::String; +use std::{eprintln, format}; + +use super::{HInt, MinInt, i256, u256}; + +const LOHI_SPLIT: u128 = 0xaaaaaaaaaaaaaaaaffffffffffffffff; + +/// Print a `u256` as hex since we can't add format implementations +fn hexu(v: u256) -> String { + format!("0x{:032x}{:032x}", v.hi, v.lo) +} + +#[test] +fn widen_u128() { + assert_eq!( + u128::MAX.widen(), + u256 { + lo: u128::MAX, + hi: 0 + } + ); + assert_eq!( + LOHI_SPLIT.widen(), + u256 { + lo: LOHI_SPLIT, + hi: 0 + } + ); +} + +#[test] +fn widen_i128() { + assert_eq!((-1i128).widen(), u256::MAX.signed()); + assert_eq!( + (LOHI_SPLIT as i128).widen(), + i256 { + lo: LOHI_SPLIT, + hi: u128::MAX + } + ); + assert_eq!((-1i128).zero_widen().unsigned(), (u128::MAX).widen()); +} + +#[test] +fn widen_mul_u128() { + let tests = [ + ( + u128::MAX / 2, + 2_u128, + u256 { + lo: u128::MAX - 1, + hi: 0, + }, + ), + ( + u128::MAX, + 2_u128, + u256 { + lo: u128::MAX - 1, + hi: 1, + }, + ), + ( + u128::MAX, + u128::MAX, + u256 { + lo: 1, + hi: u128::MAX - 1, + }, + ), + (0, 0, u256::ZERO), + (1234u128, 0, u256::ZERO), + (0, 1234, u256::ZERO), + ]; + + let mut has_errors = false; + let mut add_error = |i, a, b, expected, actual| { + has_errors = true; + eprintln!( + "\ + FAILURE ({i}): {a:#034x} * {b:#034x}\n\ + expected: {}\n\ + got: {}\ + ", + hexu(expected), + hexu(actual) + ); + }; + + for (i, (a, b, exp)) in tests.iter().copied().enumerate() { + let res = a.widen_mul(b); + let res_z = a.zero_widen_mul(b); + assert_eq!(res, res_z); + if res != exp { + add_error(i, a, b, exp, res); + } + } + + assert!(!has_errors); +} + +#[test] +fn not_u256() { + assert_eq!(!u256::ZERO, u256::MAX); +} + +#[test] +fn shr_u256() { + let only_low = [ + 1, + u16::MAX.into(), + u32::MAX.into(), + u64::MAX.into(), + u128::MAX, + ]; + let mut has_errors = false; + + let mut add_error = |a, b, expected, actual| { + has_errors = true; + eprintln!( + "\ + FAILURE: {} >> {b}\n\ + expected: {}\n\ + actual: {}\ + ", + hexu(a), + hexu(expected), + hexu(actual), + ); + }; + + for a in only_low { + for perturb in 0..10 { + let a = a.saturating_add(perturb); + for shift in 0..128 { + let res = a.widen() >> shift; + let expected = (a >> shift).widen(); + if res != expected { + add_error(a.widen(), shift, expected, res); + } + } + } + } + + let check = [ + ( + u256::MAX, + 1, + u256 { + lo: u128::MAX, + hi: u128::MAX >> 1, + }, + ), + ( + u256::MAX, + 5, + u256 { + lo: u128::MAX, + hi: u128::MAX >> 5, + }, + ), + ( + u256::MAX, + 63, + u256 { + lo: u128::MAX, + hi: u64::MAX as u128 | (1 << 64), + }, + ), + ( + u256::MAX, + 64, + u256 { + lo: u128::MAX, + hi: u64::MAX as u128, + }, + ), + ( + u256::MAX, + 65, + u256 { + lo: u128::MAX, + hi: (u64::MAX >> 1) as 
u128, + }, + ), + ( + u256::MAX, + 127, + u256 { + lo: u128::MAX, + hi: 1, + }, + ), + ( + u256::MAX, + 128, + u256 { + lo: u128::MAX, + hi: 0, + }, + ), + ( + u256::MAX, + 129, + u256 { + lo: u128::MAX >> 1, + hi: 0, + }, + ), + ( + u256::MAX, + 191, + u256 { + lo: u64::MAX as u128 | 1 << 64, + hi: 0, + }, + ), + ( + u256::MAX, + 192, + u256 { + lo: u64::MAX as u128, + hi: 0, + }, + ), + ( + u256::MAX, + 193, + u256 { + lo: u64::MAX as u128 >> 1, + hi: 0, + }, + ), + (u256::MAX, 254, u256 { lo: 0b11, hi: 0 }), + (u256::MAX, 255, u256 { lo: 1, hi: 0 }), + ( + u256 { + hi: LOHI_SPLIT, + lo: 0, + }, + 64, + u256 { + lo: 0xffffffffffffffff0000000000000000, + hi: 0xaaaaaaaaaaaaaaaa, + }, + ), + ]; + + for (input, shift, expected) in check { + let res = input >> shift; + if res != expected { + add_error(input, shift, expected, res); + } + } + + assert!(!has_errors); +} + +#[test] +#[should_panic] +#[cfg(debug_assertions)] +// FIXME(ppc): ppc64le seems to have issues with `should_panic` tests. +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +fn shr_u256_overflow() { + // Like regular shr, panic on overflow with debug assertions + let _ = u256::MAX >> 256; +} + +#[test] +#[cfg(not(debug_assertions))] +fn shr_u256_overflow() { + // No panic without debug assertions + assert_eq!(u256::MAX >> 256, u256::ZERO); + assert_eq!(u256::MAX >> 257, u256::ZERO); + assert_eq!(u256::MAX >> u32::MAX, u256::ZERO); +} diff --git a/library/compiler-builtins/libm/src/math/support/env.rs b/library/compiler-builtins/libm/src/math/support/env.rs new file mode 100644 index 0000000000000..53ae32f658dbe --- /dev/null +++ b/library/compiler-builtins/libm/src/math/support/env.rs @@ -0,0 +1,130 @@ +//! Support for rounding directions and status flags as specified by IEEE 754. +//! +//! Rust does not support the floating point environment so rounding mode is passed as an argument +//! and status flags are returned as part of the result. There is currently not much support for +//! this; most existing ports from musl use a form of `force_eval!` to raise exceptions, but this +//! has no side effects in Rust. Further, correct behavior relies on elementary operations making +//! use of the correct rounding and raising relevant exceptions, which is not the case for Rust. +//! +//! This module exists so no functionality is lost when porting algorithms that respect floating +//! point environment, and so that some functionality may be tested (that which does not rely on +//! side effects from elementary operations). Full support would require wrappers around basic +//! operations, but there is no plan to add this at the current time. + +/// A value combined with a floating point status. +pub struct FpResult { + pub val: T, + #[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))] + pub status: Status, +} + +impl FpResult { + pub fn new(val: T, status: Status) -> Self { + Self { val, status } + } + + /// Return `val` with `Status::OK`. + pub fn ok(val: T) -> Self { + Self { + val, + status: Status::OK, + } + } +} + +/// IEEE 754 rounding mode, excluding the optional `roundTiesToAway` version of nearest. +/// +/// Integer representation comes from what CORE-MATH uses for indexing. +#[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))] +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum Round { + /// IEEE 754 nearest, `roundTiesToEven`. + Nearest = 0, + /// IEEE 754 `roundTowardNegative`. + Negative = 1, + /// IEEE 754 `roundTowardPositive`. 
+ Positive = 2, + /// IEEE 754 `roundTowardZero`. + Zero = 3, +} + +/// IEEE 754 exception status flags. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct Status(u8); + +impl Status { + /// Default status indicating no errors. + pub const OK: Self = Self(0); + + /// No definable result. + /// + /// Includes: + /// - Any ops on sNaN, with a few exceptions. + /// - `0 * inf`, `inf * 0`. + /// - `fma(0, inf, c)` or `fma(inf, 0, c)`, possibly excluding `c = qNaN`. + /// - `+inf + -inf` and similar (includes subtraction and fma). + /// - `0.0 / 0.0`, `inf / inf` + /// - `remainder(x, y)` if `y == 0.0` or `x == inf`, and neither is NaN. + /// - `sqrt(x)` with `x < 0.0`. + pub const INVALID: Self = Self(1); + + /// Division by zero. + /// + /// The default result for division is +/-inf based on operand sign. For `logB`, the default + /// result is -inf. + /// `x / y` when `x != 0.0` and `y == 0.0`, + #[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))] + pub const DIVIDE_BY_ZERO: Self = Self(1 << 2); + + /// The result exceeds the maximum finite value. + /// + /// The default result depends on rounding mode. `Nearest*` rounds to +/- infinity, sign based + /// on the intermediate result. `Zero` rounds to the signed maximum finite. `Positive` and + /// `Negative` round to signed maximum finite in one direction, signed infinity in the other. + #[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))] + pub const OVERFLOW: Self = Self(1 << 3); + + /// The result is subnormal and lost precision. + pub const UNDERFLOW: Self = Self(1 << 4); + + /// The finite-precision result does not match that of infinite precision, and the reason + /// is not represented by one of the other flags. + pub const INEXACT: Self = Self(1 << 5); + + /// True if `UNDERFLOW` is set. + #[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))] + pub const fn underflow(self) -> bool { + self.0 & Self::UNDERFLOW.0 != 0 + } + + /// True if `OVERFLOW` is set. + #[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))] + pub const fn overflow(self) -> bool { + self.0 & Self::OVERFLOW.0 != 0 + } + + pub fn set_underflow(&mut self, val: bool) { + self.set_flag(val, Self::UNDERFLOW); + } + + /// True if `INEXACT` is set. + pub const fn inexact(self) -> bool { + self.0 & Self::INEXACT.0 != 0 + } + + pub fn set_inexact(&mut self, val: bool) { + self.set_flag(val, Self::INEXACT); + } + + fn set_flag(&mut self, val: bool, mask: Self) { + if val { + self.0 |= mask.0; + } else { + self.0 &= !mask.0; + } + } + + pub(crate) const fn with(self, rhs: Self) -> Self { + Self(self.0 | rhs.0) + } +} diff --git a/library/compiler-builtins/libm/src/math/support/feature_detect.rs b/library/compiler-builtins/libm/src/math/support/feature_detect.rs new file mode 100644 index 0000000000000..9ebd434a5f857 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/support/feature_detect.rs @@ -0,0 +1,211 @@ +//! Helpers for runtime target feature detection that are shared across architectures. + +// `AtomicU32` is preferred for a consistent size across targets. +#[cfg(all(target_has_atomic = "ptr", not(target_has_atomic = "32")))] +compile_error!("currently all targets that support `AtomicPtr` also support `AtomicU32`"); + +use core::sync::atomic::{AtomicU32, Ordering}; + +/// Given a list of identifiers, assign each one a unique sequential single-bit mask. +#[allow(unused_macros)] +macro_rules! 
unique_masks { + ($ty:ty, $($name:ident,)+) => { + #[cfg(test)] + pub const ALL: &[$ty] = &[$($name),+]; + #[cfg(test)] + pub const NAMES: &[&str] = &[$(stringify!($name)),+]; + + unique_masks!(@one; $ty; 0; $($name,)+); + }; + // Matcher for a single value + (@one; $_ty:ty; $_idx:expr;) => {}; + (@one; $ty:ty; $shift:expr; $name:ident, $($tail:tt)*) => { + pub const $name: $ty = 1 << $shift; + // Ensure the top bit is not used since it stores initialized state. + const _: () = assert!($name != (1 << (<$ty>::BITS - 1))); + // Increment the shift and invoke the next + unique_masks!(@one; $ty; $shift + 1; $($tail)*); + }; +} + +/// Call `init` once to choose an implementation, then use it for the rest of the program. +/// +/// - `sig` is the function type. +/// - `init` is an expression called at startup that chooses an implementation and returns a +/// function pointer. +/// - `call` is an expression to call a function returned by `init`, encapsulating any safety +/// preconditions. +/// +/// The type `Func` is available in `init` and `call`. +/// +/// This is effectively our version of an ifunc without linker support. Note that `init` may be +/// called more than once until one completes. +#[allow(unused_macros)] // only used on some architectures +macro_rules! select_once { + ( + sig: fn($($arg:ident: $ArgTy:ty),*) -> $RetTy:ty, + init: $init:expr, + call: $call:expr, + ) => {{ + use core::mem; + use core::sync::atomic::{AtomicPtr, Ordering}; + + type Func = unsafe fn($($arg: $ArgTy),*) -> $RetTy; + + /// Stores a pointer that is immediately jumped to. By default it is an init function + /// that sets FUNC to something else. + static FUNC: AtomicPtr<()> = AtomicPtr::new((initializer as Func) as *mut ()); + + /// Run once to set the function that will be used for all subsequent calls. + fn initializer($($arg: $ArgTy),*) -> $RetTy { + // Select an implementation, ensuring a 'static lifetime. + let fn_ptr: Func = $init(); + FUNC.store(fn_ptr as *mut (), Ordering::Relaxed); + + // Forward the call to the selected function. + $call(fn_ptr) + } + + let raw: *mut () = FUNC.load(Ordering::Relaxed); + + // SAFETY: will only ever be `initializer` or another function pointer that has the + // 'static lifetime. + let fn_ptr: Func = unsafe { mem::transmute::<*mut (), Func>(raw) }; + + $call(fn_ptr) + }} +} + +#[allow(unused_imports)] +pub(crate) use {select_once, unique_masks}; + +use crate::support::cold_path; + +/// Helper for working with bit flags, based on `bitflags`. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct Flags(u32); + +#[allow(dead_code)] // only used on some architectures +impl Flags { + /// No bits set. + pub const fn empty() -> Self { + Self(0) + } + + /// Create with bits already set. + pub const fn from_bits(val: u32) -> Self { + Self(val) + } + + /// Get the integer representation. + pub fn bits(&self) -> u32 { + self.0 + } + + /// Set any bits in `mask`. + pub fn insert(&mut self, mask: u32) { + self.0 |= mask; + } + + /// Check whether the mask is set. + pub fn contains(&self, mask: u32) -> bool { + self.0 & mask == mask + } + + /// Check whether the nth bit is set. + pub fn test_nth(&self, bit: u32) -> bool { + debug_assert!(bit < u32::BITS, "bit index out-of-bounds"); + self.0 & (1 << bit) != 0 + } +} + +/// Load flags from an atomic value. If the flags have not yet been initialized, call `init` +/// to do so. +/// +/// Note that `init` may run more than once. 
+#[allow(dead_code)] // only used on some architectures +pub fn get_or_init_flags_cache(cache: &AtomicU32, init: impl FnOnce() -> Flags) -> Flags { + // The top bit is used to indicate that the values have already been set once. + const INITIALIZED: u32 = 1 << 31; + + // Relaxed ops are sufficient since the result should always be the same. + let mut flags = Flags::from_bits(cache.load(Ordering::Relaxed)); + + if !flags.contains(INITIALIZED) { + // Without this, `init` is inlined and the bit check gets wrapped in `init`'s lengthy + // prologue/epilogue. Cold pathing gives a preferable load->test->?jmp->ret. + cold_path(); + + flags = init(); + debug_assert!( + !flags.contains(INITIALIZED), + "initialized bit shouldn't be set" + ); + flags.insert(INITIALIZED); + cache.store(flags.bits(), Ordering::Relaxed); + } + + flags +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn unique_masks() { + unique_masks! { + u32, + V0, + V1, + V2, + } + assert_eq!(V0, 1u32 << 0); + assert_eq!(V1, 1u32 << 1); + assert_eq!(V2, 1u32 << 2); + assert_eq!(ALL, [V0, V1, V2]); + assert_eq!(NAMES, ["V0", "V1", "V2"]); + } + + #[test] + fn flag_cache_is_used() { + // Sanity check that flags are only ever set once + static CACHE: AtomicU32 = AtomicU32::new(0); + + let mut f1 = Flags::from_bits(0x1); + let f2 = Flags::from_bits(0x2); + + let r1 = get_or_init_flags_cache(&CACHE, || f1); + let r2 = get_or_init_flags_cache(&CACHE, || f2); + + f1.insert(1 << 31); // init bit + + assert_eq!(r1, f1); + assert_eq!(r2, f1); + } + + #[test] + fn select_cache_is_used() { + // Sanity check that cache is used + static CALLED: AtomicU32 = AtomicU32::new(0); + + fn inner() { + fn nop() {} + + select_once! { + sig: fn() -> (), + init: || { + CALLED.fetch_add(1, Ordering::Relaxed); + nop + }, + call: |fn_ptr: Func| unsafe { fn_ptr() }, + } + } + + // `init` should only have been called once. + inner(); + assert_eq!(CALLED.load(Ordering::Relaxed), 1); + inner(); + assert_eq!(CALLED.load(Ordering::Relaxed), 1); + } +} diff --git a/library/compiler-builtins/libm/src/math/support/float_traits.rs b/library/compiler-builtins/libm/src/math/support/float_traits.rs new file mode 100644 index 0000000000000..4c866ef10bdd7 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/support/float_traits.rs @@ -0,0 +1,551 @@ +#![allow(unknown_lints)] // FIXME(msrv) we shouldn't need this + +use core::{fmt, mem, ops}; + +use super::int_traits::{CastFrom, Int, MinInt}; + +/// Trait for some basic operations on floats +// #[allow(dead_code)] +pub trait Float: + Copy + + fmt::Debug + + PartialEq + + PartialOrd + + ops::AddAssign + + ops::MulAssign + + ops::Add + + ops::Sub + + ops::Mul + + ops::Div + + ops::Rem + + ops::Neg + + 'static +{ + /// A uint of the same width as the float + type Int: Int; + + /// A int of the same width as the float + type SignedInt: Int + + MinInt + + ops::Neg; + + const ZERO: Self; + const NEG_ZERO: Self; + const ONE: Self; + const NEG_ONE: Self; + const INFINITY: Self; + const NEG_INFINITY: Self; + const NAN: Self; + const NEG_NAN: Self; + const MAX: Self; + const MIN: Self; + const EPSILON: Self; + const PI: Self; + const NEG_PI: Self; + const FRAC_PI_2: Self; + + const MIN_POSITIVE_NORMAL: Self; + + /// The bitwidth of the float type + const BITS: u32; + + /// The bitwidth of the significand + const SIG_BITS: u32; + + /// The bitwidth of the exponent + const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - 1; + + /// The saturated (maximum bitpattern) value of the exponent, i.e. 
the infinite + /// representation. + /// + /// This shifted fully right, use `EXP_MASK` for the shifted value. + const EXP_SAT: u32 = (1 << Self::EXP_BITS) - 1; + + /// The exponent bias value + const EXP_BIAS: u32 = Self::EXP_SAT >> 1; + + /// Maximum unbiased exponent value. + const EXP_MAX: i32 = Self::EXP_BIAS as i32; + + /// Minimum *NORMAL* unbiased exponent value. + const EXP_MIN: i32 = -(Self::EXP_MAX - 1); + + /// Minimum subnormal exponent value. + const EXP_MIN_SUBNORM: i32 = Self::EXP_MIN - Self::SIG_BITS as i32; + + /// A mask for the sign bit + const SIGN_MASK: Self::Int; + + /// A mask for the significand + const SIG_MASK: Self::Int; + + /// A mask for the exponent + const EXP_MASK: Self::Int; + + /// The implicit bit of the float format + const IMPLICIT_BIT: Self::Int; + + /// Returns `self` transmuted to `Self::Int` + fn to_bits(self) -> Self::Int; + + /// Returns `self` transmuted to `Self::SignedInt` + #[allow(dead_code)] + fn to_bits_signed(self) -> Self::SignedInt { + self.to_bits().signed() + } + + /// Check bitwise equality. + #[allow(dead_code)] + fn biteq(self, rhs: Self) -> bool { + self.to_bits() == rhs.to_bits() + } + + /// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be + /// represented in multiple different ways. + /// + /// This method returns `true` if two NaNs are compared. Use [`biteq`](Self::biteq) instead + /// if `NaN` should not be treated separately. + #[allow(dead_code)] + fn eq_repr(self, rhs: Self) -> bool { + if self.is_nan() && rhs.is_nan() { + true + } else { + self.biteq(rhs) + } + } + + /// Returns true if the value is NaN. + fn is_nan(self) -> bool; + + /// Returns true if the value is +inf or -inf. + fn is_infinite(self) -> bool; + + /// Returns true if the sign is negative. Extracts the sign bit regardless of zero or NaN. + fn is_sign_negative(self) -> bool; + + /// Returns true if the sign is positive. Extracts the sign bit regardless of zero or NaN. + fn is_sign_positive(self) -> bool { + !self.is_sign_negative() + } + + /// Returns if `self` is subnormal. + #[allow(dead_code)] + fn is_subnormal(self) -> bool { + (self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO + } + + /// Returns the exponent, not adjusting for bias, not accounting for subnormals or zero. + fn ex(self) -> u32 { + u32::cast_from(self.to_bits() >> Self::SIG_BITS) & Self::EXP_SAT + } + + /// Extract the exponent and adjust it for bias, not accounting for subnormals or zero. + fn exp_unbiased(self) -> i32 { + self.ex().signed() - (Self::EXP_BIAS as i32) + } + + /// Returns the significand with no implicit bit (or the "fractional" part) + #[allow(dead_code)] + fn frac(self) -> Self::Int { + self.to_bits() & Self::SIG_MASK + } + + /// Returns a `Self::Int` transmuted back to `Self` + fn from_bits(a: Self::Int) -> Self; + + /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position. + fn from_parts(negative: bool, exponent: u32, significand: Self::Int) -> Self { + let sign = if negative { + Self::Int::ONE + } else { + Self::Int::ZERO + }; + Self::from_bits( + (sign << (Self::BITS - 1)) + | (Self::Int::cast_from(exponent & Self::EXP_SAT) << Self::SIG_BITS) + | (significand & Self::SIG_MASK), + ) + } + + #[allow(dead_code)] + fn abs(self) -> Self; + + /// Returns a number composed of the magnitude of self and the sign of sign. + fn copysign(self, other: Self) -> Self; + + /// Fused multiply add, rounding once. 
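// A worked instance of the derived constants above for f32 (BITS = 32,
// SIG_BITS = 23):
//   EXP_BITS = 8, EXP_SAT = 255, EXP_BIAS = 127,
//   EXP_MAX = 127, EXP_MIN = -126, EXP_MIN_SUBNORM = -149,
// matching the values asserted by `check_f32` in the tests at the end of this file.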
+ fn fma(self, y: Self, z: Self) -> Self; + + /// Returns (normalized exponent, normalized significand) + #[allow(dead_code)] + fn normalize(significand: Self::Int) -> (i32, Self::Int); + + /// Returns a number that represents the sign of self. + #[allow(dead_code)] + fn signum(self) -> Self { + if self.is_nan() { + self + } else { + Self::ONE.copysign(self) + } + } +} + +/// Access the associated `Int` type from a float (helper to avoid ambiguous associated types). +pub type IntTy = ::Int; + +macro_rules! float_impl { + ( + $ty:ident, + $ity:ident, + $sity:ident, + $bits:expr, + $significand_bits:expr, + $from_bits:path, + $to_bits:path, + $fma_fn:ident, + $fma_intrinsic:ident + ) => { + impl Float for $ty { + type Int = $ity; + type SignedInt = $sity; + + const ZERO: Self = 0.0; + const NEG_ZERO: Self = -0.0; + const ONE: Self = 1.0; + const NEG_ONE: Self = -1.0; + const INFINITY: Self = Self::INFINITY; + const NEG_INFINITY: Self = Self::NEG_INFINITY; + const NAN: Self = Self::NAN; + // NAN isn't guaranteed to be positive but it usually is. We only use this for + // tests. + const NEG_NAN: Self = $from_bits($to_bits(Self::NAN) | Self::SIGN_MASK); + const MAX: Self = -Self::MIN; + // Sign bit set, saturated mantissa, saturated exponent with last bit zeroed + const MIN: Self = $from_bits(Self::Int::MAX & !(1 << Self::SIG_BITS)); + const EPSILON: Self = <$ty>::EPSILON; + + // Exponent is a 1 in the LSB + const MIN_POSITIVE_NORMAL: Self = $from_bits(1 << Self::SIG_BITS); + + const PI: Self = core::$ty::consts::PI; + const NEG_PI: Self = -Self::PI; + const FRAC_PI_2: Self = core::$ty::consts::FRAC_PI_2; + + const BITS: u32 = $bits; + const SIG_BITS: u32 = $significand_bits; + + const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1); + const SIG_MASK: Self::Int = (1 << Self::SIG_BITS) - 1; + const EXP_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIG_MASK); + const IMPLICIT_BIT: Self::Int = 1 << Self::SIG_BITS; + + fn to_bits(self) -> Self::Int { + self.to_bits() + } + fn is_nan(self) -> bool { + self.is_nan() + } + fn is_infinite(self) -> bool { + self.is_infinite() + } + fn is_sign_negative(self) -> bool { + self.is_sign_negative() + } + fn from_bits(a: Self::Int) -> Self { + Self::from_bits(a) + } + fn abs(self) -> Self { + cfg_if! { + // FIXME(msrv): `abs` is available in `core` starting with 1.85. + if #[cfg(intrinsics_enabled)] { + self.abs() + } else { + super::super::generic::fabs(self) + } + } + } + fn copysign(self, other: Self) -> Self { + cfg_if! { + // FIXME(msrv): `copysign` is available in `core` starting with 1.85. + if #[cfg(intrinsics_enabled)] { + self.copysign(other) + } else { + super::super::generic::copysign(self, other) + } + } + } + fn fma(self, y: Self, z: Self) -> Self { + cfg_if! 
{ + // fma is not yet available in `core` + if #[cfg(intrinsics_enabled)] { + unsafe{ core::intrinsics::$fma_intrinsic(self, y, z) } + } else { + super::super::$fma_fn(self, y, z) + } + } + } + fn normalize(significand: Self::Int) -> (i32, Self::Int) { + let shift = significand.leading_zeros().wrapping_sub(Self::EXP_BITS); + ( + 1i32.wrapping_sub(shift as i32), + significand << shift as Self::Int, + ) + } + } + }; +} + +#[cfg(f16_enabled)] +float_impl!( + f16, + u16, + i16, + 16, + 10, + f16::from_bits, + f16::to_bits, + fmaf16, + fmaf16 +); +float_impl!( + f32, + u32, + i32, + 32, + 23, + f32_from_bits, + f32_to_bits, + fmaf, + fmaf32 +); +float_impl!( + f64, + u64, + i64, + 64, + 52, + f64_from_bits, + f64_to_bits, + fma, + fmaf64 +); +#[cfg(f128_enabled)] +float_impl!( + f128, + u128, + i128, + 128, + 112, + f128::from_bits, + f128::to_bits, + fmaf128, + fmaf128 +); + +/* FIXME(msrv): vendor some things that are not const stable at our MSRV */ + +/// `f32::from_bits` +#[allow(unnecessary_transmutes)] // lint appears in newer versions of Rust +pub const fn f32_from_bits(bits: u32) -> f32 { + // SAFETY: POD cast with no preconditions + unsafe { mem::transmute::(bits) } +} + +/// `f32::to_bits` +#[allow(unnecessary_transmutes)] // lint appears in newer versions of Rust +pub const fn f32_to_bits(x: f32) -> u32 { + // SAFETY: POD cast with no preconditions + unsafe { mem::transmute::(x) } +} + +/// `f64::from_bits` +#[allow(unnecessary_transmutes)] // lint appears in newer versions of Rust +pub const fn f64_from_bits(bits: u64) -> f64 { + // SAFETY: POD cast with no preconditions + unsafe { mem::transmute::(bits) } +} + +/// `f64::to_bits` +#[allow(unnecessary_transmutes)] // lint appears in newer versions of Rust +pub const fn f64_to_bits(x: f64) -> u64 { + // SAFETY: POD cast with no preconditions + unsafe { mem::transmute::(x) } +} + +/// Trait for floats twice the bit width of another integer. +pub trait DFloat: Float { + /// Float that is half the bit width of the floatthis trait is implemented for. + type H: HFloat; + + /// Narrow the float type. + fn narrow(self) -> Self::H; +} + +/// Trait for floats half the bit width of another float. +pub trait HFloat: Float { + /// Float that is double the bit width of the float this trait is implemented for. + type D: DFloat; + + /// Widen the float type. + fn widen(self) -> Self::D; +} + +macro_rules! impl_d_float { + ($($X:ident $D:ident),*) => { + $( + impl DFloat for $D { + type H = $X; + + fn narrow(self) -> Self::H { + self as $X + } + } + )* + }; +} + +macro_rules! 
impl_h_float { + ($($H:ident $X:ident),*) => { + $( + impl HFloat for $H { + type D = $X; + + fn widen(self) -> Self::D { + self as $X + } + } + )* + }; +} + +impl_d_float!(f32 f64); +#[cfg(f16_enabled)] +impl_d_float!(f16 f32); +#[cfg(f128_enabled)] +impl_d_float!(f64 f128); + +impl_h_float!(f32 f64); +#[cfg(f16_enabled)] +impl_h_float!(f16 f32); +#[cfg(f128_enabled)] +impl_h_float!(f64 f128); + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + #[cfg(f16_enabled)] + fn check_f16() { + // Constants + assert_eq!(f16::EXP_SAT, 0b11111); + assert_eq!(f16::EXP_BIAS, 15); + assert_eq!(f16::EXP_MAX, 15); + assert_eq!(f16::EXP_MIN, -14); + assert_eq!(f16::EXP_MIN_SUBNORM, -24); + + // `exp_unbiased` + assert_eq!(f16::FRAC_PI_2.exp_unbiased(), 0); + assert_eq!((1.0f16 / 2.0).exp_unbiased(), -1); + assert_eq!(f16::MAX.exp_unbiased(), 15); + assert_eq!(f16::MIN.exp_unbiased(), 15); + assert_eq!(f16::MIN_POSITIVE.exp_unbiased(), -14); + // This is a convenience method and not ldexp, `exp_unbiased` does not return correct + // results for zero and subnormals. + assert_eq!(f16::ZERO.exp_unbiased(), -15); + assert_eq!(f16::from_bits(0x1).exp_unbiased(), -15); + assert_eq!(f16::MIN_POSITIVE, f16::MIN_POSITIVE_NORMAL); + + // `from_parts` + assert_biteq!(f16::from_parts(true, f16::EXP_BIAS, 0), -1.0f16); + assert_biteq!(f16::from_parts(false, 0, 1), f16::from_bits(0x1)); + } + + #[test] + fn check_f32() { + // Constants + assert_eq!(f32::EXP_SAT, 0b11111111); + assert_eq!(f32::EXP_BIAS, 127); + assert_eq!(f32::EXP_MAX, 127); + assert_eq!(f32::EXP_MIN, -126); + assert_eq!(f32::EXP_MIN_SUBNORM, -149); + + // `exp_unbiased` + assert_eq!(f32::FRAC_PI_2.exp_unbiased(), 0); + assert_eq!((1.0f32 / 2.0).exp_unbiased(), -1); + assert_eq!(f32::MAX.exp_unbiased(), 127); + assert_eq!(f32::MIN.exp_unbiased(), 127); + assert_eq!(f32::MIN_POSITIVE.exp_unbiased(), -126); + // This is a convenience method and not ldexp, `exp_unbiased` does not return correct + // results for zero and subnormals. + assert_eq!(f32::ZERO.exp_unbiased(), -127); + assert_eq!(f32::from_bits(0x1).exp_unbiased(), -127); + assert_eq!(f32::MIN_POSITIVE, f32::MIN_POSITIVE_NORMAL); + + // `from_parts` + assert_biteq!(f32::from_parts(true, f32::EXP_BIAS, 0), -1.0f32); + assert_biteq!( + f32::from_parts(false, 10 + f32::EXP_BIAS, 0), + hf32!("0x1p10") + ); + assert_biteq!(f32::from_parts(false, 0, 1), f32::from_bits(0x1)); + } + + #[test] + fn check_f64() { + // Constants + assert_eq!(f64::EXP_SAT, 0b11111111111); + assert_eq!(f64::EXP_BIAS, 1023); + assert_eq!(f64::EXP_MAX, 1023); + assert_eq!(f64::EXP_MIN, -1022); + assert_eq!(f64::EXP_MIN_SUBNORM, -1074); + + // `exp_unbiased` + assert_eq!(f64::FRAC_PI_2.exp_unbiased(), 0); + assert_eq!((1.0f64 / 2.0).exp_unbiased(), -1); + assert_eq!(f64::MAX.exp_unbiased(), 1023); + assert_eq!(f64::MIN.exp_unbiased(), 1023); + assert_eq!(f64::MIN_POSITIVE.exp_unbiased(), -1022); + // This is a convenience method and not ldexp, `exp_unbiased` does not return correct + // results for zero and subnormals. 
+ assert_eq!(f64::ZERO.exp_unbiased(), -1023); + assert_eq!(f64::from_bits(0x1).exp_unbiased(), -1023); + assert_eq!(f64::MIN_POSITIVE, f64::MIN_POSITIVE_NORMAL); + + // `from_parts` + assert_biteq!(f64::from_parts(true, f64::EXP_BIAS, 0), -1.0f64); + assert_biteq!( + f64::from_parts(false, 10 + f64::EXP_BIAS, 0), + hf64!("0x1p10") + ); + assert_biteq!(f64::from_parts(false, 0, 1), f64::from_bits(0x1)); + } + + #[test] + #[cfg(f128_enabled)] + fn check_f128() { + // Constants + assert_eq!(f128::EXP_SAT, 0b111111111111111); + assert_eq!(f128::EXP_BIAS, 16383); + assert_eq!(f128::EXP_MAX, 16383); + assert_eq!(f128::EXP_MIN, -16382); + assert_eq!(f128::EXP_MIN_SUBNORM, -16494); + + // `exp_unbiased` + assert_eq!(f128::FRAC_PI_2.exp_unbiased(), 0); + assert_eq!((1.0f128 / 2.0).exp_unbiased(), -1); + assert_eq!(f128::MAX.exp_unbiased(), 16383); + assert_eq!(f128::MIN.exp_unbiased(), 16383); + assert_eq!(f128::MIN_POSITIVE.exp_unbiased(), -16382); + // This is a convenience method and not ldexp, `exp_unbiased` does not return correct + // results for zero and subnormals. + assert_eq!(f128::ZERO.exp_unbiased(), -16383); + assert_eq!(f128::from_bits(0x1).exp_unbiased(), -16383); + assert_eq!(f128::MIN_POSITIVE, f128::MIN_POSITIVE_NORMAL); + + // `from_parts` + assert_biteq!(f128::from_parts(true, f128::EXP_BIAS, 0), -1.0f128); + assert_biteq!(f128::from_parts(false, 0, 1), f128::from_bits(0x1)); + } +} diff --git a/library/compiler-builtins/libm/src/math/support/hex_float.rs b/library/compiler-builtins/libm/src/math/support/hex_float.rs new file mode 100644 index 0000000000000..85569d98aef48 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/support/hex_float.rs @@ -0,0 +1,1181 @@ +//! Utilities for working with hex float formats. + +use core::fmt; + +use super::{Float, Round, Status, f32_from_bits, f64_from_bits}; + +/// Construct a 16-bit float from hex float representation (C-style) +#[cfg(f16_enabled)] +pub const fn hf16(s: &str) -> f16 { + match parse_hex_exact(s, 16, 10) { + Ok(bits) => f16::from_bits(bits as u16), + Err(HexFloatParseError(s)) => panic!("{}", s), + } +} + +/// Construct a 32-bit float from hex float representation (C-style) +#[allow(unused)] +pub const fn hf32(s: &str) -> f32 { + match parse_hex_exact(s, 32, 23) { + Ok(bits) => f32_from_bits(bits as u32), + Err(HexFloatParseError(s)) => panic!("{}", s), + } +} + +/// Construct a 64-bit float from hex float representation (C-style) +pub const fn hf64(s: &str) -> f64 { + match parse_hex_exact(s, 64, 52) { + Ok(bits) => f64_from_bits(bits as u64), + Err(HexFloatParseError(s)) => panic!("{}", s), + } +} + +/// Construct a 128-bit float from hex float representation (C-style) +#[cfg(f128_enabled)] +pub const fn hf128(s: &str) -> f128 { + match parse_hex_exact(s, 128, 112) { + Ok(bits) => f128::from_bits(bits), + Err(HexFloatParseError(s)) => panic!("{}", s), + } +} +#[derive(Copy, Clone, Debug)] +pub struct HexFloatParseError(&'static str); + +/// Parses any float to its bitwise representation, returning an error if it cannot be represented exactly +pub const fn parse_hex_exact( + s: &str, + bits: u32, + sig_bits: u32, +) -> Result { + match parse_any(s, bits, sig_bits, Round::Nearest) { + Err(e) => Err(e), + Ok((bits, Status::OK)) => Ok(bits), + Ok((_, status)) if status.overflow() => Err(HexFloatParseError("the value is too huge")), + Ok((_, status)) if status.underflow() => Err(HexFloatParseError("the value is too tiny")), + Ok((_, status)) if status.inexact() => Err(HexFloatParseError("the value is too precise")), + 
Ok(_) => unreachable!(), + } +} + +/// Parse any float from hex to its bitwise representation. +pub const fn parse_any( + s: &str, + bits: u32, + sig_bits: u32, + round: Round, +) -> Result<(u128, Status), HexFloatParseError> { + let mut b = s.as_bytes(); + + if sig_bits > 119 || bits > 128 || bits < sig_bits + 3 || bits > sig_bits + 30 { + return Err(HexFloatParseError("unsupported target float configuration")); + } + + let neg = matches!(b, [b'-', ..]); + if let &[b'-' | b'+', ref rest @ ..] = b { + b = rest; + } + + let sign_bit = 1 << (bits - 1); + let quiet_bit = 1 << (sig_bits - 1); + let nan = sign_bit - quiet_bit; + let inf = nan - quiet_bit; + + let (mut x, status) = match *b { + [b'i' | b'I', b'n' | b'N', b'f' | b'F'] => (inf, Status::OK), + [b'n' | b'N', b'a' | b'A', b'n' | b'N'] => (nan, Status::OK), + [b'0', b'x' | b'X', ref rest @ ..] => { + let round = match (neg, round) { + // parse("-x", Round::Positive) == -parse("x", Round::Negative) + (true, Round::Positive) => Round::Negative, + (true, Round::Negative) => Round::Positive, + // rounding toward nearest or zero are symmetric + (true, Round::Nearest | Round::Zero) | (false, _) => round, + }; + match parse_finite(rest, bits, sig_bits, round) { + Err(e) => return Err(e), + Ok(res) => res, + } + } + _ => return Err(HexFloatParseError("no hex indicator")), + }; + + if neg { + x ^= sign_bit; + } + + Ok((x, status)) +} + +const fn parse_finite( + b: &[u8], + bits: u32, + sig_bits: u32, + rounding_mode: Round, +) -> Result<(u128, Status), HexFloatParseError> { + let exp_bits: u32 = bits - sig_bits - 1; + let max_msb: i32 = (1 << (exp_bits - 1)) - 1; + // The exponent of one ULP in the subnormals + let min_lsb: i32 = 1 - max_msb - sig_bits as i32; + + let (mut sig, mut exp) = match parse_hex(b) { + Err(e) => return Err(e), + Ok(Parsed { sig: 0, .. }) => return Ok((0, Status::OK)), + Ok(Parsed { sig, exp }) => (sig, exp), + }; + + let mut round_bits = u128_ilog2(sig) as i32 - sig_bits as i32; + + // Round at least up to min_lsb + if exp < min_lsb - round_bits { + round_bits = min_lsb - exp; + } + + let mut status = Status::OK; + + exp += round_bits; + + if round_bits > 0 { + // first, prepare for rounding exactly two bits + if round_bits == 1 { + sig <<= 1; + } else if round_bits > 2 { + sig = shr_odd_rounding(sig, (round_bits - 2) as u32); + } + + if sig & 0b11 != 0 { + status = Status::INEXACT; + } + + sig = shr2_round(sig, rounding_mode); + } else if round_bits < 0 { + sig <<= -round_bits; + } + + // The parsed value is X = sig * 2^exp + // Expressed as a multiple U of the smallest subnormal value: + // X = U * 2^min_lsb, so U = sig * 2^(exp-min_lsb) + let uexp = (exp - min_lsb) as u128; + let uexp = uexp << sig_bits; + + // Note that it is possible for the exponent bits to equal 2 here + // if the value rounded up, but that means the mantissa is all zeroes + // so the value is still correct + debug_assert!(sig <= 2 << sig_bits); + + let inf = ((1 << exp_bits) - 1) << sig_bits; + + let bits = match sig.checked_add(uexp) { + Some(bits) if bits < inf => { + // inexact subnormal or zero? + if status.inexact() && bits < (1 << sig_bits) { + status = status.with(Status::UNDERFLOW); + } + bits + } + _ => { + // overflow to infinity + status = status.with(Status::OVERFLOW).with(Status::INEXACT); + match rounding_mode { + Round::Positive | Round::Nearest => inf, + Round::Negative | Round::Zero => inf - 1, + } + } + }; + Ok((bits, status)) +} + +/// Shift right, rounding all inexact divisions to the nearest odd number +/// E.g. 
(0 >> 4) -> 0, (1..=31 >> 4) -> 1, (32 >> 4) -> 2, ... +/// +/// Useful for reducing a number before rounding the last two bits, since +/// the result of the final rounding is preserved for all rounding modes. +const fn shr_odd_rounding(x: u128, k: u32) -> u128 { + if k < 128 { + let inexact = x.trailing_zeros() < k; + (x >> k) | (inexact as u128) + } else { + (x != 0) as u128 + } +} + +/// Divide by 4, rounding with the given mode +const fn shr2_round(mut x: u128, round: Round) -> u128 { + let t = (x as u32) & 0b111; + x >>= 2; + match round { + // Look-up-table on the last three bits for when to round up + Round::Nearest => x + ((0b11001000_u8 >> t) & 1) as u128, + + Round::Negative => x, + Round::Zero => x, + Round::Positive => x + (t & 0b11 != 0) as u128, + } +} + +/// A parsed finite and unsigned floating point number. +struct Parsed { + /// Absolute value sig * 2^exp + sig: u128, + exp: i32, +} + +/// Parse a hexadecimal float x +const fn parse_hex(mut b: &[u8]) -> Result { + let mut sig: u128 = 0; + let mut exp: i32 = 0; + + let mut seen_point = false; + let mut some_digits = false; + let mut inexact = false; + + while let &[c, ref rest @ ..] = b { + b = rest; + + match c { + b'.' => { + if seen_point { + return Err(HexFloatParseError( + "unexpected '.' parsing fractional digits", + )); + } + seen_point = true; + continue; + } + b'p' | b'P' => break, + c => { + let digit = match hex_digit(c) { + Some(d) => d, + None => return Err(HexFloatParseError("expected hexadecimal digit")), + }; + some_digits = true; + + if (sig >> 124) == 0 { + sig <<= 4; + sig |= digit as u128; + } else { + // FIXME: it is technically possible for exp to overflow if parsing a string with >500M digits + exp += 4; + inexact |= digit != 0; + } + // Up until the fractional point, the value grows + // with more digits, but after it the exponent is + // compensated to match. + if seen_point { + exp -= 4; + } + } + } + } + // If we've set inexact, the exact value has more than 125 + // significant bits, and lies somewhere between sig and sig + 1. + // Because we'll round off at least two of the trailing bits, + // setting the last bit gives correct rounding for inexact values. + sig |= inexact as u128; + + if !some_digits { + return Err(HexFloatParseError("at least one digit is required")); + }; + + some_digits = false; + + let negate_exp = matches!(b, [b'-', ..]); + if let &[b'-' | b'+', ref rest @ ..] = b { + b = rest; + } + + let mut pexp: u32 = 0; + while let &[c, ref rest @ ..] 
= b { + b = rest; + let digit = match dec_digit(c) { + Some(d) => d, + None => return Err(HexFloatParseError("expected decimal digit")), + }; + some_digits = true; + pexp = pexp.saturating_mul(10); + pexp += digit as u32; + } + + if !some_digits { + return Err(HexFloatParseError( + "at least one exponent digit is required", + )); + }; + + { + let e; + if negate_exp { + e = (exp as i64) - (pexp as i64); + } else { + e = (exp as i64) + (pexp as i64); + }; + + exp = if e < i32::MIN as i64 { + i32::MIN + } else if e > i32::MAX as i64 { + i32::MAX + } else { + e as i32 + }; + } + /* FIXME(msrv): once MSRV >= 1.66, replace the above workaround block with: + if negate_exp { + exp = exp.saturating_sub_unsigned(pexp); + } else { + exp = exp.saturating_add_unsigned(pexp); + }; + */ + + Ok(Parsed { sig, exp }) +} + +const fn dec_digit(c: u8) -> Option { + match c { + b'0'..=b'9' => Some(c - b'0'), + _ => None, + } +} + +const fn hex_digit(c: u8) -> Option { + match c { + b'0'..=b'9' => Some(c - b'0'), + b'a'..=b'f' => Some(c - b'a' + 10), + b'A'..=b'F' => Some(c - b'A' + 10), + _ => None, + } +} + +/* FIXME(msrv): vendor some things that are not const stable at our MSRV */ + +/// `u128::ilog2` +const fn u128_ilog2(v: u128) -> u32 { + assert!(v != 0); + u128::BITS - 1 - v.leading_zeros() +} + +/// Format a floating point number as its IEEE hex (`%a`) representation. +pub struct Hexf(pub F); + +// Adapted from https://github.com/ericseppanen/hexfloat2/blob/a5c27932f0ff/src/format.rs +#[cfg(not(feature = "compiler-builtins"))] +fn fmt_any_hex(x: &F, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if x.is_sign_negative() { + write!(f, "-")?; + } + + if x.is_nan() { + return write!(f, "NaN"); + } else if x.is_infinite() { + return write!(f, "inf"); + } else if *x == F::ZERO { + return write!(f, "0x0p+0"); + } + + let mut exponent = x.exp_unbiased(); + let sig = x.to_bits() & F::SIG_MASK; + + let bias = F::EXP_BIAS as i32; + // The mantissa MSB needs to be shifted up to the nearest nibble. + let mshift = (4 - (F::SIG_BITS % 4)) % 4; + let sig = sig << mshift; + // The width is rounded up to the nearest char (4 bits) + let mwidth = (F::SIG_BITS as usize + 3) / 4; + let leading = if exponent == -bias { + // subnormal number means we shift our output by 1 bit. + exponent += 1; + "0." + } else { + "1." + }; + + write!(f, "0x{leading}{sig:0mwidth$x}p{exponent:+}") +} + +#[cfg(feature = "compiler-builtins")] +fn fmt_any_hex(_x: &F, _f: &mut fmt::Formatter<'_>) -> fmt::Result { + unimplemented!() +} + +impl fmt::LowerHex for Hexf { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + cfg_if! { + if #[cfg(feature = "compiler-builtins")] { + let _ = f; + unimplemented!() + } else { + fmt_any_hex(&self.0, f) + } + } + } +} + +impl fmt::LowerHex for Hexf<(F, F)> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + cfg_if! { + if #[cfg(feature = "compiler-builtins")] { + let _ = f; + unimplemented!() + } else { + write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1)) + } + } + } +} + +impl fmt::LowerHex for Hexf<(F, i32)> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + cfg_if! { + if #[cfg(feature = "compiler-builtins")] { + let _ = f; + unimplemented!() + } else { + write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1)) + } + } + } +} + +impl fmt::LowerHex for Hexf { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + cfg_if! 
{ + if #[cfg(feature = "compiler-builtins")] { + let _ = f; + unimplemented!() + } else { + fmt::LowerHex::fmt(&self.0, f) + } + } + } +} + +impl fmt::Debug for Hexf +where + Hexf: fmt::LowerHex, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + cfg_if! { + if #[cfg(feature = "compiler-builtins")] { + let _ = f; + unimplemented!() + } else { + fmt::LowerHex::fmt(self, f) + } + } + } +} + +impl fmt::Display for Hexf +where + Hexf: fmt::LowerHex, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + cfg_if! { + if #[cfg(feature = "compiler-builtins")] { + let _ = f; + unimplemented!() + } else { + fmt::LowerHex::fmt(self, f) + } + } + } +} + +#[cfg(test)] +mod parse_tests { + extern crate std; + use std::{format, println}; + + use super::*; + + #[cfg(f16_enabled)] + fn rounding_properties(s: &str) -> Result<(), HexFloatParseError> { + let (xd, s0) = parse_any(s, 16, 10, Round::Negative)?; + let (xu, s1) = parse_any(s, 16, 10, Round::Positive)?; + let (xz, s2) = parse_any(s, 16, 10, Round::Zero)?; + let (xn, s3) = parse_any(s, 16, 10, Round::Nearest)?; + + // FIXME: A value between the least normal and largest subnormal + // could have underflow status depend on rounding mode. + + if let Status::OK = s0 { + // an exact result is the same for all rounding modes + assert_eq!(s0, s1); + assert_eq!(s0, s2); + assert_eq!(s0, s3); + + assert_eq!(xd, xu); + assert_eq!(xd, xz); + assert_eq!(xd, xn); + } else { + assert!([s0, s1, s2, s3].into_iter().all(Status::inexact)); + + let xd = f16::from_bits(xd as u16); + let xu = f16::from_bits(xu as u16); + let xz = f16::from_bits(xz as u16); + let xn = f16::from_bits(xn as u16); + + assert_biteq!(xd.next_up(), xu, "s={s}, xd={xd:?}, xu={xu:?}"); + + let signs = [xd, xu, xz, xn].map(f16::is_sign_negative); + + if signs == [true; 4] { + assert_biteq!(xz, xu); + } else { + assert_eq!(signs, [false; 4]); + assert_biteq!(xz, xd); + } + + if xn.to_bits() != xd.to_bits() { + assert_biteq!(xn, xu); + } + } + Ok(()) + } + #[test] + #[cfg(f16_enabled)] + fn test_rounding() { + let n = 1_i32 << 14; + for i in -n..n { + let u = i.rotate_right(11) as u32; + let s = format!("{}", Hexf(f32::from_bits(u))); + assert!(rounding_properties(&s).is_ok()); + } + } + + #[test] + fn test_parse_any() { + for k in -149..=127 { + let s = format!("0x1p{k}"); + let x = hf32(&s); + let y = if k < 0 { + 0.5f32.powi(-k) + } else { + 2.0f32.powi(k) + }; + assert_eq!(x, y); + } + + let mut s = *b"0x.0000000p-121"; + for e in 0..40 { + for k in 0..(1 << 15) { + let expected = f32::from_bits(k) * 2.0f32.powi(e); + let x = hf32(std::str::from_utf8(&s).unwrap()); + assert_eq!( + x.to_bits(), + expected.to_bits(), + "\ + e={e}\n\ + k={k}\n\ + x={x}\n\ + expected={expected}\n\ + s={}\n\ + f32::from_bits(k)={}\n\ + 2.0f32.powi(e)={}\ + ", + std::str::from_utf8(&s).unwrap(), + f32::from_bits(k), + 2.0f32.powi(e), + ); + for i in (3..10).rev() { + if s[i] == b'f' { + s[i] = b'0'; + } else if s[i] == b'9' { + s[i] = b'a'; + break; + } else { + s[i] += 1; + break; + } + } + } + for i in (12..15).rev() { + if s[i] == b'0' { + s[i] = b'9'; + } else { + s[i] -= 1; + break; + } + } + for i in (3..10).rev() { + s[i] = b'0'; + } + } + } + + // FIXME: this test is causing failures that are likely UB on various platforms + #[cfg(all(target_arch = "x86_64", target_os = "linux"))] + #[test] + #[cfg(f128_enabled)] + fn rounding() { + let pi = std::f128::consts::PI; + let s = format!("{}", Hexf(pi)); + + for k in 0..=111 { + let (bits, status) = parse_any(&s, 128 - k, 112 - k, 
Round::Nearest).unwrap(); + let scale = (1u128 << (112 - k - 1)) as f128; + let expected = (pi * scale).round_ties_even() / scale; + assert_eq!(bits << k, expected.to_bits(), "k = {k}, s = {s}"); + assert_eq!(expected != pi, status.inexact()); + } + } + #[test] + fn rounding_extreme_underflow() { + for k in 1..1000 { + let s = format!("0x1p{}", -149 - k); + let Ok((bits, status)) = parse_any(&s, 32, 23, Round::Nearest) else { + unreachable!() + }; + assert_eq!(bits, 0, "{s} should round to zero, got bits={bits}"); + assert!( + status.underflow(), + "should indicate underflow when parsing {s}" + ); + assert!(status.inexact(), "should indicate inexact when parsing {s}"); + } + } + #[test] + fn long_tail() { + for k in 1..1000 { + let s = format!("0x1.{}p0", "0".repeat(k)); + let Ok(bits) = parse_hex_exact(&s, 32, 23) else { + panic!("parsing {s} failed") + }; + assert_eq!(f32::from_bits(bits as u32), 1.0); + + let s = format!("0x1.{}1p0", "0".repeat(k)); + let Ok((bits, status)) = parse_any(&s, 32, 23, Round::Nearest) else { + unreachable!() + }; + if status.inexact() { + assert!(1.0 == f32::from_bits(bits as u32)); + } else { + assert!(1.0 < f32::from_bits(bits as u32)); + } + } + } + // HACK(msrv): 1.63 rejects unknown width float literals at an AST level, so use a macro to + // hide them from the AST. + #[cfg(f16_enabled)] + macro_rules! f16_tests { + () => { + #[test] + fn test_f16() { + let checks = [ + ("0x.1234p+16", (0x1234 as f16).to_bits()), + ("0x1.234p+12", (0x1234 as f16).to_bits()), + ("0x12.34p+8", (0x1234 as f16).to_bits()), + ("0x123.4p+4", (0x1234 as f16).to_bits()), + ("0x1234p+0", (0x1234 as f16).to_bits()), + ("0x1234.p+0", (0x1234 as f16).to_bits()), + ("0x1234.0p+0", (0x1234 as f16).to_bits()), + ("0x1.ffcp+15", f16::MAX.to_bits()), + ("0x1.0p+1", 2.0f16.to_bits()), + ("0x1.0p+0", 1.0f16.to_bits()), + ("0x1.ffp+8", 0x5ffc), + ("+0x1.ffp+8", 0x5ffc), + ("0x1p+0", 0x3c00), + ("0x1.998p-4", 0x2e66), + ("0x1.9p+6", 0x5640), + ("0x0.0p0", 0.0f16.to_bits()), + ("-0x0.0p0", (-0.0f16).to_bits()), + ("0x1.0p0", 1.0f16.to_bits()), + ("0x1.998p-4", (0.1f16).to_bits()), + ("-0x1.998p-4", (-0.1f16).to_bits()), + ("0x0.123p-12", 0x0123), + ("0x1p-24", 0x0001), + ("nan", f16::NAN.to_bits()), + ("-nan", (-f16::NAN).to_bits()), + ("inf", f16::INFINITY.to_bits()), + ("-inf", f16::NEG_INFINITY.to_bits()), + ]; + for (s, exp) in checks { + println!("parsing {s}"); + assert!(rounding_properties(s).is_ok()); + let act = hf16(s).to_bits(); + assert_eq!( + act, exp, + "parsing {s}: {act:#06x} != {exp:#06x}\nact: {act:#018b}\nexp: {exp:#018b}" + ); + } + } + + #[test] + fn test_macros_f16() { + assert_eq!(hf16!("0x1.ffp+8").to_bits(), 0x5ffc_u16); + } + }; + } + + #[cfg(f16_enabled)] + f16_tests!(); + + #[test] + fn test_f32() { + let checks = [ + ("0x.1234p+16", (0x1234 as f32).to_bits()), + ("0x1.234p+12", (0x1234 as f32).to_bits()), + ("0x12.34p+8", (0x1234 as f32).to_bits()), + ("0x123.4p+4", (0x1234 as f32).to_bits()), + ("0x1234p+0", (0x1234 as f32).to_bits()), + ("0x1234.p+0", (0x1234 as f32).to_bits()), + ("0x1234.0p+0", (0x1234 as f32).to_bits()), + ("0x1.fffffep+127", f32::MAX.to_bits()), + ("0x1.0p+1", 2.0f32.to_bits()), + ("0x1.0p+0", 1.0f32.to_bits()), + ("0x1.ffep+8", 0x43fff000), + ("+0x1.ffep+8", 0x43fff000), + ("0x1p+0", 0x3f800000), + ("0x1.99999ap-4", 0x3dcccccd), + ("0x1.9p+6", 0x42c80000), + ("0x1.2d5ed2p+20", 0x4996af69), + ("-0x1.348eb8p+10", 0xc49a475c), + ("-0x1.33dcfep-33", 0xaf19ee7f), + ("0x0.0p0", 0.0f32.to_bits()), + ("-0x0.0p0", (-0.0f32).to_bits()), + 
("0x1.0p0", 1.0f32.to_bits()), + ("0x1.99999ap-4", (0.1f32).to_bits()), + ("-0x1.99999ap-4", (-0.1f32).to_bits()), + ("0x1.111114p-127", 0x00444445), + ("0x1.23456p-130", 0x00091a2b), + ("0x1p-149", 0x00000001), + ("nan", f32::NAN.to_bits()), + ("-nan", (-f32::NAN).to_bits()), + ("inf", f32::INFINITY.to_bits()), + ("-inf", f32::NEG_INFINITY.to_bits()), + ]; + for (s, exp) in checks { + println!("parsing {s}"); + let act = hf32(s).to_bits(); + assert_eq!( + act, exp, + "parsing {s}: {act:#010x} != {exp:#010x}\nact: {act:#034b}\nexp: {exp:#034b}" + ); + } + } + + #[test] + fn test_f64() { + let checks = [ + ("0x.1234p+16", (0x1234 as f64).to_bits()), + ("0x1.234p+12", (0x1234 as f64).to_bits()), + ("0x12.34p+8", (0x1234 as f64).to_bits()), + ("0x123.4p+4", (0x1234 as f64).to_bits()), + ("0x1234p+0", (0x1234 as f64).to_bits()), + ("0x1234.p+0", (0x1234 as f64).to_bits()), + ("0x1234.0p+0", (0x1234 as f64).to_bits()), + ("0x1.ffep+8", 0x407ffe0000000000), + ("0x1p+0", 0x3ff0000000000000), + ("0x1.999999999999ap-4", 0x3fb999999999999a), + ("0x1.9p+6", 0x4059000000000000), + ("0x1.2d5ed1fe1da7bp+20", 0x4132d5ed1fe1da7b), + ("-0x1.348eb851eb852p+10", 0xc09348eb851eb852), + ("-0x1.33dcfe54a3803p-33", 0xbde33dcfe54a3803), + ("0x1.0p0", 1.0f64.to_bits()), + ("0x0.0p0", 0.0f64.to_bits()), + ("-0x0.0p0", (-0.0f64).to_bits()), + ("0x1.999999999999ap-4", 0.1f64.to_bits()), + ("0x1.999999999998ap-4", (0.1f64 - f64::EPSILON).to_bits()), + ("-0x1.999999999999ap-4", (-0.1f64).to_bits()), + ("-0x1.999999999998ap-4", (-0.1f64 + f64::EPSILON).to_bits()), + ("0x0.8000000000001p-1022", 0x0008000000000001), + ("0x0.123456789abcdp-1022", 0x000123456789abcd), + ("0x0.0000000000002p-1022", 0x0000000000000002), + ("nan", f64::NAN.to_bits()), + ("-nan", (-f64::NAN).to_bits()), + ("inf", f64::INFINITY.to_bits()), + ("-inf", f64::NEG_INFINITY.to_bits()), + ]; + for (s, exp) in checks { + println!("parsing {s}"); + let act = hf64(s).to_bits(); + assert_eq!( + act, exp, + "parsing {s}: {act:#018x} != {exp:#018x}\nact: {act:#066b}\nexp: {exp:#066b}" + ); + } + } + + // HACK(msrv): 1.63 rejects unknown width float literals at an AST level, so use a macro to + // hide them from the AST. + #[cfg(f128_enabled)] + macro_rules! 
f128_tests { + () => { + #[test] + fn test_f128() { + let checks = [ + ("0x.1234p+16", (0x1234 as f128).to_bits()), + ("0x1.234p+12", (0x1234 as f128).to_bits()), + ("0x12.34p+8", (0x1234 as f128).to_bits()), + ("0x123.4p+4", (0x1234 as f128).to_bits()), + ("0x1234p+0", (0x1234 as f128).to_bits()), + ("0x1234.p+0", (0x1234 as f128).to_bits()), + ("0x1234.0p+0", (0x1234 as f128).to_bits()), + ("0x1.ffffffffffffffffffffffffffffp+16383", f128::MAX.to_bits()), + ("0x1.0p+1", 2.0f128.to_bits()), + ("0x1.0p+0", 1.0f128.to_bits()), + ("0x1.ffep+8", 0x4007ffe0000000000000000000000000), + ("+0x1.ffep+8", 0x4007ffe0000000000000000000000000), + ("0x1p+0", 0x3fff0000000000000000000000000000), + ("0x1.999999999999999999999999999ap-4", 0x3ffb999999999999999999999999999a), + ("0x1.9p+6", 0x40059000000000000000000000000000), + ("0x0.0p0", 0.0f128.to_bits()), + ("-0x0.0p0", (-0.0f128).to_bits()), + ("0x1.0p0", 1.0f128.to_bits()), + ("0x1.999999999999999999999999999ap-4", (0.1f128).to_bits()), + ("-0x1.999999999999999999999999999ap-4", (-0.1f128).to_bits()), + ("0x0.abcdef0123456789abcdef012345p-16382", 0x0000abcdef0123456789abcdef012345), + ("0x1p-16494", 0x00000000000000000000000000000001), + ("nan", f128::NAN.to_bits()), + ("-nan", (-f128::NAN).to_bits()), + ("inf", f128::INFINITY.to_bits()), + ("-inf", f128::NEG_INFINITY.to_bits()), + ]; + for (s, exp) in checks { + println!("parsing {s}"); + let act = hf128(s).to_bits(); + assert_eq!( + act, exp, + "parsing {s}: {act:#034x} != {exp:#034x}\nact: {act:#0130b}\nexp: {exp:#0130b}" + ); + } + } + + #[test] + fn test_macros_f128() { + assert_eq!(hf128!("0x1.ffep+8").to_bits(), 0x4007ffe0000000000000000000000000_u128); + } + } + } + + #[cfg(f128_enabled)] + f128_tests!(); + + #[test] + fn test_macros() { + #[cfg(f16_enabled)] + assert_eq!(hf16!("0x1.ffp+8").to_bits(), 0x5ffc_u16); + assert_eq!(hf32!("0x1.ffep+8").to_bits(), 0x43fff000_u32); + assert_eq!(hf64!("0x1.ffep+8").to_bits(), 0x407ffe0000000000_u64); + #[cfg(f128_enabled)] + assert_eq!( + hf128!("0x1.ffep+8").to_bits(), + 0x4007ffe0000000000000000000000000_u128 + ); + } +} + +#[cfg(test)] +// FIXME(ppc): something with `should_panic` tests cause a SIGILL with ppc64le +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +mod tests_panicking { + extern crate std; + use super::*; + + // HACK(msrv): 1.63 rejects unknown width float literals at an AST level, so use a macro to + // hide them from the AST. + #[cfg(f16_enabled)] + macro_rules! f16_tests { + () => { + #[test] + fn test_f16_almost_extra_precision() { + // Exact maximum precision allowed + hf16("0x1.ffcp+0"); + } + + #[test] + #[should_panic(expected = "the value is too precise")] + fn test_f16_extra_precision() { + // One bit more than the above. + hf16("0x1.ffdp+0"); + } + + #[test] + #[should_panic(expected = "the value is too huge")] + fn test_f16_overflow() { + // One bit more than the above. 
+ hf16("0x1p+16"); + } + + #[test] + fn test_f16_tiniest() { + let x = hf16("0x1.p-24"); + let y = hf16("0x0.001p-12"); + let z = hf16("0x0.8p-23"); + assert_eq!(x, y); + assert_eq!(x, z); + } + + #[test] + #[should_panic(expected = "the value is too tiny")] + fn test_f16_too_tiny() { + hf16("0x1.p-25"); + } + + #[test] + #[should_panic(expected = "the value is too tiny")] + fn test_f16_also_too_tiny() { + hf16("0x0.8p-24"); + } + + #[test] + #[should_panic(expected = "the value is too tiny")] + fn test_f16_again_too_tiny() { + hf16("0x0.001p-13"); + } + }; + } + + #[cfg(f16_enabled)] + f16_tests!(); + + #[test] + fn test_f32_almost_extra_precision() { + // Exact maximum precision allowed + hf32("0x1.abcdeep+0"); + } + + #[test] + #[should_panic] + fn test_f32_extra_precision2() { + // One bit more than the above. + hf32("0x1.ffffffp+127"); + } + + #[test] + #[should_panic(expected = "the value is too huge")] + fn test_f32_overflow() { + // One bit more than the above. + hf32("0x1p+128"); + } + + #[test] + #[should_panic(expected = "the value is too precise")] + fn test_f32_extra_precision() { + // One bit more than the above. + hf32("0x1.abcdefp+0"); + } + + #[test] + fn test_f32_tiniest() { + let x = hf32("0x1.p-149"); + let y = hf32("0x0.0000000000000001p-85"); + let z = hf32("0x0.8p-148"); + assert_eq!(x, y); + assert_eq!(x, z); + } + + #[test] + #[should_panic(expected = "the value is too tiny")] + fn test_f32_too_tiny() { + hf32("0x1.p-150"); + } + + #[test] + #[should_panic(expected = "the value is too tiny")] + fn test_f32_also_too_tiny() { + hf32("0x0.8p-149"); + } + + #[test] + #[should_panic(expected = "the value is too tiny")] + fn test_f32_again_too_tiny() { + hf32("0x0.0000000000000001p-86"); + } + + #[test] + fn test_f64_almost_extra_precision() { + // Exact maximum precision allowed + hf64("0x1.abcdabcdabcdfp+0"); + } + + #[test] + #[should_panic(expected = "the value is too precise")] + fn test_f64_extra_precision() { + // One bit more than the above. + hf64("0x1.abcdabcdabcdf8p+0"); + } + + // HACK(msrv): 1.63 rejects unknown width float literals at an AST level, so use a macro to + // hide them from the AST. + #[cfg(f128_enabled)] + macro_rules! f128_tests { + () => { + #[test] + fn test_f128_almost_extra_precision() { + // Exact maximum precision allowed + hf128("0x1.ffffffffffffffffffffffffffffp+16383"); + } + + #[test] + #[should_panic(expected = "the value is too precise")] + fn test_f128_extra_precision() { + // Just below the maximum finite. + hf128("0x1.fffffffffffffffffffffffffffe8p+16383"); + } + #[test] + #[should_panic(expected = "the value is too huge")] + fn test_f128_extra_precision_overflow() { + // One bit more than the above. Should overflow. + hf128("0x1.ffffffffffffffffffffffffffff8p+16383"); + } + + #[test] + #[should_panic(expected = "the value is too huge")] + fn test_f128_overflow() { + // One bit more than the above. 
+ hf128("0x1p+16384"); + } + + #[test] + fn test_f128_tiniest() { + let x = hf128("0x1.p-16494"); + let y = hf128("0x0.0000000000000001p-16430"); + let z = hf128("0x0.8p-16493"); + assert_eq!(x, y); + assert_eq!(x, z); + } + + #[test] + #[should_panic(expected = "the value is too tiny")] + fn test_f128_too_tiny() { + hf128("0x1.p-16495"); + } + + #[test] + #[should_panic(expected = "the value is too tiny")] + fn test_f128_again_too_tiny() { + hf128("0x0.0000000000000001p-16431"); + } + + #[test] + #[should_panic(expected = "the value is too tiny")] + fn test_f128_also_too_tiny() { + hf128("0x0.8p-16494"); + } + }; + } + + #[cfg(f128_enabled)] + f128_tests!(); +} + +#[cfg(test)] +mod print_tests { + extern crate std; + use std::string::ToString; + + use super::*; + + #[test] + #[cfg(f16_enabled)] + fn test_f16() { + use std::format; + // Exhaustively check that `f16` roundtrips. + for x in 0..=u16::MAX { + let f = f16::from_bits(x); + let s = format!("{}", Hexf(f)); + let from_s = hf16(&s); + + if f.is_nan() && from_s.is_nan() { + continue; + } + + assert_eq!( + f.to_bits(), + from_s.to_bits(), + "{f:?} formatted as {s} but parsed as {from_s:?}" + ); + } + } + + #[test] + #[cfg(f16_enabled)] + fn test_f16_to_f32() { + use std::format; + // Exhaustively check that these are equivalent for all `f16`: + // - `f16 -> f32` + // - `f16 -> str -> f32` + // - `f16 -> f32 -> str -> f32` + // - `f16 -> f32 -> str -> f16 -> f32` + for x in 0..=u16::MAX { + let f16 = f16::from_bits(x); + let s16 = format!("{}", Hexf(f16)); + let f32 = f16 as f32; + let s32 = format!("{}", Hexf(f32)); + + let a = hf32(&s16); + let b = hf32(&s32); + let c = hf16(&s32); + + if f32.is_nan() && a.is_nan() && b.is_nan() && c.is_nan() { + continue; + } + + assert_eq!( + f32.to_bits(), + a.to_bits(), + "{f16:?} : f16 formatted as {s16} which parsed as {a:?} : f16" + ); + assert_eq!( + f32.to_bits(), + b.to_bits(), + "{f32:?} : f32 formatted as {s32} which parsed as {b:?} : f32" + ); + assert_eq!( + f32.to_bits(), + (c as f32).to_bits(), + "{f32:?} : f32 formatted as {s32} which parsed as {c:?} : f16" + ); + } + } + #[test] + fn spot_checks() { + assert_eq!(Hexf(f32::MAX).to_string(), "0x1.fffffep+127"); + assert_eq!(Hexf(f64::MAX).to_string(), "0x1.fffffffffffffp+1023"); + + assert_eq!(Hexf(f32::MIN).to_string(), "-0x1.fffffep+127"); + assert_eq!(Hexf(f64::MIN).to_string(), "-0x1.fffffffffffffp+1023"); + + assert_eq!(Hexf(f32::ZERO).to_string(), "0x0p+0"); + assert_eq!(Hexf(f64::ZERO).to_string(), "0x0p+0"); + + assert_eq!(Hexf(f32::NEG_ZERO).to_string(), "-0x0p+0"); + assert_eq!(Hexf(f64::NEG_ZERO).to_string(), "-0x0p+0"); + + assert_eq!(Hexf(f32::NAN).to_string(), "NaN"); + assert_eq!(Hexf(f64::NAN).to_string(), "NaN"); + + assert_eq!(Hexf(f32::INFINITY).to_string(), "inf"); + assert_eq!(Hexf(f64::INFINITY).to_string(), "inf"); + + assert_eq!(Hexf(f32::NEG_INFINITY).to_string(), "-inf"); + assert_eq!(Hexf(f64::NEG_INFINITY).to_string(), "-inf"); + + #[cfg(f16_enabled)] + { + assert_eq!(Hexf(f16::MAX).to_string(), "0x1.ffcp+15"); + assert_eq!(Hexf(f16::MIN).to_string(), "-0x1.ffcp+15"); + assert_eq!(Hexf(f16::ZERO).to_string(), "0x0p+0"); + assert_eq!(Hexf(f16::NEG_ZERO).to_string(), "-0x0p+0"); + assert_eq!(Hexf(f16::NAN).to_string(), "NaN"); + assert_eq!(Hexf(f16::INFINITY).to_string(), "inf"); + assert_eq!(Hexf(f16::NEG_INFINITY).to_string(), "-inf"); + } + + #[cfg(f128_enabled)] + { + assert_eq!( + Hexf(f128::MAX).to_string(), + "0x1.ffffffffffffffffffffffffffffp+16383" + ); + assert_eq!( + Hexf(f128::MIN).to_string(), + 
"-0x1.ffffffffffffffffffffffffffffp+16383" + ); + assert_eq!(Hexf(f128::ZERO).to_string(), "0x0p+0"); + assert_eq!(Hexf(f128::NEG_ZERO).to_string(), "-0x0p+0"); + assert_eq!(Hexf(f128::NAN).to_string(), "NaN"); + assert_eq!(Hexf(f128::INFINITY).to_string(), "inf"); + assert_eq!(Hexf(f128::NEG_INFINITY).to_string(), "-inf"); + } + } +} diff --git a/library/compiler-builtins/libm/src/math/support/int_traits.rs b/library/compiler-builtins/libm/src/math/support/int_traits.rs new file mode 100644 index 0000000000000..3ec1faba170cb --- /dev/null +++ b/library/compiler-builtins/libm/src/math/support/int_traits.rs @@ -0,0 +1,455 @@ +use core::{cmp, fmt, ops}; + +/// Minimal integer implementations needed on all integer types, including wide integers. +pub trait MinInt: + Copy + + fmt::Debug + + ops::BitOr + + ops::Not + + ops::Shl +{ + /// Type with the same width but other signedness + type OtherSign: MinInt; + /// Unsigned version of Self + type Unsigned: MinInt; + + /// If `Self` is a signed integer + const SIGNED: bool; + + /// The bitwidth of the int type + const BITS: u32; + + const ZERO: Self; + const ONE: Self; + const MIN: Self; + const MAX: Self; +} + +/// Access the associated `OtherSign` type from an int (helper to avoid ambiguous associated +/// types). +pub type OtherSign = ::OtherSign; + +/// Trait for some basic operations on integers +#[allow(dead_code)] +pub trait Int: + MinInt + + fmt::Display + + fmt::Binary + + fmt::LowerHex + + PartialEq + + PartialOrd + + ops::AddAssign + + ops::SubAssign + + ops::MulAssign + + ops::DivAssign + + ops::RemAssign + + ops::BitAndAssign + + ops::BitOrAssign + + ops::BitXorAssign + + ops::ShlAssign + + ops::ShlAssign + + ops::ShrAssign + + ops::ShrAssign + + ops::Add + + ops::Sub + + ops::Mul + + ops::Div + + ops::Rem + + ops::Shl + + ops::Shl + + ops::Shr + + ops::Shr + + ops::BitXor + + ops::BitAnd + + cmp::Ord + + From + + CastFrom + + CastFrom + + CastFrom + + CastFrom + + CastFrom + + CastInto + + CastInto + + CastInto + + CastInto + + CastInto +{ + fn signed(self) -> OtherSign; + fn unsigned(self) -> Self::Unsigned; + fn from_unsigned(unsigned: Self::Unsigned) -> Self; + fn abs(self) -> Self; + + fn from_bool(b: bool) -> Self; + + /// Prevents the need for excessive conversions between signed and unsigned + fn logical_shr(self, other: u32) -> Self; + + /// Absolute difference between two integers. + fn abs_diff(self, other: Self) -> Self::Unsigned; + + // copied from primitive integers, but put in a trait + fn is_zero(self) -> bool; + fn checked_add(self, other: Self) -> Option; + fn checked_sub(self, other: Self) -> Option; + fn wrapping_neg(self) -> Self; + fn wrapping_add(self, other: Self) -> Self; + fn wrapping_mul(self, other: Self) -> Self; + fn wrapping_sub(self, other: Self) -> Self; + fn wrapping_shl(self, other: u32) -> Self; + fn wrapping_shr(self, other: u32) -> Self; + fn rotate_left(self, other: u32) -> Self; + fn overflowing_add(self, other: Self) -> (Self, bool); + fn overflowing_sub(self, other: Self) -> (Self, bool); + fn leading_zeros(self) -> u32; + fn ilog2(self) -> u32; +} + +macro_rules! 
int_impl_common { + ($ty:ty) => { + fn from_bool(b: bool) -> Self { + b as $ty + } + + fn logical_shr(self, other: u32) -> Self { + Self::from_unsigned(self.unsigned().wrapping_shr(other)) + } + + fn is_zero(self) -> bool { + self == Self::ZERO + } + + fn checked_add(self, other: Self) -> Option { + self.checked_add(other) + } + + fn checked_sub(self, other: Self) -> Option { + self.checked_sub(other) + } + + fn wrapping_neg(self) -> Self { + ::wrapping_neg(self) + } + + fn wrapping_add(self, other: Self) -> Self { + ::wrapping_add(self, other) + } + + fn wrapping_mul(self, other: Self) -> Self { + ::wrapping_mul(self, other) + } + + fn wrapping_sub(self, other: Self) -> Self { + ::wrapping_sub(self, other) + } + + fn wrapping_shl(self, other: u32) -> Self { + ::wrapping_shl(self, other) + } + + fn wrapping_shr(self, other: u32) -> Self { + ::wrapping_shr(self, other) + } + + fn rotate_left(self, other: u32) -> Self { + ::rotate_left(self, other) + } + + fn overflowing_add(self, other: Self) -> (Self, bool) { + ::overflowing_add(self, other) + } + + fn overflowing_sub(self, other: Self) -> (Self, bool) { + ::overflowing_sub(self, other) + } + + fn leading_zeros(self) -> u32 { + ::leading_zeros(self) + } + + fn ilog2(self) -> u32 { + // On our older MSRV, this resolves to the trait method. Which won't actually work, + // but this is only called behind other gates. + #[allow(clippy::incompatible_msrv)] + ::ilog2(self) + } + }; +} + +macro_rules! int_impl { + ($ity:ty, $uty:ty) => { + impl MinInt for $uty { + type OtherSign = $ity; + type Unsigned = $uty; + + const BITS: u32 = ::ZERO.count_zeros(); + const SIGNED: bool = Self::MIN != Self::ZERO; + + const ZERO: Self = 0; + const ONE: Self = 1; + const MIN: Self = ::MIN; + const MAX: Self = ::MAX; + } + + impl Int for $uty { + fn signed(self) -> $ity { + self as $ity + } + + fn unsigned(self) -> Self { + self + } + + fn abs(self) -> Self { + unimplemented!() + } + + // It makes writing macros easier if this is implemented for both signed and unsigned + #[allow(clippy::wrong_self_convention)] + fn from_unsigned(me: $uty) -> Self { + me + } + + fn abs_diff(self, other: Self) -> Self { + self.abs_diff(other) + } + + int_impl_common!($uty); + } + + impl MinInt for $ity { + type OtherSign = $uty; + type Unsigned = $uty; + + const BITS: u32 = ::ZERO.count_zeros(); + const SIGNED: bool = Self::MIN != Self::ZERO; + + const ZERO: Self = 0; + const ONE: Self = 1; + const MIN: Self = ::MIN; + const MAX: Self = ::MAX; + } + + impl Int for $ity { + fn signed(self) -> Self { + self + } + + fn unsigned(self) -> $uty { + self as $uty + } + + fn abs(self) -> Self { + self.abs() + } + + fn from_unsigned(me: $uty) -> Self { + me as $ity + } + + fn abs_diff(self, other: Self) -> $uty { + self.abs_diff(other) + } + + int_impl_common!($ity); + } + }; +} + +int_impl!(isize, usize); +int_impl!(i8, u8); +int_impl!(i16, u16); +int_impl!(i32, u32); +int_impl!(i64, u64); +int_impl!(i128, u128); + +/// Trait for integers twice the bit width of another integer. This is implemented for all +/// primitives except for `u8`, because there is not a smaller primitive. 
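The `MinInt`/`Int` traits above exist so the rest of libm can write integer helpers once and reuse them at every width. A minimal sketch of that pattern, using only methods and constants shown in the trait definitions; the helper name is illustrative, and the `crate::support` path mirrors the `$crate::support::…` path used elsewhere in this diff:

use crate::support::Int;

/// Whether `x` has exactly one bit set, for any width implementing `Int`.
fn is_pow2<I: Int>(x: I) -> bool {
    // Clearing the lowest set bit with `x & (x - 1)` leaves zero only when a
    // single bit was set to begin with.
    !x.is_zero() && (x & x.wrapping_sub(I::ONE)).is_zero()
}

The same call then works for `u32`, `u128`, or any other implementor: `is_pow2(64u32)` and `is_pow2(1u128 << 90)` both resolve through the one generic body.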
+pub trait DInt: MinInt { + /// Integer that is half the bit width of the integer this trait is implemented for + type H: HInt; + + /// Returns the low half of `self` + fn lo(self) -> Self::H; + /// Returns the high half of `self` + fn hi(self) -> Self::H; + /// Returns the low and high halves of `self` as a tuple + fn lo_hi(self) -> (Self::H, Self::H) { + (self.lo(), self.hi()) + } + /// Constructs an integer using lower and higher half parts + #[allow(unused)] + fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self { + lo.zero_widen() | hi.widen_hi() + } +} + +/// Trait for integers half the bit width of another integer. This is implemented for all +/// primitives except for `u128`, because it there is not a larger primitive. +pub trait HInt: Int { + /// Integer that is double the bit width of the integer this trait is implemented for + type D: DInt + MinInt; + + // NB: some of the below methods could have default implementations (e.g. `widen_hi`), but for + // unknown reasons this can cause infinite recursion when optimizations are disabled. See + // for context. + + /// Widens (using default extension) the integer to have double bit width + fn widen(self) -> Self::D; + /// Widens (zero extension only) the integer to have double bit width. This is needed to get + /// around problems with associated type bounds (such as `Int`) being unstable + fn zero_widen(self) -> Self::D; + /// Widens the integer to have double bit width and shifts the integer into the higher bits + #[allow(unused)] + fn widen_hi(self) -> Self::D; + /// Widening multiplication with zero widening. This cannot overflow. + fn zero_widen_mul(self, rhs: Self) -> Self::D; + /// Widening multiplication. This cannot overflow. + fn widen_mul(self, rhs: Self) -> Self::D; +} + +macro_rules! impl_d_int { + ($($X:ident $D:ident),*) => { + $( + impl DInt for $D { + type H = $X; + + fn lo(self) -> Self::H { + self as $X + } + fn hi(self) -> Self::H { + (self >> <$X as MinInt>::BITS) as $X + } + } + )* + }; +} + +macro_rules! impl_h_int { + ($($H:ident $uH:ident $X:ident),*) => { + $( + impl HInt for $H { + type D = $X; + + fn widen(self) -> Self::D { + self as $X + } + fn zero_widen(self) -> Self::D { + (self as $uH) as $X + } + fn zero_widen_mul(self, rhs: Self) -> Self::D { + self.zero_widen().wrapping_mul(rhs.zero_widen()) + } + fn widen_mul(self, rhs: Self) -> Self::D { + self.widen().wrapping_mul(rhs.widen()) + } + fn widen_hi(self) -> Self::D { + (self as $X) << ::BITS + } + } + )* + }; +} + +impl_d_int!(u8 u16, u16 u32, u32 u64, u64 u128, i8 i16, i16 i32, i32 i64, i64 i128); +impl_h_int!( + u8 u8 u16, + u16 u16 u32, + u32 u32 u64, + u64 u64 u128, + i8 u8 i16, + i16 u16 i32, + i32 u32 i64, + i64 u64 i128 +); + +/// Trait to express (possibly lossy) casting of integers +pub trait CastInto: Copy { + /// By default, casts should be exact. + fn cast(self) -> T; + + /// Call for casts that are expected to truncate. + fn cast_lossy(self) -> T; +} + +pub trait CastFrom: Copy { + /// By default, casts should be exact. + fn cast_from(value: T) -> Self; + + /// Call for casts that are expected to truncate. + fn cast_from_lossy(value: T) -> Self; +} + +impl + Copy> CastFrom for T { + fn cast_from(value: U) -> Self { + value.cast() + } + + fn cast_from_lossy(value: U) -> Self { + value.cast_lossy() + } +} + +macro_rules! 
cast_into { + ($ty:ty) => { + cast_into!($ty; usize, isize, u8, i8, u16, i16, u32, i32, u64, i64, u128, i128); + }; + ($ty:ty; $($into:ty),*) => {$( + impl CastInto<$into> for $ty { + fn cast(self) -> $into { + // All we can really do to enforce casting rules is check the rules when in + // debug mode. + #[cfg(not(feature = "compiler-builtins"))] + debug_assert!(<$into>::try_from(self).is_ok(), "failed cast from {self}"); + self as $into + } + + fn cast_lossy(self) -> $into { + self as $into + } + } + )*}; +} + +macro_rules! cast_into_float { + ($ty:ty) => { + #[cfg(f16_enabled)] + cast_into_float!($ty; f16); + + cast_into_float!($ty; f32, f64); + + #[cfg(f128_enabled)] + cast_into_float!($ty; f128); + }; + ($ty:ty; $($into:ty),*) => {$( + impl CastInto<$into> for $ty { + fn cast(self) -> $into { + #[cfg(not(feature = "compiler-builtins"))] + debug_assert_eq!(self as $into as $ty, self, "inexact float cast"); + self as $into + } + + fn cast_lossy(self) -> $into { + self as $into + } + } + )*}; +} + +cast_into!(usize); +cast_into!(isize); +cast_into!(u8); +cast_into!(i8); +cast_into!(u16); +cast_into!(i16); +cast_into!(u32); +cast_into!(i32); +cast_into!(u64); +cast_into!(i64); +cast_into!(u128); +cast_into!(i128); + +cast_into_float!(i8); +cast_into_float!(i16); +cast_into_float!(i32); +cast_into_float!(i64); +cast_into_float!(i128); diff --git a/library/compiler-builtins/libm/src/math/support/macros.rs b/library/compiler-builtins/libm/src/math/support/macros.rs new file mode 100644 index 0000000000000..0b72db0e46e8b --- /dev/null +++ b/library/compiler-builtins/libm/src/math/support/macros.rs @@ -0,0 +1,157 @@ +/// `libm` cannot have dependencies, so this is vendored directly from the `cfg-if` crate +/// (with some comments stripped for compactness). +macro_rules! cfg_if { + // match if/else chains with a final `else` + ($( + if #[cfg($meta:meta)] { $($tokens:tt)* } + ) else * else { + $($tokens2:tt)* + }) => { + cfg_if! { @__items () ; $( ( ($meta) ($($tokens)*) ), )* ( () ($($tokens2)*) ), } + }; + + // match if/else chains lacking a final `else` + ( + if #[cfg($i_met:meta)] { $($i_tokens:tt)* } + $( else if #[cfg($e_met:meta)] { $($e_tokens:tt)* } )* + ) => { + cfg_if! { + @__items + () ; + ( ($i_met) ($($i_tokens)*) ), + $( ( ($e_met) ($($e_tokens)*) ), )* + ( () () ), + } + }; + + // Internal and recursive macro to emit all the items + // + // Collects all the negated cfgs in a list at the beginning and after the + // semicolon is all the remaining items + (@__items ($($not:meta,)*) ; ) => {}; + (@__items ($($not:meta,)*) ; ( ($($m:meta),*) ($($tokens:tt)*) ), $($rest:tt)*) => { + #[cfg(all($($m,)* not(any($($not),*))))] cfg_if! { @__identity $($tokens)* } + cfg_if! { @__items ($($not,)* $($m,)*) ; $($rest)* } + }; + + // Internal macro to make __apply work out right for different match types, + // because of how macros matching/expand stuff. + (@__identity $($tokens:tt)*) => { $($tokens)* }; +} + +/// Choose between using an arch-specific implementation and the function body. Returns directly +/// if the arch implementation is used, otherwise continue with the rest of the function. +/// +/// Specify a `use_arch` meta field if an architecture-specific implementation is provided. +/// These live in the `math::arch::some_target_arch` module. +/// +/// Specify a `use_arch_required` meta field if something architecture-specific must be used +/// regardless of feature configuration (`force-soft-floats`). 
+/// +/// The passed meta options do not need to account for the `arch` target feature. +macro_rules! select_implementation { + ( + name: $fn_name:ident, + // Configuration meta for when to use arch-specific implementation that requires hard + // float ops + $( use_arch: $use_arch:meta, )? + // Configuration meta for when to use the arch module regardless of whether softfloats + // have been requested. + $( use_arch_required: $use_arch_required:meta, )? + args: $($arg:ident),+ , + ) => { + // FIXME: these use paths that are a pretty fragile (`super`). We should figure out + // something better w.r.t. how this is vendored into compiler-builtins. + + // However, we do need a few things from `arch` that are used even with soft floats. + select_implementation! { + @cfg $($use_arch_required)?; + if true { + return super::arch::$fn_name( $($arg),+ ); + } + } + + // By default, never use arch-specific implementations if we have force-soft-floats + #[cfg(arch_enabled)] + select_implementation! { + @cfg $($use_arch)?; + // Wrap in `if true` to avoid unused warnings + if true { + return super::arch::$fn_name( $($arg),+ ); + } + } + }; + + // Coalesce helper to construct an expression only if a config is provided + (@cfg ; $ex:expr) => { }; + (@cfg $provided:meta; $ex:expr) => { #[cfg($provided)] $ex }; +} + +/// Construct a 16-bit float from hex float representation (C-style), guaranteed to +/// evaluate at compile time. +#[cfg(f16_enabled)] +#[cfg_attr(feature = "unstable-public-internals", macro_export)] +#[allow(unused_macros)] +macro_rules! hf16 { + ($s:literal) => {{ + const X: f16 = $crate::support::hf16($s); + X + }}; +} + +/// Construct a 32-bit float from hex float representation (C-style), guaranteed to +/// evaluate at compile time. +#[allow(unused_macros)] +#[cfg_attr(feature = "unstable-public-internals", macro_export)] +macro_rules! hf32 { + ($s:literal) => {{ + const X: f32 = $crate::support::hf32($s); + X + }}; +} + +/// Construct a 64-bit float from hex float representation (C-style), guaranteed to +/// evaluate at compile time. +#[allow(unused_macros)] +#[cfg_attr(feature = "unstable-public-internals", macro_export)] +macro_rules! hf64 { + ($s:literal) => {{ + const X: f64 = $crate::support::hf64($s); + X + }}; +} + +/// Construct a 128-bit float from hex float representation (C-style), guaranteed to +/// evaluate at compile time. +#[cfg(f128_enabled)] +#[allow(unused_macros)] +#[cfg_attr(feature = "unstable-public-internals", macro_export)] +macro_rules! hf128 { + ($s:literal) => {{ + const X: f128 = $crate::support::hf128($s); + X + }}; +} + +/// Assert `F::biteq` with better messages. +#[cfg(test)] +macro_rules! assert_biteq { + ($left:expr, $right:expr, $($tt:tt)*) => {{ + use $crate::support::Int; + let l = $left; + let r = $right; + let bits = Int::leading_zeros(l.to_bits() - l.to_bits()); // hack to get the width from the value + assert!( + l.biteq(r), + "{}\nl: {l:?} ({lb:#0width$x})\nr: {r:?} ({rb:#0width$x})", + format_args!($($tt)*), + lb = l.to_bits(), + rb = r.to_bits(), + width = ((bits / 4) + 2) as usize, + + ); + }}; + ($left:expr, $right:expr $(,)?) => { + assert_biteq!($left, $right, "") + }; +} diff --git a/library/compiler-builtins/libm/src/math/support/mod.rs b/library/compiler-builtins/libm/src/math/support/mod.rs new file mode 100644 index 0000000000000..a4f596ab8442f --- /dev/null +++ b/library/compiler-builtins/libm/src/math/support/mod.rs @@ -0,0 +1,32 @@ +#[macro_use] +pub mod macros; +mod big; +mod env; +// Runtime feature detection requires atomics. 
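The `{ const X: … = …; X }` bodies in the `hf*!` macros above are what give the "guaranteed to evaluate at compile time" behaviour: a `const` item must be evaluated by the compiler, so a malformed hex literal becomes a build error instead of a runtime panic. A small self-contained sketch of the same trick; the `parse_digit` helper and `digit!` macro are illustrative, not part of libm:

const fn parse_digit(b: u8) -> u8 {
    // Panicking in a const context turns bad input into a compile error,
    // much as hf16/hf32/hf64/hf128 reject malformed literals during the build.
    match b {
        b'0'..=b'9' => b - b'0',
        _ => panic!("not a digit"),
    }
}

macro_rules! digit {
    ($b:expr) => {{
        // Routing through a `const` item forces evaluation during compilation.
        const D: u8 = parse_digit($b);
        D
    }};
}

fn main() {
    assert_eq!(digit!(b'7'), 7);
    // `digit!(b'x')` would fail to compile rather than panic at run time.
}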
+#[cfg(target_has_atomic = "ptr")] +pub(crate) mod feature_detect; +mod float_traits; +pub mod hex_float; +mod int_traits; + +#[allow(unused_imports)] +pub use big::{i256, u256}; +pub use env::{FpResult, Round, Status}; +#[allow(unused_imports)] +pub use float_traits::{DFloat, Float, HFloat, IntTy}; +pub(crate) use float_traits::{f32_from_bits, f64_from_bits}; +#[cfg(f16_enabled)] +#[allow(unused_imports)] +pub use hex_float::hf16; +#[cfg(f128_enabled)] +#[allow(unused_imports)] +pub use hex_float::hf128; +#[allow(unused_imports)] +pub use hex_float::{Hexf, hf32, hf64}; +pub use int_traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt}; + +/// Hint to the compiler that the current path is cold. +pub fn cold_path() { + #[cfg(intrinsics_enabled)] + core::intrinsics::cold_path(); +} diff --git a/library/compiler-builtins/libm/src/math/tan.rs b/library/compiler-builtins/libm/src/math/tan.rs new file mode 100644 index 0000000000000..a072bdec56e00 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/tan.rs @@ -0,0 +1,74 @@ +// origin: FreeBSD /usr/src/lib/msun/src/s_tan.c */ +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== + +use super::{k_tan, rem_pio2}; + +// tan(x) +// Return tangent function of x. +// +// kernel function: +// k_tan ... tangent function on [-pi/4,pi/4] +// rem_pio2 ... argument reduction routine +// +// Method. +// Let S,C and T denote the sin, cos and tan respectively on +// [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2 +// in [-pi/4 , +pi/4], and let n = k mod 4. +// We have +// +// n sin(x) cos(x) tan(x) +// ---------------------------------------------------------- +// 0 S C T +// 1 C -S -1/T +// 2 -S -C T +// 3 -C S -1/T +// ---------------------------------------------------------- +// +// Special cases: +// Let trig be any of sin, cos, or tan. +// trig(+-INF) is NaN, with signals; +// trig(NaN) is that NaN; +// +// Accuracy: +// TRIG(x) returns trig(x) nearly rounded + +/// The tangent of `x` (f64). +/// +/// `x` is specified in radians. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn tan(x: f64) -> f64 { + let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 + + let ix = (f64::to_bits(x) >> 32) as u32 & 0x7fffffff; + /* |x| ~< pi/4 */ + if ix <= 0x3fe921fb { + if ix < 0x3e400000 { + /* |x| < 2**-27 */ + /* raise inexact if x!=0 and underflow if subnormal */ + force_eval!(if ix < 0x00100000 { + x / x1p120 as f64 + } else { + x + x1p120 as f64 + }); + return x; + } + return k_tan(x, 0.0, 0); + } + + /* tan(Inf or NaN) is NaN */ + if ix >= 0x7ff00000 { + return x - x; + } + + /* argument reduction */ + let (n, y0, y1) = rem_pio2(x); + k_tan(y0, y1, n & 1) +} diff --git a/library/compiler-builtins/libm/src/math/tanf.rs b/library/compiler-builtins/libm/src/math/tanf.rs new file mode 100644 index 0000000000000..8bcf9581ff600 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/tanf.rs @@ -0,0 +1,81 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_tanf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + * Optimized by Bruce D. Evans. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. 
All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use core::f64::consts::FRAC_PI_2; + +use super::{k_tanf, rem_pio2f}; + +/* Small multiples of pi/2 rounded to double precision. */ +const T1_PIO2: f64 = 1. * FRAC_PI_2; /* 0x3FF921FB, 0x54442D18 */ +const T2_PIO2: f64 = 2. * FRAC_PI_2; /* 0x400921FB, 0x54442D18 */ +const T3_PIO2: f64 = 3. * FRAC_PI_2; /* 0x4012D97C, 0x7F3321D2 */ +const T4_PIO2: f64 = 4. * FRAC_PI_2; /* 0x401921FB, 0x54442D18 */ + +/// The tangent of `x` (f32). +/// +/// `x` is specified in radians. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn tanf(x: f32) -> f32 { + let x64 = x as f64; + + let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 + + let mut ix = x.to_bits(); + let sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + + if ix <= 0x3f490fda { + /* |x| ~<= pi/4 */ + if ix < 0x39800000 { + /* |x| < 2**-12 */ + /* raise inexact if x!=0 and underflow if subnormal */ + force_eval!(if ix < 0x00800000 { + x / x1p120 + } else { + x + x1p120 + }); + return x; + } + return k_tanf(x64, false); + } + if ix <= 0x407b53d1 { + /* |x| ~<= 5*pi/4 */ + if ix <= 0x4016cbe3 { + /* |x| ~<= 3pi/4 */ + return k_tanf(if sign { x64 + T1_PIO2 } else { x64 - T1_PIO2 }, true); + } else { + return k_tanf(if sign { x64 + T2_PIO2 } else { x64 - T2_PIO2 }, false); + } + } + if ix <= 0x40e231d5 { + /* |x| ~<= 9*pi/4 */ + if ix <= 0x40afeddf { + /* |x| ~<= 7*pi/4 */ + return k_tanf(if sign { x64 + T3_PIO2 } else { x64 - T3_PIO2 }, true); + } else { + return k_tanf(if sign { x64 + T4_PIO2 } else { x64 - T4_PIO2 }, false); + } + } + + /* tan(Inf or NaN) is NaN */ + if ix >= 0x7f800000 { + return x - x; + } + + /* argument reduction */ + let (n, y) = rem_pio2f(x); + k_tanf(y, n & 1 != 0) +} diff --git a/library/compiler-builtins/libm/src/math/tanh.rs b/library/compiler-builtins/libm/src/math/tanh.rs new file mode 100644 index 0000000000000..cc0abe4fcb2d4 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/tanh.rs @@ -0,0 +1,53 @@ +use super::expm1; + +/* tanh(x) = (exp(x) - exp(-x))/(exp(x) + exp(-x)) + * = (exp(2*x) - 1)/(exp(2*x) - 1 + 2) + * = (1 - exp(-2*x))/(exp(-2*x) - 1 + 2) + */ + +/// The hyperbolic tangent of `x` (f64). +/// +/// `x` is specified in radians. 
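`tanh` below leans on the second form of the identity in the header comment: with t = expm1(2x) = exp(2x) - 1, tanh(x) = t / (t + 2). Computing t with expm1 avoids the cancellation that exp(2x) - 1 suffers near zero. A quick numeric check of the identity, using std's exp/exp_m1 as stand-ins for libm's exp/expm1:

fn main() {
    for &x in &[1e-9_f64, 0.1, 0.5, 3.0] {
        // Naive definition: (e^x - e^-x) / (e^x + e^-x)
        let naive = (x.exp() - (-x).exp()) / (x.exp() + (-x).exp());
        // Form used by tanh: t / (t + 2) with t = exp(2x) - 1
        let t = (2.0 * x).exp_m1();
        assert!((naive - t / (t + 2.0)).abs() < 1e-15, "mismatch at x = {x}");
    }
}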
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn tanh(mut x: f64) -> f64 { + let mut uf: f64 = x; + let mut ui: u64 = f64::to_bits(uf); + + let w: u32; + let sign: bool; + let mut t: f64; + + /* x = |x| */ + sign = ui >> 63 != 0; + ui &= !1 / 2; + uf = f64::from_bits(ui); + x = uf; + w = (ui >> 32) as u32; + + if w > 0x3fe193ea { + /* |x| > log(3)/2 ~= 0.5493 or nan */ + if w > 0x40340000 { + /* |x| > 20 or nan */ + /* note: this branch avoids raising overflow */ + t = 1.0 - 0.0 / x; + } else { + t = expm1(2.0 * x); + t = 1.0 - 2.0 / (t + 2.0); + } + } else if w > 0x3fd058ae { + /* |x| > log(5/3)/2 ~= 0.2554 */ + t = expm1(2.0 * x); + t = t / (t + 2.0); + } else if w >= 0x00100000 { + /* |x| >= 0x1p-1022, up to 2ulp error in [0.1,0.2554] */ + t = expm1(-2.0 * x); + t = -t / (t + 2.0); + } else { + /* |x| is subnormal */ + /* note: the branch above would not raise underflow in [0x1p-1023,0x1p-1022) */ + force_eval!(x as f32); + t = x; + } + + if sign { -t } else { t } +} diff --git a/library/compiler-builtins/libm/src/math/tanhf.rs b/library/compiler-builtins/libm/src/math/tanhf.rs new file mode 100644 index 0000000000000..fffbba6c6ec41 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/tanhf.rs @@ -0,0 +1,38 @@ +use super::expm1f; + +/// The hyperbolic tangent of `x` (f32). +/// +/// `x` is specified in radians. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn tanhf(mut x: f32) -> f32 { + /* x = |x| */ + let mut ix = x.to_bits(); + let sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + x = f32::from_bits(ix); + let w = ix; + + let tt = if w > 0x3f0c9f54 { + /* |x| > log(3)/2 ~= 0.5493 or nan */ + if w > 0x41200000 { + /* |x| > 10 */ + 1. + 0. / x + } else { + let t = expm1f(2. * x); + 1. - 2. / (t + 2.) + } + } else if w > 0x3e82c578 { + /* |x| > log(5/3)/2 ~= 0.2554 */ + let t = expm1f(2. * x); + t / (t + 2.) + } else if w >= 0x00800000 { + /* |x| >= 0x1p-126 */ + let t = expm1f(-2. * x); + -t / (t + 2.) + } else { + /* |x| is subnormal */ + force_eval!(x * x); + x + }; + if sign { -tt } else { tt } +} diff --git a/library/compiler-builtins/libm/src/math/tgamma.rs b/library/compiler-builtins/libm/src/math/tgamma.rs new file mode 100644 index 0000000000000..3059860646a5b --- /dev/null +++ b/library/compiler-builtins/libm/src/math/tgamma.rs @@ -0,0 +1,209 @@ +/* +"A Precision Approximation of the Gamma Function" - Cornelius Lanczos (1964) +"Lanczos Implementation of the Gamma Function" - Paul Godfrey (2001) +"An Analysis of the Lanczos Gamma Approximation" - Glendon Ralph Pugh (2004) + +approximation method: + + (x - 0.5) S(x) +Gamma(x) = (x + g - 0.5) * ---------------- + exp(x + g - 0.5) + +with + a1 a2 a3 aN +S(x) ~= [ a0 + ----- + ----- + ----- + ... + ----- ] + x + 1 x + 2 x + 3 x + N + +with a0, a1, a2, a3,.. aN constants which depend on g. 
+ +for x < 0 the following reflection formula is used: + +Gamma(x)*Gamma(-x) = -pi/(x sin(pi x)) + +most ideas and constants are from boost and python +*/ +use super::{exp, floor, k_cos, k_sin, pow}; + +const PI: f64 = 3.141592653589793238462643383279502884; + +/* sin(pi x) with x > 0x1p-100, if sin(pi*x)==0 the sign is arbitrary */ +fn sinpi(mut x: f64) -> f64 { + let mut n: isize; + + /* argument reduction: x = |x| mod 2 */ + /* spurious inexact when x is odd int */ + x = x * 0.5; + x = 2.0 * (x - floor(x)); + + /* reduce x into [-.25,.25] */ + n = (4.0 * x) as isize; + n = div!(n + 1, 2); + x -= (n as f64) * 0.5; + + x *= PI; + match n { + 1 => k_cos(x, 0.0), + 2 => k_sin(-x, 0.0, 0), + 3 => -k_cos(x, 0.0), + // 0 + _ => k_sin(x, 0.0, 0), + } +} + +const N: usize = 12; +//static const double g = 6.024680040776729583740234375; +const GMHALF: f64 = 5.524680040776729583740234375; +const SNUM: [f64; N + 1] = [ + 23531376880.410759688572007674451636754734846804940, + 42919803642.649098768957899047001988850926355848959, + 35711959237.355668049440185451547166705960488635843, + 17921034426.037209699919755754458931112671403265390, + 6039542586.3520280050642916443072979210699388420708, + 1439720407.3117216736632230727949123939715485786772, + 248874557.86205415651146038641322942321632125127801, + 31426415.585400194380614231628318205362874684987640, + 2876370.6289353724412254090516208496135991145378768, + 186056.26539522349504029498971604569928220784236328, + 8071.6720023658162106380029022722506138218516325024, + 210.82427775157934587250973392071336271166969580291, + 2.5066282746310002701649081771338373386264310793408, +]; +const SDEN: [f64; N + 1] = [ + 0.0, + 39916800.0, + 120543840.0, + 150917976.0, + 105258076.0, + 45995730.0, + 13339535.0, + 2637558.0, + 357423.0, + 32670.0, + 1925.0, + 66.0, + 1.0, +]; +/* n! for small integer n */ +const FACT: [f64; 23] = [ + 1.0, + 1.0, + 2.0, + 6.0, + 24.0, + 120.0, + 720.0, + 5040.0, + 40320.0, + 362880.0, + 3628800.0, + 39916800.0, + 479001600.0, + 6227020800.0, + 87178291200.0, + 1307674368000.0, + 20922789888000.0, + 355687428096000.0, + 6402373705728000.0, + 121645100408832000.0, + 2432902008176640000.0, + 51090942171709440000.0, + 1124000727777607680000.0, +]; + +/* S(x) rational function for positive x */ +fn s(x: f64) -> f64 { + let mut num: f64 = 0.0; + let mut den: f64 = 0.0; + + /* to avoid overflow handle large x differently */ + if x < 8.0 { + for i in (0..=N).rev() { + num = num * x + i!(SNUM, i); + den = den * x + i!(SDEN, i); + } + } else { + for i in 0..=N { + num = num / x + i!(SNUM, i); + den = den / x + i!(SDEN, i); + } + } + return num / den; +} + +/// The [Gamma function](https://en.wikipedia.org/wiki/Gamma_function) (f64). 
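The FACT table above feeds the integer fast path in `tgamma` below: for a positive integer n, Gamma(n) = (n - 1)!, so small integer arguments are answered exactly from the table. Every factorial up to 22! is exactly representable as an f64 (23! no longer is), which is presumably why the table stops there. A self-contained check of the correspondence:

fn main() {
    // (n - 1)! accumulated in f64; all intermediate products here are exact.
    let gamma_int = |n: u32| (1..n).fold(1.0_f64, |acc, k| acc * k as f64);

    assert_eq!(gamma_int(1), 1.0); // Gamma(1) = 0! = 1
    assert_eq!(gamma_int(5), 24.0); // Gamma(5) = 4! = 24
    assert_eq!(gamma_int(23), 1124000727777607680000.0); // 22!, the last FACT entry
}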
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn tgamma(mut x: f64) -> f64 { + let u: u64 = x.to_bits(); + let absx: f64; + let mut y: f64; + let mut dy: f64; + let mut z: f64; + let mut r: f64; + let ix: u32 = ((u >> 32) as u32) & 0x7fffffff; + let sign: bool = (u >> 63) != 0; + + /* special cases */ + if ix >= 0x7ff00000 { + /* tgamma(nan)=nan, tgamma(inf)=inf, tgamma(-inf)=nan with invalid */ + return x + f64::INFINITY; + } + if ix < ((0x3ff - 54) << 20) { + /* |x| < 2^-54: tgamma(x) ~ 1/x, +-0 raises div-by-zero */ + return 1.0 / x; + } + + /* integer arguments */ + /* raise inexact when non-integer */ + if x == floor(x) { + if sign { + return 0.0 / 0.0; + } + if x <= FACT.len() as f64 { + return i!(FACT, (x as usize) - 1); + } + } + + /* x >= 172: tgamma(x)=inf with overflow */ + /* x =< -184: tgamma(x)=+-0 with underflow */ + if ix >= 0x40670000 { + /* |x| >= 184 */ + if sign { + let x1p_126 = f64::from_bits(0x3810000000000000); // 0x1p-126 == 2^-126 + force_eval!((x1p_126 / x) as f32); + if floor(x) * 0.5 == floor(x * 0.5) { + return 0.0; + } else { + return -0.0; + } + } + let x1p1023 = f64::from_bits(0x7fe0000000000000); // 0x1p1023 == 2^1023 + x *= x1p1023; + return x; + } + + absx = if sign { -x } else { x }; + + /* handle the error of x + g - 0.5 */ + y = absx + GMHALF; + if absx > GMHALF { + dy = y - absx; + dy -= GMHALF; + } else { + dy = y - GMHALF; + dy -= absx; + } + + z = absx - 0.5; + r = s(absx) * exp(-y); + if x < 0.0 { + /* reflection formula for negative x */ + /* sinpi(absx) is not 0, integers are already handled */ + r = -PI / (sinpi(absx) * absx * r); + dy = -dy; + z = -z; + } + r += dy * (GMHALF + 0.5) * r / y; + z = pow(y, 0.5 * z); + y = r * z * z; + return y; +} diff --git a/library/compiler-builtins/libm/src/math/tgammaf.rs b/library/compiler-builtins/libm/src/math/tgammaf.rs new file mode 100644 index 0000000000000..fe178f7a3c0ea --- /dev/null +++ b/library/compiler-builtins/libm/src/math/tgammaf.rs @@ -0,0 +1,7 @@ +use super::tgamma; + +/// The [Gamma function](https://en.wikipedia.org/wiki/Gamma_function) (f32). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn tgammaf(x: f32) -> f32 { + tgamma(x as f64) as f32 +} diff --git a/library/compiler-builtins/libm/src/math/trunc.rs b/library/compiler-builtins/libm/src/math/trunc.rs new file mode 100644 index 0000000000000..fa50d55e13687 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/trunc.rs @@ -0,0 +1,53 @@ +/// Rounds the number toward 0 to the closest integral value (f16). +/// +/// This effectively removes the decimal part of the number, leaving the integral part. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn truncf16(x: f16) -> f16 { + super::generic::trunc(x) +} + +/// Rounds the number toward 0 to the closest integral value (f32). +/// +/// This effectively removes the decimal part of the number, leaving the integral part. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn truncf(x: f32) -> f32 { + select_implementation! { + name: truncf, + use_arch: all(target_arch = "wasm32", intrinsics_enabled), + args: x, + } + + super::generic::trunc(x) +} + +/// Rounds the number toward 0 to the closest integral value (f64). +/// +/// This effectively removes the decimal part of the number, leaving the integral part. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn trunc(x: f64) -> f64 { + select_implementation! 
{ + name: trunc, + use_arch: all(target_arch = "wasm32", intrinsics_enabled), + args: x, + } + + super::generic::trunc(x) +} + +/// Rounds the number toward 0 to the closest integral value (f128). +/// +/// This effectively removes the decimal part of the number, leaving the integral part. +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn truncf128(x: f128) -> f128 { + super::generic::trunc(x) +} + +#[cfg(test)] +mod tests { + #[test] + fn sanity_check() { + assert_eq!(super::truncf(1.1), 1.0); + } +} diff --git a/library/compiler-builtins/libm/src/math/truncf.rs b/library/compiler-builtins/libm/src/math/truncf.rs new file mode 100644 index 0000000000000..14533a2670632 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/truncf.rs @@ -0,0 +1,23 @@ +/// Rounds the number toward 0 to the closest integral value (f32). +/// +/// This effectively removes the decimal part of the number, leaving the integral part. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn truncf(x: f32) -> f32 { + select_implementation! { + name: truncf, + use_arch: all(target_arch = "wasm32", intrinsics_enabled), + args: x, + } + + super::generic::trunc(x) +} + +// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 +#[cfg(not(target_arch = "powerpc64"))] +#[cfg(test)] +mod tests { + #[test] + fn sanity_check() { + assert_eq!(super::truncf(1.1), 1.0); + } +} diff --git a/library/compiler-builtins/libm/src/math/truncf128.rs b/library/compiler-builtins/libm/src/math/truncf128.rs new file mode 100644 index 0000000000000..9dccc0d0e9d7f --- /dev/null +++ b/library/compiler-builtins/libm/src/math/truncf128.rs @@ -0,0 +1,7 @@ +/// Rounds the number toward 0 to the closest integral value (f128). +/// +/// This effectively removes the decimal part of the number, leaving the integral part. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn truncf128(x: f128) -> f128 { + super::generic::trunc(x) +} diff --git a/library/compiler-builtins/libm/src/math/truncf16.rs b/library/compiler-builtins/libm/src/math/truncf16.rs new file mode 100644 index 0000000000000..d7c3d225cf9b8 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/truncf16.rs @@ -0,0 +1,7 @@ +/// Rounds the number toward 0 to the closest integral value (f16). +/// +/// This effectively removes the decimal part of the number, leaving the integral part. 
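These trunc functions all defer to `super::generic::trunc`, which is not part of this diff. As a rough sketch of how truncation toward zero can be done directly on the bit representation; this illustrates the general idea under that assumption, not necessarily how the generic implementation is written:

fn trunc_sketch(x: f32) -> f32 {
    let bits = x.to_bits();
    let exp = ((bits >> 23) & 0xff) as i32 - 127; // unbiased exponent
    if exp >= 23 {
        x // no fractional bits remain (also covers inf/NaN)
    } else if exp < 0 {
        f32::from_bits(bits & 0x8000_0000) // |x| < 1 truncates to a signed zero
    } else {
        let frac_mask = 0x007f_ffff_u32 >> exp; // significand bits below the binary point
        f32::from_bits(bits & !frac_mask)
    }
}

fn main() {
    assert_eq!(trunc_sketch(1.1), 1.0);
    assert_eq!(trunc_sketch(-2.9), -2.0);
    assert_eq!(trunc_sketch(0.5), 0.0);
}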
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn truncf16(x: f16) -> f16 { + super::generic::trunc(x) +} diff --git a/library/compiler-builtins/thumbv6m-linux-eabi.json b/library/compiler-builtins/thumbv6m-linux-eabi.json new file mode 100644 index 0000000000000..ac736eae686b4 --- /dev/null +++ b/library/compiler-builtins/thumbv6m-linux-eabi.json @@ -0,0 +1,28 @@ +{ + "abi-blacklist": [ + "stdcall", + "fastcall", + "vectorcall", + "win64", + "sysv64" + ], + "arch": "arm", + "data-layout": "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64", + "env": "", + "executables": true, + "features": "+strict-align", + "linker": "arm-none-eabi-gcc", + "linker-flavor": "gcc", + "llvm-target": "thumbv6m-none-eabi", + "max-atomic-width": 0, + "os": "linux", + "panic-strategy": "abort", + "pre-link-args": { + "gcc": ["-nostartfiles"] + }, + "relocation-model": "static", + "target-endian": "little", + "target-pointer-width": "32", + "target-c-int-width": "32", + "vendor": "" +} diff --git a/library/compiler-builtins/thumbv7em-linux-eabi.json b/library/compiler-builtins/thumbv7em-linux-eabi.json new file mode 100644 index 0000000000000..b6d4a6bda7bac --- /dev/null +++ b/library/compiler-builtins/thumbv7em-linux-eabi.json @@ -0,0 +1,27 @@ +{ + "abi-blacklist": [ + "stdcall", + "fastcall", + "vectorcall", + "win64", + "sysv64" + ], + "arch": "arm", + "data-layout": "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64", + "env": "", + "executables": true, + "linker": "arm-none-eabi-gcc", + "linker-flavor": "gcc", + "llvm-target": "thumbv7em-none-eabi", + "max-atomic-width": 32, + "os": "linux", + "panic-strategy": "abort", + "pre-link-args": { + "gcc": ["-nostartfiles"] + }, + "relocation-model": "static", + "target-endian": "little", + "target-pointer-width": "32", + "target-c-int-width": "32", + "vendor": "" +} diff --git a/library/compiler-builtins/thumbv7em-linux-eabihf.json b/library/compiler-builtins/thumbv7em-linux-eabihf.json new file mode 100644 index 0000000000000..81cfcd48d56ce --- /dev/null +++ b/library/compiler-builtins/thumbv7em-linux-eabihf.json @@ -0,0 +1,28 @@ +{ + "abi-blacklist": [ + "stdcall", + "fastcall", + "vectorcall", + "win64", + "sysv64" + ], + "arch": "arm", + "data-layout": "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64", + "env": "", + "executables": true, + "features": "+vfp4,+d16,+fp-only-sp", + "linker": "arm-none-eabi-gcc", + "linker-flavor": "gcc", + "llvm-target": "thumbv7em-none-eabihf", + "max-atomic-width": 32, + "os": "linux", + "panic-strategy": "abort", + "pre-link-args": { + "gcc": ["-nostartfiles"] + }, + "relocation-model": "static", + "target-endian": "little", + "target-pointer-width": "32", + "target-c-int-width": "32", + "vendor": "" +} diff --git a/library/compiler-builtins/thumbv7m-linux-eabi.json b/library/compiler-builtins/thumbv7m-linux-eabi.json new file mode 100644 index 0000000000000..abe037c5bef88 --- /dev/null +++ b/library/compiler-builtins/thumbv7m-linux-eabi.json @@ -0,0 +1,27 @@ +{ + "abi-blacklist": [ + "stdcall", + "fastcall", + "vectorcall", + "win64", + "sysv64" + ], + "arch": "arm", + "data-layout": "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64", + "env": "", + "executables": true, + "linker": "arm-none-eabi-gcc", + "linker-flavor": "gcc", + "llvm-target": "thumbv7m-none-eabi", + "max-atomic-width": 32, + "os": "linux", + "panic-strategy": "abort", + "pre-link-args": { + "gcc": ["-nostartfiles"] + }, + "relocation-model": "static", + "target-endian": "little", + "target-pointer-width": "32", + "target-c-int-width": "32", + 
"vendor": "" +} diff --git a/rustfmt.toml b/rustfmt.toml index d9857a7e3e788..689e390b990c2 100644 --- a/rustfmt.toml +++ b/rustfmt.toml @@ -29,6 +29,7 @@ ignore = [ # Do not format submodules. "library/backtrace", + "library/compiler-builtins", "library/portable-simd", "library/stdarch", "src/doc/book", diff --git a/src/tools/tidy/src/pal.rs b/src/tools/tidy/src/pal.rs index c708ff7150c9b..9b915e0f737dc 100644 --- a/src/tools/tidy/src/pal.rs +++ b/src/tools/tidy/src/pal.rs @@ -36,6 +36,7 @@ use crate::walk::{filter_dirs, walk}; // Paths that may contain platform-specific code. const EXCEPTION_PATHS: &[&str] = &[ + "library/compiler-builtins", "library/windows_targets", "library/panic_abort", "library/panic_unwind", diff --git a/src/tools/tidy/src/walk.rs b/src/tools/tidy/src/walk.rs index 08ee5c16c122f..c2c9eb8d2507e 100644 --- a/src/tools/tidy/src/walk.rs +++ b/src/tools/tidy/src/walk.rs @@ -14,6 +14,7 @@ pub fn filter_dirs(path: &Path) -> bool { "compiler/rustc_codegen_gcc", "src/llvm-project", "library/backtrace", + "library/compiler-builtins", "library/portable-simd", "library/stdarch", "src/tools/cargo", diff --git a/triagebot.toml b/triagebot.toml index 9dcdbcecbeca6..5921c2056f525 100644 --- a/triagebot.toml +++ b/triagebot.toml @@ -254,6 +254,11 @@ trigger_files = [ "compiler/rustc_attr_data_structures", ] +[autolabel."A-compiler-builtins"] +trigger_files = [ + "library/compiler-builtins", +] + [autolabel."F-autodiff"] trigger_files = [ "src/tools/enzyme", @@ -851,6 +856,15 @@ cc = ["@Urgau"] message = "Some changes occurred in check-cfg diagnostics" cc = ["@Urgau"] +[mentions."library/compiler-builtins"] +message = """ +`compiler-builtins` is developed in its own repository. If possible, consider \ +making this change to \ +[rust-lang/compiler-builtins](https://github.com/rust-lang/compiler-builtins) \ +instead. +""" +cc = ["@tgross35"] + [mentions."library/core/src/intrinsics/simd.rs"] message = """ Some changes occurred to the platform-builtins intrinsics. Make sure the