Closed
Description
A `long double` value is not returned correctly from `cosl`. The generated code seems to destroy the value inside `DE`. Here is a minimal reproducible example, compiled at `-Oz`:
#include <math.h>
// Helper for the reproducer: returns (x + quad) / x.
// quad is promoted to long double by the addition.
static long double _f64_sinus(unsigned char quad, long double x) {
// large enough to not be inlined into cosl
return (x + quad) / x;
}
// Calls the helper with quad = 2 when signbit(arg) is non-zero, otherwise 0,
// and with x = fabsl(arg).
long double sinl(long double arg) {
return _f64_sinus(signbit(arg) ? 2 : 0, fabsl(arg));
}
// Calls the helper with quad = 1 and x = fabsl(arg) and returns the result
// through a plain local. This is the function the report says returns a
// wrong value.
long double cosl(long double arg) {
long double r = _f64_sinus(1, fabsl(arg));
return r;
}
; _sinl: compiler output for sinl() in the C reproducer.
public _sinl
_sinl:
; __frameset is external; presumably it builds the IX frame and reserves the
; 3 bytes of locals requested in HL (ix - 3 is used as a local below) - confirm.
ld hl, -3
call __frameset
; Load the three argument words at ix + 6 / + 9 / + 12 and pass them to
; __signbitl on the stack.
ld hl, (ix + 6)
ld de, (ix + 9)
ld bc, (ix + 12)
push bc
push de
push hl
call __signbitl
; Discard the three pushed words.
pop hl
pop hl
pop hl
; Bit 0 of A selects the helper's first argument: 2 if set, 0 if clear
; (matches `signbit(arg) ? 2 : 0` in the C source).
bit 0, a
jr nz, BB0_2
ld l, 0
jr BB0_3
BB0_2:
ld l, 2
BB0_3:
; Keep the selected value in the local across the next call. Only L was
; set above; __f64_sinus reads just the low byte of this word.
ld (ix - 3), hl
; Push the argument words again (highest offset first) for __debug_fabsl.
ld hl, (ix + 12)
push hl
ld hl, (ix + 9)
push hl
ld hl, (ix + 6)
push hl
call __debug_fabsl
; Discard the three pushed words without touching HL, DE or BC.
pop iy
pop iy
pop iy
; Push BC, DE, HL as left by __debug_fabsl (presumably its 64-bit result),
; then the saved selector word, and call the helper.
push bc
push de
push hl
ld hl, (ix - 3)
push hl
call __f64_sinus
; Post-call shuffle: E and D are copied into IYL/IYH, the four pops remove
; the four pushed words but overwrite DE, then DE receives BC and C/B
; receive the saved E/D. Net effect: HL unchanged, DE = BC as returned by
; the helper, low 16 bits of BC = low 16 bits of the returned DE; the upper
; byte of the returned DE is not kept anywhere.
; NOTE(review): this looks like the DE corruption described in the report.
; The listing for the volatile variant cleans the stack with `pop iy` and
; stores HL, DE, C, B in that order as the result - confirm against the
; toolchain's 64-bit return convention.
ld iyl, e
ld iyh, d
pop de
pop de
pop de
pop de
push bc
pop de
ld c, iyl
ld b, iyh
; Drop the local and restore the caller's IX.
ld sp, ix
pop ix
ret
; __f64_sinus: compiler output for the static helper, (x + quad) / x.
; Stack arguments as the visible callers push them: the word at ix + 6
; carries quad in its low byte; ix + 9, ix + 12 and ix + 15 are the three
; words of x.
private __f64_sinus
__f64_sinus:
call __frameset0
; Fetch the top word of x now; it is pushed after __ultod returns, so this
; relies on __ultod leaving IY intact.
ld iy, (ix + 15)
; Clear carry and zero HL, then build E:HL = quad zero-extended.
or a, a
sbc hl, hl
ld l, (ix + 6)
ld e, h
; __ultod is external; presumably converts the unsigned value in E:HL to a
; double left in registers - confirm.
call __ultod
; Push the three words of x and call __dadd (the `x + quad` step).
push iy
ld iy, (ix + 12)
push iy
ld iy, (ix + 9)
push iy
call __dadd
; Discard the pushed words without touching HL, DE or BC.
pop iy
pop iy
pop iy
; Push x again and call __ddiv (the `/ x` step).
ld iy, (ix + 15)
push iy
ld iy, (ix + 12)
push iy
ld iy, (ix + 9)
push iy
call __ddiv
; Resetting SP from IX drops the three pushed words; the registers are
; returned exactly as __ddiv left them.
ld sp, ix
pop ix
ret
; _cosl: compiler output for cosl() in the C reproducer - the function the
; report says returns a wrong value.
public _cosl
_cosl:
call __frameset0
; Load the three argument words and pass them to __debug_fabsl on the stack.
ld hl, (ix + 6)
ld de, (ix + 9)
ld bc, (ix + 12)
push bc
push de
push hl
call __debug_fabsl
; Discard the three pushed words without touching HL, DE or BC.
pop iy
pop iy
pop iy
; Push BC, DE, HL as left by __debug_fabsl (presumably its 64-bit result),
; then the constant 1 for quad, and call the helper.
push bc
push de
push hl
ld hl, 1
push hl
call __f64_sinus
; Post-call shuffle: E and D are copied into IYL/IYH, the four pops remove
; the four pushed words but overwrite DE, then DE receives BC and C/B
; receive the saved E/D. Net effect: HL unchanged, DE = BC as returned by
; the helper, low 16 bits of BC = low 16 bits of the returned DE; the upper
; byte of the returned DE is not kept anywhere.
; NOTE(review): this looks like the DE corruption described in the report.
; The listing for the volatile variant cleans the stack with `pop iy` and
; stores HL, DE, C, B in that order as the result - confirm against the
; toolchain's 64-bit return convention.
ld iyl, e
ld iyh, d
pop de
pop de
pop de
pop de
push bc
pop de
ld c, iyl
ld b, iyh
; The pops above already balanced the stack, so only IX is restored.
pop ix
ret
extern __ultod
extern __frameset
extern __debug_fabsl
extern __signbitl
extern __frameset0
extern __dadd
extern __ddiv
If I change it to this:
// Variant from the report: the helper's result goes through a volatile const
// local before being returned.
// NOTE(review): the arguments here are (fabsl(arg), 1), the reverse of the
// (quad, x) order in the helper's declaration and in the first cosl above -
// confirm which helper signature this variant was compiled against.
long double cosl(long double arg) {
volatile const long double ret = _f64_sinus(fabsl(arg), 1);
return ret;
}
then the code works correctly (but is slower):
section .text,"ax",@progbits
; _cosl: compiler output for the volatile variant of cosl(), reported as
; working correctly.
public _cosl
_cosl:
; __frameset is external; presumably it builds the IX frame and reserves the
; 11 bytes of locals requested in HL - confirm. The code below uses
; ix - 8 .. ix - 1 as an 8-byte slot and ix - 11 as a pointer to it.
ld hl, -11
call __frameset
ld iy, (ix + 6)
ld de, (ix + 9)
ld bc, (ix + 12)
; Store the address of the 8-byte slot at ix - 11.
lea hl, ix - 8
ld (ix - 11), hl
; Pass the three argument words to _fabsl on the stack.
push bc
push de
push iy
call _fabsl
; Discard the three pushed words without touching HL, DE or BC.
pop iy
pop iy
pop iy
; Here the constant 1 is pushed first and the BC/DE/HL words last - the
; opposite stack order from the first listing, matching the swapped
; arguments in this variant's C source.
ld iy, 1
push iy
push bc
push de
push hl
call __f64_sinus
; Clean up the four pushed words through IY, leaving HL, DE and BC as the
; helper returned them.
pop iy
pop iy
pop iy
pop iy
; Store the result into the slot: HL at ix - 8, DE at ix - 5 (iy + 3),
; C at ix - 2 and B at ix - 1 (iy + 6 and the following byte).
ld (ix - 8), hl
ld iy, (ix - 11)
lea hl, iy + 3
ld (hl), de
lea hl, iy + 6
ld (hl), c
inc hl
ld (hl), b
; Reload the return registers from the slot in the same layout.
ld hl, (ix - 8)
ld de, (ix - 5)
ld c, (ix - 2)
ld b, (ix - 1)
; Drop the locals and restore the caller's IX.
ld sp, ix
pop ix
ret
section .text,"ax",@progbits