Skip to content

long double returns incorrectly #56

Closed
@ZERICO2005

Description

@ZERICO2005

A long double value is not returned correctly from cosl: the generated code appears to destroy the value held in DE. Here is a minimal reproducible example, compiled at -Oz, followed by the assembly the compiler produces for it.

#include <math.h>
// Stand-in helper for the reproducer: returns (x + quad) / x.
// quad is the small integer selector passed by sinl (0 or 2) and cosl (1).
static long double _f64_sinus(unsigned char quad, long double x) {
    // large enough to not be inlined into cosl
    return (x + quad) / x;
}

// Reproducer version of sinl: calls the helper with quad = 2 when the sign
// bit of arg is set and quad = 0 otherwise, together with fabsl(arg).
long double sinl(long double arg) {
    return _f64_sinus(signbit(arg) ? 2 : 0, fabsl(arg));
}

// Reproducer version of cosl: calls the helper with quad = 1 and fabsl(arg).
// The helper's result passes through the plain local r before being returned;
// this is the function the report says returns the wrong value.
long double cosl(long double arg) {
    long double r = _f64_sinus(1, fabsl(arg));
    return r;
}
; _sinl -- compiler output (-Oz) for sinl() in the C listing above.
; The 8-byte long double argument is read from (ix + 6), (ix + 9) and
; (ix + 12); one 3-byte local lives at (ix - 3). Calls __signbitl,
; __debug_fabsl and __f64_sinus, in that order.
public    _sinl
_sinl:
    ; __frameset is external to this listing; presumably it establishes IX as
    ; the frame pointer and reserves the 3 bytes requested in HL -- confirm
    ; against the runtime.
    ld    hl, -3
    call    __frameset
    ; Load the argument words and pass them to __signbitl on the stack.
    ld    hl, (ix + 6)
    ld    de, (ix + 9)
    ld    bc, (ix + 12)
    push    bc
    push    de
    push    hl
    call    __signbitl
    ; Remove the three argument words from the stack.
    pop    hl
    pop    hl
    pop    hl
    ; Bit 0 of A, tested right after the __signbitl call, selects quad:
    ; L = 2 when the bit is set, L = 0 when it is clear.
    bit    0, a
    jr    nz, BB0_2
    ld    l, 0
    jr    BB0_3
BB0_2:
    ld    l, 2
BB0_3:
    ; Spill HL (quad in L) to the local; it is reloaded after the next call.
    ld    (ix - 3), hl
    ; Push the argument again, highest word first, and call __debug_fabsl.
    ld    hl, (ix + 12)
    push    hl
    ld    hl, (ix + 9)
    push    hl
    ld    hl, (ix + 6)
    push    hl
    call    __debug_fabsl
    ; Discard the argument words through IY so BC/DE/HL are left untouched.
    pop    iy
    pop    iy
    pop    iy
    ; Push BC, DE, HL as left by __debug_fabsl (the x argument), then quad
    ; from the local, and call the helper.
    push    bc
    push    de
    push    hl
    ld    hl, (ix - 3)
    push    hl
    call    __f64_sinus
    ; NOTE(review): this is the same post-call sequence as in the failing
    ; _cosl. Only E and D are copied into IY (registers appear to be 3 bytes
    ; wide here, judging by the 3-byte argument stride, so DE's top byte is
    ; not saved) ...
    ld    iyl, e
    ld    iyh, d
    ; ... then the four argument words are popped into DE, overwriting it ...
    pop    de
    pop    de
    pop    de
    pop    de
    ; ... and finally DE := BC (via the stack) and BC := the 16 bits kept in
    ; IY. The volatile _cosl variant later in this report instead cleans up
    ; with 'pop iy' and leaves HL/DE/BC from __f64_sinus in place, so this
    ; exchange looks like the reported miscompilation.
    push    bc
    pop    de
    ld    c, iyl
    ld    b, iyh
    ; Release the local and restore the caller's IX.
    ld    sp, ix
    pop    ix
    ret

    ; __f64_sinus -- compiler output for the static helper (x + quad) / x.
    ; Stack arguments: quad at (ix + 6); x at (ix + 9), (ix + 12), (ix + 15).
    ; The value left in registers by __ddiv is returned unchanged.
    private    __f64_sinus
__f64_sinus:
    ; __frameset0 is external to this listing; presumably it sets up IX as
    ; the frame pointer with no locals -- confirm against the runtime.
    call    __frameset0
    ; IY = highest word of x, kept for the first push below.
    ld    iy, (ix + 15)
    ; HL = 0 (carry cleared, then sbc hl, hl), L = quad, E = H = 0:
    ; quad zero-extended as the register operand for __ultod.
    or    a, a
    sbc    hl, hl
    ld    l, (ix + 6)
    ld    e, h
    call    __ultod
    ; Push x (highest word first) and call __dadd. Presumably __ultod leaves
    ; the converted quad in registers as __dadd's other operand -- confirm.
    push    iy
    ld    iy, (ix + 12)
    push    iy
    ld    iy, (ix + 9)
    push    iy
    call    __dadd
    ; Drop the three operand words through IY, leaving BC/DE/HL alone.
    pop    iy
    pop    iy
    pop    iy
    ; Push x again as the stack operand for __ddiv.
    ld    iy, (ix + 15)
    push    iy
    ld    iy, (ix + 12)
    push    iy
    ld    iy, (ix + 9)
    push    iy
    call    __ddiv
    ; 'ld sp, ix' also discards the three words still on the stack.
    ld    sp, ix
    pop    ix
    ret

    ; _cosl -- compiler output (-Oz) for the failing cosl() in the C listing
    ; above. The long double argument is read from (ix + 6), (ix + 9) and
    ; (ix + 12); calls __debug_fabsl and then __f64_sinus with quad = 1.
    public    _cosl
_cosl:
    call    __frameset0
    ; Load the argument words and pass them to __debug_fabsl on the stack.
    ld    hl, (ix + 6)
    ld    de, (ix + 9)
    ld    bc, (ix + 12)
    push    bc
    push    de
    push    hl
    call    __debug_fabsl
    ; Discard the argument words through IY so BC/DE/HL are left untouched.
    pop    iy
    pop    iy
    pop    iy
    ; Push BC, DE, HL as left by __debug_fabsl (the x argument), then the
    ; constant quad = 1, and call the helper.
    push    bc
    push    de
    push    hl
    ld    hl, 1
    push    hl
    call    __f64_sinus
    ; NOTE(review): the reported defect is in the lines below. Only E and D
    ; are copied into IY (registers appear to be 3 bytes wide here, judging
    ; by the 3-byte argument stride, so DE's top byte is not saved) ...
    ld    iyl, e
    ld    iyh, d
    ; ... then the four argument words are popped into DE, overwriting it ...
    pop    de
    pop    de
    pop    de
    pop    de
    ; ... and finally DE := BC (via the stack) and BC := the 16 bits kept in
    ; IY. The volatile variant later in this report instead cleans up with
    ; 'pop iy' and leaves HL/DE/BC from __f64_sinus in place.
    push    bc
    pop    de
    ld    c, iyl
    ld    b, iyh
    ; The four pops above already rebalanced the stack, so only IX is
    ; restored before returning.
    pop    ix
    ret

    extern    __ultod
    extern    __frameset
    extern    __debug_fabsl
    extern    __signbitl
    extern    __frameset0
    extern    __dadd
    extern    __ddiv

If I change it to this:

// Workaround variant of cosl: the helper's result is stored in a volatile
// local and read back from it before returning.
// NOTE(review): the arguments here are (fabsl(arg), 1), the reverse of the
// (quad, x) order in the first listing. The assembly below pushes the
// constant 1 before the fabsl result, which matches this order, so the
// helper's signature was presumably changed as well -- confirm.
long double cosl(long double arg) {
    volatile const long double ret = _f64_sinus(fabsl(arg), 1);
    return ret;
}

then the code works correctly (but is slower), and the compiler produces this assembly instead:

	section	.text,"ax",@progbits
	; _cosl -- compiler output for the 'volatile const long double ret'
	; variant of cosl(). Frame: 11 bytes of locals; ret occupies
	; (ix - 8) .. (ix - 1) and (ix - 11) holds a pointer to it.
	public	_cosl
_cosl:
	; __frameset is external to this listing; presumably it establishes IX
	; as the frame pointer and reserves the 11 bytes requested in HL --
	; confirm against the runtime.
	ld	hl, -11
	call	__frameset
	; Load the argument words, and stash the address of ret (ix - 8) in the
	; local at (ix - 11).
	ld	iy, (ix + 6)
	ld	de, (ix + 9)
	ld	bc, (ix + 12)
	lea	hl, ix - 8
	ld	(ix - 11), hl
	; Pass the argument to _fabsl on the stack.
	push	bc
	push	de
	push	iy
	call	_fabsl
	; Discard the argument words through IY so BC/DE/HL are left untouched.
	pop	iy
	pop	iy
	pop	iy
	; Push the constant 1 first, then BC, DE, HL as left by _fabsl, and call
	; the helper.
	ld	iy, 1
	push	iy
	push	bc
	push	de
	push	hl
	call	__f64_sinus
	; Clean up the four argument words through IY; HL/DE/BC are not touched.
	pop	iy
	pop	iy
	pop	iy
	pop	iy
	; Store the result into ret: HL -> bytes 0-2, DE -> bytes 3-5 (through
	; the saved pointer), C -> byte 6, B -> byte 7.
	ld	(ix - 8), hl
	ld	iy, (ix - 11)
	lea	hl, iy + 3
	ld	(hl), de
	lea	hl, iy + 6
	ld	(hl), c
	inc	hl
	ld	(hl), b
	; Read ret back into the same registers it was stored from and return.
	ld	hl, (ix - 8)
	ld	de, (ix - 5)
	ld	c, (ix - 2)
	ld	b, (ix - 1)
	; Release the locals and restore the caller's IX.
	ld	sp, ix
	pop	ix
	ret
	section	.text,"ax",@progbits

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions