Skip to content

long double doesn't return correctly #2

Closed
@ZERICO2005

Description

@ZERICO2005

A `long double` is not returned correctly from `cosl`: the generated code appears to destroy the value held in DE before returning. Here is a minimal reproducible example, compiled at -Oz.

#include <math.h>
// Reproducer helper: returns (x + quad) / x.
// It carries no trigonometric meaning; per the comment below it only needs to
// be big enough that the compiler keeps it as a separate function.
static long double _f64_sinus(unsigned char quad, long double x) {
    // large enough to not be inlined into cosl
    return (x + quad) / x;
}

// Calls _f64_sinus with quad = 2 when signbit(arg) is nonzero, otherwise 0,
// and with x = fabsl(arg); returns that call's result directly.
long double sinl(long double arg) {
    return _f64_sinus(signbit(arg) ? 2 : 0, fabsl(arg));
}

// Calls _f64_sinus(1, fabsl(arg)) and returns the result via the local r.
// This is the function the report says returns an incorrect value at -Oz.
long double cosl(long double arg) {
    long double r = _f64_sinus(1, fabsl(arg));
    return r;
}
; _sinl -- compiler output for sinl() in the reproducer above.
; Stack arguments: arg occupies the three words at ix+6, ix+9 and ix+12.
; Local: ix-3 holds the quad value chosen from the sign test.
; NOTE(review): the epilogue after the call to __f64_sinus uses the same
; DE/BC shuffle as _cosl in this listing, so it presumably mis-returns the
; value in the same way -- confirm.
public    _sinl
_sinl:
    ld    hl, -3
    call    __frameset              ; external frame helper; presumably reserves 3 bytes (ix-3 is used below)
    ld    hl, (ix + 6)              ; HL/DE/BC = the three words of arg
    ld    de, (ix + 9)
    ld    bc, (ix + 12)
    push    bc                      ; pass arg on the stack, highest word first
    push    de
    push    hl
    call    __signbitl
    pop    hl                       ; drop the three argument words (HL is scratch here)
    pop    hl
    pop    hl
    bit    0, a                     ; test bit 0 of A (presumably __signbitl's result -- confirm)
    jr    nz, BB0_2
    ld    l, 0                      ; bit clear: quad = 0
    jr    BB0_3
BB0_2:
    ld    l, 2                      ; bit set: quad = 2
BB0_3:
    ld    (ix - 3), hl              ; spill quad; only L was set explicitly above
    ld    hl, (ix + 12)             ; push arg again for the fabsl call
    push    hl
    ld    hl, (ix + 9)
    push    hl
    ld    hl, (ix + 6)
    push    hl
    call    __debug_fabsl
    pop    iy                       ; drop the arguments via IY so HL/DE/BC are not overwritten
    pop    iy
    pop    iy
    push    bc                      ; pass BC:DE:HL (presumably the fabsl result) as x
    push    de
    push    hl
    ld    hl, (ix - 3)              ; quad is pushed last, so it lands at the callee's ix+6
    push    hl
    call    __f64_sinus
    ld    iyl, e                    ; save only the low 16 bits of DE in IY
    ld    iyh, d
    pop    de                       ; drop the four argument words; this overwrites DE
    pop    de
    pop    de
    pop    de
    push    bc                      ; DE <- BC
    pop    de
    ld    c, iyl                    ; C,B <- the saved low 16 bits of the old DE
    ld    b, iyh
    ld    sp, ix
    pop    ix
    ret

    ; __f64_sinus -- compiler output for _f64_sinus(quad, x) = (x + quad) / x.
    ; Stack arguments: quad is the byte at ix+6; x occupies the words at
    ; ix+9, ix+12 and ix+15.
    ; The value left in registers by __ddiv is returned unchanged.
    ; NOTE(review): __ultod, __dadd and __ddiv are external runtime helpers;
    ; their operand and result registers are not shown here. From the call
    ; pattern, one operand is presumably in registers and the other on the stack.
    private    __f64_sinus
__f64_sinus:
    call    __frameset0
    ld    iy, (ix + 15)             ; IY = word of x at ix+15, pushed first below
    or    a, a                      ; clear carry so the sbc below yields exactly 0
    sbc    hl, hl                   ; HL = 0
    ld    l, (ix + 6)               ; HL = quad, zero-extended
    ld    e, h                      ; E = 0
    call    __ultod                 ; presumably converts the unsigned value in E:HL to a double
    push    iy                      ; push x as the stack operand for __dadd
    ld    iy, (ix + 12)
    push    iy
    ld    iy, (ix + 9)
    push    iy
    call    __dadd                  ; presumably (double)quad + x
    pop    iy                       ; drop the operand via IY, leaving HL/DE/BC alone
    pop    iy
    pop    iy
    ld    iy, (ix + 15)             ; push x again as the stack operand for __ddiv
    push    iy
    ld    iy, (ix + 12)
    push    iy
    ld    iy, (ix + 9)
    push    iy
    call    __ddiv                  ; presumably (x + quad) / x
    ld    sp, ix                    ; discard the pushed operand by restoring SP from IX
    pop    ix
    ret

    ; _cosl -- compiler output for cosl() in the reproducer above; this is the
    ; listing the report says returns the wrong value.
    ; Stack arguments: arg occupies the three words at ix+6, ix+9 and ix+12.
    ; NOTE(review): after __f64_sinus returns, only the low 16 bits of DE are
    ; saved (in IY). DE is then overwritten by the cleanup pops, DE is loaded
    ; from BC, and BC's low 16 bits are loaded from the saved bits. The upper
    ; byte of the original DE is lost and the BC/DE parts are exchanged. The
    ; working listing later in this report keeps HL, DE, C and B as returned,
    ; so this sequence appears to be the corruption described -- confirm
    ; against the target's 64-bit return convention.
    public    _cosl
_cosl:
    call    __frameset0
    ld    hl, (ix + 6)              ; HL/DE/BC = the three words of arg
    ld    de, (ix + 9)
    ld    bc, (ix + 12)
    push    bc                      ; pass arg on the stack, highest word first
    push    de
    push    hl
    call    __debug_fabsl
    pop    iy                       ; drop the arguments via IY so HL/DE/BC are not overwritten
    pop    iy
    pop    iy
    push    bc                      ; pass BC:DE:HL (presumably the fabsl result) as x
    push    de
    push    hl
    ld    hl, 1                     ; quad = 1, pushed last so it lands at the callee's ix+6
    push    hl
    call    __f64_sinus
    ld    iyl, e                    ; save only the low 16 bits of DE in IY
    ld    iyh, d
    pop    de                       ; drop the four argument words; this overwrites DE
    pop    de
    pop    de
    pop    de
    push    bc                      ; DE <- BC
    pop    de
    ld    c, iyl                    ; C,B <- the saved low 16 bits of the old DE
    ld    b, iyh
    pop    ix                       ; stack is balanced at this point, so no ld sp, ix
    ret

    extern    __ultod
    extern    __frameset
    extern    __debug_fabsl
    extern    __signbitl
    extern    __frameset0
    extern    __dadd
    extern    __ddiv

If I change it to this:

// Workaround variant from the report: the result is stored in a volatile
// const local and read back for the return.
// NOTE(review): this call passes (fabsl(arg), 1), the reverse of the
// (unsigned char quad, long double x) parameter order shown earlier in this
// report -- confirm which _f64_sinus signature this variant was built against.
long double cosl(long double arg) {
    volatile const long double ret = _f64_sinus(fabsl(arg), 1);
    return ret;
}

then the code works correctly (but is slower)

	section	.text,"ax",@progbits
	; _cosl -- compiler output for the volatile workaround variant of cosl().
	; Stack arguments: arg occupies the three words at ix+6, ix+9 and ix+12.
	; Locals: ix-8..ix-1 is the 8-byte slot for ret; ix-11 holds that slot's address.
	; The result of __f64_sinus is stored to the slot as HL, DE, C, B and
	; reloaded into the same registers, so no register parts are exchanged here.
	public	_cosl
_cosl:
	ld	hl, -11
	call	__frameset              ; external frame helper; presumably reserves 11 bytes of locals
	ld	iy, (ix + 6)            ; IY/DE/BC = the three words of arg
	ld	de, (ix + 9)
	ld	bc, (ix + 12)
	lea	hl, ix - 8
	ld	(ix - 11), hl           ; remember the address of the 8-byte local
	push	bc                      ; pass arg on the stack, highest word first
	push	de
	push	iy
	call	_fabsl
	pop	iy                      ; drop the arguments via IY so HL/DE/BC are not overwritten
	pop	iy
	pop	iy
	ld	iy, 1
	push	iy                      ; constant 1 pushed first, so it sits above the three words of x
	push	bc                      ; then BC:DE:HL (presumably the fabsl result)
	push	de
	push	hl
	call	__f64_sinus
	pop	iy                      ; drop the four argument words via IY; HL/DE/BC stay intact
	pop	iy
	pop	iy
	pop	iy
	ld	(ix - 8), hl            ; slot bytes 0..2 <- HL
	ld	iy, (ix - 11)           ; IY = address of the slot
	lea	hl, iy + 3
	ld	(hl), de                ; slot bytes 3..5 <- DE
	lea	hl, iy + 6
	ld	(hl), c                 ; slot byte 6 <- C
	inc	hl
	ld	(hl), b                 ; slot byte 7 <- B
	ld	hl, (ix - 8)            ; reload the stored value into HL, DE, C, B for the return
	ld	de, (ix - 5)
	ld	c, (ix - 2)
	ld	b, (ix - 1)
	ld	sp, ix
	pop	ix
	ret
	section	.text,"ax",@progbits

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug — Something isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions