Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 20 additions & 14 deletions sljit_src/sljitConfigInternal.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,14 @@
SLJIT_UPPER_BITS_ZERO_EXTENDED : 32 bit operations clear the upper bits of destination registers
SLJIT_UPPER_BITS_SIGN_EXTENDED : 32 bit operations replicate the sign bit in the upper bits of destination registers
SLJIT_UPPER_BITS_PRESERVED : 32 bit operations preserve the upper bits of destination registers
SLJIT_SUBC_SETS_SIGNED : SLJIT_SUBC[32] operation always sets the signed result, so setting
the following status flags after a subtract with carry operation is valid:
sljit_set_current_flags(..., SLJIT_CURRENT_FLAGS_SUB | SLJIT_SET_SIG_LESS)
sljit_set_current_flags(..., SLJIT_CURRENT_FLAGS_SUB | SLJIT_SET_SIG_GREATER)
SLJIT_SHARED_COMPARISON_FLAGS: the cpu has different instructions for signed and unsigned
comparisons, which set the same status flags, so passing SLJIT_LESS
or SLJIT_SIG_LESS as an argument has the same effect, and this is true
for all other signed/unsigned comparison type pairs

Constants:
SLJIT_NUMBER_OF_REGISTERS : number of available registers
Expand Down Expand Up @@ -89,14 +97,6 @@
SLJIT_CONV_NAN_FLOAT : result when a NaN floating point value is converted to integer
(possible values: SLJIT_CONV_RESULT_MAX_INT, SLJIT_CONV_RESULT_MIN_INT,
or SLJIT_CONV_RESULT_ZERO)
SLJIT_SUBC_SETS_SIGNED : SLJIT_SUBC[32] operation always sets the signed result, so setting
the following status flags after a subtract with carry operation is valid:
sljit_set_current_flags(..., SLJIT_CURRENT_FLAGS_SUB | SLJIT_SET_SIG_LESS)
sljit_set_current_flags(..., SLJIT_CURRENT_FLAGS_SUB | SLJIT_SET_SIG_GREATER)
SLJIT_SHARED_COMPARISON_FLAGS: the cpu has different instructions for signed and unsigned
comparisons, which set the same status flags, so passing SLJIT_LESS
or SLJIT_SIG_LESS as an argument has the same effect, and this is true
for all other signed/unsigned comparison type pairs

Other macros:
SLJIT_TMP_R0 .. R9 : accessing temporary registers
Expand All @@ -108,18 +108,21 @@
SLJIT_TMP_DEST_REG : a temporary register for results, see the rules below
SLJIT_TMP_DEST_FREG : a temporary register for float results, see the rules below
SLJIT_TMP_DEST_VREG : a temporary register for vector results, see the rules below
SLJIT_TMP_OPT_REG : a temporary register which might not be defined, see the rules below
SLJIT_TMP_OPT_REG : an optional temporary register, see the rules below
SLJIT_TMP_FLAG_REG : an optional temporary register for storing the value of a flag
SLJIT_FUNC : calling convention attribute for both calling JIT from C and C calling back from JIT
SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (platform independent helper)
SLJIT_F64_SECOND(reg) : provides the register index of the second 32 bit part of a 64 bit
floating point register when SLJIT_HAS_F64_AS_F32_PAIR returns non-zero

Temporary register rules (e.g. SLJIT_TMP_DEST_REG / SLJIT_TMP_OPT_REG):
Sljit tries to emit instructions without using any temporary registers whenever it is possible.
When a single temporary register is needed, it is always the "OPT" register. When two temporary
registers are needed, both the "DEST" and "OPT" are used. The x86-32 does not define an "OPT"
register, and handles all cases without an "OPT" register which requires an "OPT" register
on other architectures.
The sljit compiler avoids using temporary registers, but certain instruction
forms cannot be generated without them. The number of temporary registers
reserved by the compiler depends on the target architecture. An SLJIT_TMP_*
name is assigned to some of these registers, which represents their
SLJIT_TMP_R(i) index. When such a register is optional, it might not be
defined on all architectures. For example, the x86-32 code generator does
not use any optional temporary registers.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

wouldn't it be clearer to say "the x86-32 code generator does not use any temporary registers at all because of the limited number of registers available"?

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It uses one temporary register. The "optional" is a second temporary register.

*/

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
Expand Down Expand Up @@ -722,6 +725,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void *code);
#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 3
#define SLJIT_TMP_DEST_REG SLJIT_TMP_R1
#define SLJIT_TMP_OPT_REG SLJIT_TMP_R0
#define SLJIT_TMP_FLAG_REG SLJIT_TMP_R3
#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0
#define SLJIT_MASKED_SHIFT 1
#define SLJIT_MASKED_SHIFT32 1
Expand All @@ -742,6 +746,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void *code);
#define SLJIT_NUMBER_OF_TEMPORARY_VECTOR_REGISTERS 2
#define SLJIT_TMP_DEST_REG SLJIT_TMP_R1
#define SLJIT_TMP_OPT_REG SLJIT_TMP_R0
#define SLJIT_TMP_FLAG_REG SLJIT_TMP_R3
#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0
#define SLJIT_TMP_DEST_VREG SLJIT_TMP_VR0
#define SLJIT_LOCALS_OFFSET_BASE 0
Expand Down Expand Up @@ -799,6 +804,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void *code);
#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2
#define SLJIT_TMP_DEST_REG SLJIT_TMP_R1
#define SLJIT_TMP_OPT_REG SLJIT_TMP_R0
#define SLJIT_TMP_FLAG_REG SLJIT_TMP_R3
#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0
#define SLJIT_LOCALS_OFFSET_BASE 0
#define SLJIT_MASKED_SHIFT 1
Expand Down
46 changes: 28 additions & 18 deletions sljit_src/sljitNativeRISCV_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -2046,15 +2046,13 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl

if (flags & SRC2_IMM) {
if (is_overflow) {
if (src2 >= 0) {
if (dst == src1) {
if (RISCV_HAS_COMPRESSED(200))
FAIL_IF(push_inst16(compiler, C_MV | C_RD(EQUAL_FLAG) | C_RS2(src1)));
FAIL_IF(push_inst16(compiler, C_MV | C_RD(OTHER_FLAG) | C_RS2(src1)));
else
FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(0)));
} else
FAIL_IF(push_inst(compiler, XORI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(-1)));
}
else if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, ADDI | RD(OTHER_FLAG) | RS1(src1) | IMM_I(0)));
}
} else if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, ADDI | WORD | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(src2)));

/* Only the zero flag is needed. */
Expand Down Expand Up @@ -2088,7 +2086,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
}

/* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. */
if (is_overflow || carry_src_r != 0) {
if (carry_src_r != 0 || (is_overflow && !(flags & SRC2_IMM))) {
if (flags & SRC2_IMM)
FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RS1(dst) | IMM_I(src2)));
else
Expand All @@ -2098,6 +2096,12 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
if (!is_overflow)
return SLJIT_SUCCESS;

if (flags & SRC2_IMM) {
if (src2 >= 0)
return push_inst(compiler, SLT | RD(OTHER_FLAG) | RS1(dst) | RS2(dst == src1 ? OTHER_FLAG : src1));
return push_inst(compiler, SLT | RD(OTHER_FLAG) | RS1(dst == src1 ? OTHER_FLAG : src1) | RS2(dst));
}

FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RS1(dst) | RS2(EQUAL_FLAG)));
if (op & SLJIT_SET_Z) {
if (RISCV_HAS_COMPRESSED(200))
Expand Down Expand Up @@ -2234,18 +2238,18 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl

if (flags & SRC2_IMM) {
if (is_overflow) {
if (src2 >= 0) {
if (dst == src1) {
if (RISCV_HAS_COMPRESSED(200))
FAIL_IF(push_inst16(compiler, C_MV | C_RD(EQUAL_FLAG) | C_RS2(src1)));
FAIL_IF(push_inst16(compiler, C_MV | C_RD(OTHER_FLAG) | C_RS2(src1)));
else
FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(0)));
} else
FAIL_IF(push_inst(compiler, XORI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(-1)));
} else if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, ADDI | WORD | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(-src2)));

if (is_overflow || is_carry)
FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RS1(src1) | IMM_I(src2)));
FAIL_IF(push_inst(compiler, ADDI | RD(OTHER_FLAG) | RS1(src1) | IMM_I(0)));
}
} else {
if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, ADDI | WORD | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(-src2)));
if (is_carry)
FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RS1(src1) | IMM_I(src2)));
}

/* Only the zero flag is needed. */
if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) {
Expand Down Expand Up @@ -2276,6 +2280,12 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
if (!is_overflow)
return SLJIT_SUCCESS;

if (flags & SRC2_IMM) {
if (src2 >= 0)
return push_inst(compiler, SLT | RD(OTHER_FLAG) | RS1(dst) | RS2(dst == src1 ? OTHER_FLAG : src1));
return push_inst(compiler, SLT | RD(OTHER_FLAG) | RS1(dst == src1 ? OTHER_FLAG : src1) | RS2(dst));
}

FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RS1(dst) | RS2(EQUAL_FLAG)));
if (op & SLJIT_SET_Z) {
if (RISCV_HAS_COMPRESSED(200))
Expand Down
43 changes: 41 additions & 2 deletions test_src/sljitTest.c
Original file line number Diff line number Diff line change
Expand Up @@ -9871,16 +9871,20 @@ static void test79(void)
executable_code code;
struct sljit_compiler *compiler = sljit_create_compiler(NULL);
sljit_s32 i;
sljit_sw buf[23];
sljit_sw buf[27];
#ifdef SLJIT_TMP_FLAG_REG
struct sljit_jump* jump;
#endif /* SLJIT_TMP_FLAG_REG */

if (verbose)
printf("Run test79\n");

for (i = 0; i < 23; i++)
for (i = 0; i < 27; i++)
buf[i] = -1;

buf[20] = WCONST(0xe56c91d40f839ba7, 0xe56c91d4);
buf[22] = WCONST(0x748bd902ca1f623f, 0x748bd902);
buf[23] = WCONST(0xeab833dc8a6089bc, 0x8a6089bc);

FAILED(!compiler, "cannot create compiler\n");

Expand Down Expand Up @@ -10018,6 +10022,35 @@ static void test79(void)
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_sw), SLJIT_TMP_DEST_REG, 0);
#endif /* SLJIT_TMP_OPT_REG */

#ifdef SLJIT_TMP_FLAG_REG
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (23 * sizeof(sljit_sw)) >> 1);
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_TMP_FLAG_REG, 0, SLJIT_MEM2(SLJIT_S0, SLJIT_R1), 1, SLJIT_IMM, WCONST(0xd5452f384833ec77, 0x4833ec77));
/* buf[23] */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 23 * sizeof(sljit_sw), SLJIT_TMP_FLAG_REG, 0);

sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_TMP_FLAG_REG, 0, SLJIT_IMM, -8);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_TMP_DEST_REG, 0, SLJIT_IMM, -7);
/* The SLJIT_TMP_FLAG_REG might be destroyed. */
jump = sljit_emit_cmp(compiler, SLJIT_SIG_LESS, SLJIT_TMP_FLAG_REG, 0, SLJIT_TMP_DEST_REG, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM0(), 0);
sljit_set_label(jump, sljit_emit_label(compiler));
/* buf[24] */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 24 * sizeof(sljit_sw), SLJIT_TMP_DEST_REG, 0);

sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_TMP_FLAG_REG, 0, SLJIT_IMM, -17);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_TMP_DEST_REG, 0, SLJIT_IMM, -18);
jump = sljit_emit_cmp(compiler, SLJIT_SIG_LESS_EQUAL, SLJIT_TMP_FLAG_REG, 0, SLJIT_TMP_DEST_REG, 0);
/* buf[25] */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 25 * sizeof(sljit_sw), SLJIT_TMP_DEST_REG, 0);
sljit_set_label(jump, sljit_emit_label(compiler));

sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_TMP_FLAG_REG, 0, SLJIT_IMM, WCONST(0xc716533c401db8e3, 0x401db8e3));
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 7);
sljit_emit_op2u(compiler, SLJIT_SUB | SLJIT_SET_Z, SLJIT_R0, 0, SLJIT_IMM, 7);
/* buf[26] */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 26 * sizeof(sljit_sw), SLJIT_TMP_FLAG_REG, 0);
#endif /* SLJIT_TMP_FLAG_REG */

sljit_emit_return_void(compiler);

code.code = sljit_generate_code(compiler, 0, NULL);
Expand Down Expand Up @@ -10054,6 +10087,12 @@ static void test79(void)
FAILED(buf[21] != WCONST(0x748bd902ca1f623f, 0x748bd902), "test79 case 22 failed\n");
FAILED(buf[22] != 22, "test79 case 23 failed\n");
#endif /* SLJIT_TMP_OPT_REG */
#ifdef SLJIT_TMP_FLAG_REG
FAILED(buf[23] != WCONST(0xbffd6314d2947633, 0xd2947633), "test79 case 24 failed\n");
FAILED(buf[24] != -7, "test79 case 25 failed\n");
FAILED(buf[25] != -18, "test79 case 26 failed\n");
FAILED(buf[26] != 0, "test79 case 27 failed\n");
#endif /* SLJIT_TMP_FLAG_REG */

sljit_free_code(code.code, NULL);
successful_tests++;
Expand Down