diff --git a/.gitignore b/.gitignore index 8a662b2c5..9c68cc7d3 100644 --- a/.gitignore +++ b/.gitignore @@ -55,3 +55,6 @@ node_modules /*.tgz args.txt /other/benchs/hlc + +/CLAUDE.md +/.claude \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index a78b53316..80b809fa0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,11 +20,10 @@ include(FindPkgConfig) include(CTest) set(WITH_VM_DEFAULT ON) -if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm|aarch64" AND (NOT CMAKE_OSX_ARCHITECTURES MATCHES "x86_64")) - set(WITH_VM_DEFAULT OFF) -endif() +# VM now supports x86, x86-64, and AArch64 architectures option(WITH_VM "Whether to build the Hashlink virtual machine" ${WITH_VM_DEFAULT}) +option(WITH_LLVM_AOT "Whether to build the hl2llvm AOT compiler" OFF) option(BUILD_SHARED_LIBS "Build using shared libraries" ON) if(BUILD_SHARED_LIBS) # ensure third-party static libs are built with PIC @@ -199,9 +198,24 @@ set_target_properties(libhl ) if (WITH_VM) + # Select JIT backend based on architecture + # Note: macOS uses "arm64" while Linux uses "aarch64" + if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64") + set(JIT_SOURCES + src/jit_aarch64.c + src/jit_aarch64_emit.c + src/jit_shared.c + ) + else() + set(JIT_SOURCES + src/jit_x86.c + src/jit_shared.c + ) + endif() + add_executable(hl src/code.c - src/jit.c + ${JIT_SOURCES} src/main.c src/module.c src/debugger.c @@ -236,6 +250,83 @@ else() endif() endif() +##################### +# LLVM AOT Compiler (hl2llvm) +if(WITH_LLVM_AOT) + find_package(LLVM REQUIRED CONFIG) + message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") + message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") + + # LLVM definitions and includes + add_definitions(${LLVM_DEFINITIONS}) + + # Source files for hl2llvm + set(HL2LLVM_SOURCES + src/llvm/hl2llvm_main.c + src/llvm/llvm_codegen.c + src/llvm/llvm_types.c + src/llvm/llvm_runtime.c + src/llvm/llvm_ops_constants.c + src/llvm/llvm_ops_arith.c + src/llvm/llvm_ops_control.c + src/llvm/llvm_ops_memory.c + src/llvm/llvm_ops_calls.c + src/llvm/llvm_ops_closures.c + src/llvm/llvm_ops_types.c + src/llvm/llvm_ops_objects.c + src/llvm/llvm_ops_enums.c + src/llvm/llvm_ops_refs.c + src/llvm/llvm_ops_exceptions.c + src/llvm/llvm_ops_misc.c + src/code.c + ) + + add_executable(hl2llvm ${HL2LLVM_SOURCES}) + + # AOT runtime library (provides module loading for AOT binaries) + add_library(aot_runtime STATIC + src/llvm/aot_runtime.c + src/module.c + src/code.c + ) + target_include_directories(aot_runtime PRIVATE src) + target_link_libraries(aot_runtime libhl) + + # Make hl2llvm depend on aot_runtime so both are built together + add_dependencies(hl2llvm aot_runtime) + + target_include_directories(hl2llvm + PRIVATE + src + ${LLVM_INCLUDE_DIRS} + ) + + # Get LLVM libraries + llvm_map_components_to_libnames(LLVM_LIBS + core + analysis + bitwriter + target + ${LLVM_TARGETS_TO_BUILD} + ) + + target_link_libraries(hl2llvm + libhl + ${LLVM_LIBS} + ) + + if(APPLE) + set_target_properties(hl2llvm PROPERTIES + INSTALL_RPATH "@executable_path;@executable_path/../${CMAKE_INSTALL_LIBDIR}" + ) + elseif(UNIX) + set_target_properties(hl2llvm PROPERTIES + INSTALL_RPATH "$ORIGIN;$ORIGIN/../${CMAKE_INSTALL_LIBDIR}" + ) + endif() + +endif() + if(BUILD_TESTING) find_program( @@ -402,6 +493,80 @@ if(BUILD_TESTING) add_test(NAME uvsample.hl COMMAND hl ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test/uvsample.hl 6001 ) + + ##################### + # Minimal JIT Tests + # These test individual opcodes without pulling in the Haxe stdlib + + # Common sources for all minimal 
JIT tests + set(MINIMAL_JIT_SOURCES + src/code.c + ${JIT_SOURCES} + src/module.c + src/debugger.c + src/profile.c + ) + + # Macro to add a minimal JIT test + macro(add_minimal_jit_test name) + add_executable(${name} + ${CMAKE_SOURCE_DIR}/other/tests/minimal/${name}.c + ${MINIMAL_JIT_SOURCES} + ) + target_include_directories(${name} + PRIVATE ${CMAKE_SOURCE_DIR}/other/tests/minimal + ) + target_link_libraries(${name} + libhl + ) + set_target_properties(${name} + PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test/minimal + ) + add_test(NAME ${name} COMMAND ${name}) + endmacro() + + # Add all minimal JIT tests + add_minimal_jit_test(test_int_ops) + add_minimal_jit_test(test_float_ops) + add_minimal_jit_test(test_bool_ops) + add_minimal_jit_test(test_control_flow) + add_minimal_jit_test(test_i64_ops) + add_minimal_jit_test(test_calls) + add_minimal_jit_test(test_strings) + add_minimal_jit_test(test_globals) + add_minimal_jit_test(test_natives) + add_minimal_jit_test(test_closures) + add_minimal_jit_test(test_objects) + add_minimal_jit_test(test_dynamic) + add_minimal_jit_test(test_callbacks) + add_minimal_jit_test(test_native_field) + add_minimal_jit_test(test_binop_inplace) + add_minimal_jit_test(test_enum) + add_minimal_jit_test(test_instance_closure) + add_minimal_jit_test(test_memory_ops) + add_minimal_jit_test(test_array_ops) + add_minimal_jit_test(test_ref_ops) + add_minimal_jit_test(test_unsigned_ops) + add_minimal_jit_test(test_switch) + add_minimal_jit_test(test_jumps_unsigned) + add_minimal_jit_test(test_type_ops) + add_minimal_jit_test(test_exceptions) + add_minimal_jit_test(test_methods) + add_minimal_jit_test(test_virtual_fields) + add_minimal_jit_test(test_fp_pressure) + + # Bytecode dump utility (needs code.c for hl_code_read) + add_executable(hldump + ${CMAKE_SOURCE_DIR}/other/tests/minimal/hldump.c + ${CMAKE_SOURCE_DIR}/src/code.c + ) + target_link_libraries(hldump libhl) + set_target_properties(hldump + PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test/minimal + ) + endif() add_test(NAME hello @@ -442,6 +607,9 @@ set(INSTALL_TARGETS libhl) if (WITH_VM) list(APPEND INSTALL_TARGETS hl) endif() +if (WITH_LLVM_AOT) + list(APPEND INSTALL_TARGETS hl2llvm) +endif() install( TARGETS diff --git a/Makefile b/Makefile index 9ff8a6345..f4aed9a03 100644 --- a/Makefile +++ b/Makefile @@ -40,7 +40,16 @@ STD = src/std/array.o src/std/buffer.o src/std/bytes.o src/std/cast.o src/std/da src/std/socket.o src/std/string.o src/std/sys.o src/std/types.o src/std/ucs2.o src/std/thread.o src/std/process.o \ src/std/track.o -HL = src/code.o src/jit.o src/main.o src/module.o src/debugger.o src/profile.o +# Conditional JIT backend selection based on architecture +ifeq ($(ARCH),aarch64) + HL_JIT = src/jit_aarch64.o src/jit_aarch64_emit.o src/jit_shared.o +else ifeq ($(ARCH),arm64) + HL_JIT = src/jit_aarch64.o src/jit_aarch64_emit.o src/jit_shared.o +else + HL_JIT = src/jit_x86.o src/jit_shared.o +endif + +HL = src/code.o $(HL_JIT) src/main.o src/module.o src/debugger.o src/profile.o FMT_INCLUDE = -I include/mikktspace -I include/minimp3 @@ -222,19 +231,12 @@ ifdef DEBUG CFLAGS += -g endif -all: libhl libs -ifeq ($(ARCH),arm64) - $(warning HashLink vm is not supported on arm64, skipping) -else -all: hl -endif +all: libhl libs hl install: $(UNAME)==Darwin && ${MAKE} uninstall -ifneq ($(ARCH),arm64) mkdir -p $(INSTALL_BIN_DIR) cp hl $(INSTALL_BIN_DIR) -endif mkdir -p $(INSTALL_LIB_DIR) cp *.hdll $(INSTALL_LIB_DIR) cp libhl.${LIBEXT} $(INSTALL_LIB_DIR) diff 
--git a/include/mdbg/mach_excServer.c b/include/mdbg/mach_excServer.c index 15044477e..316e3a6a7 100644 --- a/include/mdbg/mach_excServer.c +++ b/include/mdbg/mach_excServer.c @@ -7,7 +7,7 @@ /* Module mach_exc */ -#ifdef __x86_64__ +#if defined(__x86_64__) || defined(__aarch64__) #define __MIG_check__Request__mach_exc_subsystem__ 1 diff --git a/include/mdbg/mach_excUser.c b/include/mdbg/mach_excUser.c index 4d0817fe8..fdc2ecaae 100644 --- a/include/mdbg/mach_excUser.c +++ b/include/mdbg/mach_excUser.c @@ -5,7 +5,7 @@ * OPTIONS: */ -#ifdef __x86_64__ +#if defined(__x86_64__) || defined(__aarch64__) #define __MIG_check__Reply__mach_exc_subsystem__ 1 diff --git a/include/mdbg/mdbg.c b/include/mdbg/mdbg.c index 04148c13e..57f9e43a5 100644 --- a/include/mdbg/mdbg.c +++ b/include/mdbg/mdbg.c @@ -20,7 +20,7 @@ * DEALINGS IN THE SOFTWARE. */ -#ifdef __x86_64__ +#ifdef __aarch64__ #include #include @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include @@ -60,7 +59,18 @@ #define STATUS_STACKOVERFLOW 5 #define STATUS_WATCHBREAK 0x100 -#define SINGLESTEP_TRAP 0x00000100 +/* + * ARM64 EXC_BREAKPOINT exception codes (from mach/arm/exception.h): + * 1 = EXC_ARM_BREAKPOINT - BRK instruction executed + * 2 = Software single-step completed (Empirical value for macOS ARM64) + * 3 = Hardware breakpoint hit (Empirical value for macOS ARM64) + * + * Note: EXC_ARM_SINGLE_STEP and EXC_ARM_HW_BREAKPOINT are not defined + * in the official macOS SDK but are observed during debugging sessions. + */ +#define EXC_ARM_BREAKPOINT 1 +#define EXC_ARM_SINGLE_STEP 2 +#define EXC_ARM_HW_BREAKPOINT 3 #define MAX_EXCEPTION_PORTS 16 @@ -71,12 +81,12 @@ static struct debug_session *find_session(mach_port_t task); static mach_port_t get_task(pid_t pid); static mach_port_t get_thread(mach_port_t mach_task, uint thread_num); static uint64_t get_thread_id(thread_t thread); -static x86_thread_state64_t* get_thread_state(mach_port_t mach_thread); -static kern_return_t set_thread_state(thread_t mach_thread, x86_thread_state64_t *break_state); -static x86_debug_state64_t* get_debug_state(thread_t mach_thread); -static kern_return_t set_debug_state(thread_t mach_thread, x86_debug_state64_t *break_state); +static arm_thread_state64_t* get_thread_state(mach_port_t mach_thread); +static kern_return_t set_thread_state(thread_t mach_thread, arm_thread_state64_t *break_state); +static arm_debug_state64_t* get_debug_state(thread_t mach_thread); +static kern_return_t set_debug_state(thread_t mach_thread, arm_debug_state64_t *break_state); -static void* task_exception_server (mach_port_t exception_port); +static void* task_exception_server_thread(void* arg); #pragma mark Structs @@ -210,34 +220,32 @@ static char* exception_to_string(exception_type_t exc) { static char* get_register_name(int reg) { switch(reg) { - case REG_RAX: return "Rax"; - case REG_RBX: return "Rbx"; - case REG_RCX: return "Rcx"; - case REG_RDX: return "Rdx"; - case REG_RDI: return "Rdi"; - case REG_RSI: return "Rsi"; - case REG_RBP: return "Rbp"; - case REG_RSP: return "Rsp"; - case REG_R8: return "R8"; - case REG_R9: return "R9"; - case REG_R10: return "R10"; - case REG_R11: return "R11"; - case REG_R12: return "R12"; - case REG_R13: return "R13"; - case REG_R14: return "R14"; - case REG_R15: return "R15"; - case REG_RIP: return "Rip"; - case REG_RFLAGS: return "Rflags"; - - case REG_DR0: return "Dr0"; - case REG_DR1: return "Dr1"; - case REG_DR2: return "Dr2"; - case REG_DR3: return "Dr3"; - case REG_DR4: return "Dr4"; - case REG_DR5: return 
"Dr5"; - case REG_DR6: return "Dr6"; - case REG_DR7: return "Dr7"; - + case REG_RAX: return "X0"; + case REG_RBX: return "X19"; + case REG_RCX: return "X1"; + case REG_RDX: return "X2"; + case REG_RDI: return "X0"; + case REG_RSI: return "X1"; + case REG_RBP: return "FP"; + case REG_RSP: return "SP"; + case REG_R8: return "X8"; + case REG_R9: return "X9"; + case REG_R10: return "X10"; + case REG_R11: return "X11"; + case REG_R12: return "X12"; + case REG_R13: return "X13"; + case REG_R14: return "X14"; + case REG_R15: return "X15"; + case REG_RIP: return "PC"; + case REG_RFLAGS: return "CPSR"; + case REG_DR0: return "BVR0"; + case REG_DR1: return "BVR1"; + case REG_DR2: return "BVR2"; + case REG_DR3: return "BVR3"; + case REG_DR4: return "BCR0"; + case REG_DR5: return "BCR1"; + case REG_DR6: return "MDSCR"; + case REG_DR7: return "BCR3"; default: return "invalid register"; } } @@ -246,7 +254,7 @@ static char* get_register_name(int reg) { #pragma mark Debug helpers // From: https://developer.apple.com/library/archive/qa/qa1361/_index.html -// Returns true if the current process is being debugged (either +// Returns true if the current process is being debugged (either // running under the debugger or has a debugger attached post facto). bool is_debugger_attached(void) { int junk; @@ -349,87 +357,146 @@ static debug_session *find_session_by_pid(pid_t pid) { #pragma mark Registers +/* + * ARM64 register mapping for HashLink debugger compatibility. + * + * The debugger protocol uses x86 register indices via get_reg() in debug.c: + * REG_RSP (8) -> SP + * REG_RBP (7) -> FP (X29) + * REG_RIP (17) -> PC + * REG_RFLAGS (18) -> CPSR (note: 32-bit, but we return as 64-bit) + * REG_RAX (1) -> X0 + * + * ARM64 thread state structure (non-opaque): + * __x[29] - General purpose registers X0-X28 + * __fp - Frame pointer X29 + * __lr - Link register X30 + * __sp - Stack pointer + * __pc - Program counter + * __cpsr - Current program status register (32-bit!) 
+ */ -__uint64_t *get_reg( x86_thread_state64_t *regs, int r ) { - switch( r ) { - case REG_RAX: return ®s->__rax; - case REG_RBX: return ®s->__rbx; - case REG_RCX: return ®s->__rcx; - case REG_RDX: return ®s->__rdx; - case REG_RDI: return ®s->__rdi; - case REG_RSI: return ®s->__rsi; - case REG_RBP: return ®s->__rbp; - case REG_RSP: return ®s->__rsp; - case REG_R8: return ®s->__r8; - case REG_R9: return ®s->__r9; - case REG_R10: return ®s->__r10; - case REG_R11: return ®s->__r11; - case REG_R12: return ®s->__r12; - case REG_R13: return ®s->__r13; - case REG_R14: return ®s->__r14; - case REG_R15: return ®s->__r15; - case REG_RIP: return ®s->__rip; - case REG_RFLAGS: return ®s->__rflags; +/* Static storage for CPSR as 64-bit (since __cpsr is 32-bit) */ +static __uint64_t cpsr_as_64; + +__uint64_t *get_reg(arm_thread_state64_t *regs, int r) { + switch(r) { + case REG_RAX: return ®s->__x[0]; /* Return value / first arg */ + case REG_RBX: return ®s->__x[19]; /* Callee-saved */ + case REG_RCX: return ®s->__x[1]; /* Second arg */ + case REG_RDX: return ®s->__x[2]; /* Third arg */ + case REG_RDI: return ®s->__x[0]; /* First arg (same as RAX in ARM64 ABI) */ + case REG_RSI: return ®s->__x[1]; /* Second arg (same as RCX) */ + case REG_RBP: return ®s->__fp; /* Frame pointer X29 */ + case REG_RSP: return ®s->__sp; /* Stack pointer */ + case REG_R8: return ®s->__x[8]; + case REG_R9: return ®s->__x[9]; + case REG_R10: return ®s->__x[10]; + case REG_R11: return ®s->__x[11]; + case REG_R12: return ®s->__x[12]; + case REG_R13: return ®s->__x[13]; + case REG_R14: return ®s->__x[14]; + case REG_R15: return ®s->__x[15]; + case REG_RIP: return ®s->__pc; /* Program counter */ + case REG_RFLAGS: + /* CPSR is 32-bit, convert to 64-bit for API compatibility */ + cpsr_as_64 = regs->__cpsr; + return &cpsr_as_64; } return NULL; } -__uint64_t *get_debug_reg( x86_debug_state64_t *regs, int r ) { - switch( r ) { - case REG_DR0: return ®s->__dr0; - case REG_DR1: return ®s->__dr1; - case REG_DR2: return ®s->__dr2; - case REG_DR3: return ®s->__dr3; - case REG_DR4: return ®s->__dr4; - case REG_DR5: return ®s->__dr5; - case REG_DR6: return ®s->__dr6; - case REG_DR7: return ®s->__dr7; +/* + * ARM64 debug registers mapping. + * + * ARM64 debug state structure: + * __bvr[16] - Breakpoint Value Registers + * __bcr[16] - Breakpoint Control Registers + * __wvr[16] - Watchpoint Value Registers + * __wcr[16] - Watchpoint Control Registers + * __mdscr_el1 - Monitor Debug System Control Register (bit 0 = SS) + * + * x86 DR6 is debug status, DR7 is debug control. + * On ARM64, we map these to the debug state registers. 
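+ *
+ * Illustrative sketch (assumes the kernel propagates MDSCR_EL1 writes
+ * made via thread_set_state, which the single-step path relies on):
+ * enabling software single-step sets bit 0 (SS) through the same
+ * x86-style index:
+ *
+ *   arm_debug_state64_t *dbg = get_debug_state(mach_thread);
+ *   *get_debug_reg(dbg, REG_DR6) |= 1;   // MDSCR_EL1.SS
+ *   set_debug_state(mach_thread, dbg);
+ *   free(dbg);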
+ */ +__uint64_t *get_debug_reg(arm_debug_state64_t *regs, int r) { + switch(r) { + case REG_DR0: return ®s->__bvr[0]; /* Breakpoint Value Register 0 */ + case REG_DR1: return ®s->__bvr[1]; + case REG_DR2: return ®s->__bvr[2]; + case REG_DR3: return ®s->__bvr[3]; + case REG_DR4: return ®s->__bcr[0]; /* Breakpoint Control Register 0 */ + case REG_DR5: return ®s->__bcr[1]; + case REG_DR6: return ®s->__mdscr_el1; /* Debug status/control */ + case REG_DR7: return ®s->__bcr[3]; /* Debug control */ } return NULL; } -__uint64_t read_register(mach_port_t task, int thread, int reg, bool is64 ) { +__uint64_t read_register(mach_port_t task, int thread, int reg, bool is64) { __uint64_t *rdata; mach_port_t mach_thread = get_thread(task, thread); if(reg >= REG_DR0) { - x86_debug_state64_t *regs = get_debug_state(mach_thread); - rdata = get_debug_reg(regs, reg - 4); + arm_debug_state64_t *regs = get_debug_state(mach_thread); + rdata = get_debug_reg(regs, reg); + if(rdata == NULL) { + if(regs) free(regs); + return 0; + } + __uint64_t val = *rdata; + free(regs); + return val; } else { - x86_thread_state64_t *regs = get_thread_state(mach_thread); + arm_thread_state64_t *regs = get_thread_state(mach_thread); rdata = get_reg(regs, reg); + if(rdata == NULL) { + if(regs) free(regs); + return 0; + } + __uint64_t val = *rdata; + free(regs); + return val; } - - DEBUG_PRINT_VERBOSE("register %s is: 0x%08x\n", get_register_name(reg), *rdata); - - return *rdata; } -static kern_return_t write_register(mach_port_t task, int thread, int reg, void *value, bool is64 ) { +static kern_return_t write_register(mach_port_t task, int thread, int reg, void *value, bool is64) { DEBUG_PRINT_VERBOSE("write register %i (%s) on thread %i", reg, get_register_name(reg), thread); __uint64_t *rdata; mach_port_t mach_thread = get_thread(task, thread); + kern_return_t kret = KERN_SUCCESS; if(reg >= REG_DR0) { - x86_debug_state64_t *regs = get_debug_state(mach_thread); - rdata = get_debug_reg(regs, reg - 4); - DEBUG_PRINT_VERBOSE("register flag for %s was: 0x%08x\n",get_register_name(reg), *rdata); + arm_debug_state64_t *regs = get_debug_state(mach_thread); + rdata = get_debug_reg(regs, reg); + if(rdata == NULL) { + if(regs) free(regs); + return KERN_INVALID_ARGUMENT; + } + DEBUG_PRINT_VERBOSE("register flag for %s was: 0x%08llx\n", get_register_name(reg), *rdata); *rdata = (__uint64_t)value; - set_debug_state(mach_thread, regs); + kret = set_debug_state(mach_thread, regs); + free(regs); } else { - x86_thread_state64_t *regs = get_thread_state(mach_thread); + arm_thread_state64_t *regs = get_thread_state(mach_thread); rdata = get_reg(regs, reg); - DEBUG_PRINT_VERBOSE("register flag for %s was: 0x%08x\n",get_register_name(reg), *rdata); + if(rdata == NULL) { + if(regs) free(regs); + return KERN_INVALID_ARGUMENT; + } + DEBUG_PRINT_VERBOSE("register flag for %s was: 0x%08llx\n", get_register_name(reg), *rdata); *rdata = (__uint64_t)value; - set_thread_state(mach_thread, regs); + if( reg == REG_RFLAGS ) regs->__cpsr = (unsigned int)(*rdata); + kret = set_thread_state(mach_thread, regs); + free(regs); } - DEBUG_PRINT_VERBOSE("register flag for %s now is: 0x%08x\n",get_register_name(reg), *rdata); + DEBUG_PRINT_VERBOSE("register flag for %s now is: 0x%08llx\n", get_register_name(reg), value); - return KERN_SUCCESS; + return kret; } @@ -438,35 +505,67 @@ static kern_return_t write_register(mach_port_t task, int thread, int reg, void static kern_return_t read_memory(mach_port_t task, mach_vm_address_t addr, mach_vm_address_t dest, int size) { 
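/* Copies size bytes from the traced task at addr into the local buffer at dest;
   mach_vm_read_overwrite reports the number of bytes actually copied in nread. */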
mach_vm_size_t nread; - kern_return_t kret = mach_vm_read_overwrite(task, addr, size, dest, &nread); - - EXIT_ON_MACH_ERROR(kret,"Error: probably reading from invalid address!"); + kern_return_t kret = mach_vm_read_overwrite(task, addr, size, dest, &nread); + + if(kret != KERN_SUCCESS) { + DEBUG_PRINT("Error reading memory at %p: %s", (void*)addr, mach_error_string(kret)); + return kret; + } - DEBUG_PRINT_VERBOSE("read %i bytes from %p", nread, addr); - #if MDBG_DEBUG && MDBG_LOG_LEVEL > 1 - log_buffer(dest, size); + DEBUG_PRINT_VERBOSE("read %llu bytes from %p", nread, (void*)addr); +#if MDBG_DEBUG && MDBG_LOG_LEVEL > 1 + log_buffer((unsigned char*)dest, size); printf("\n\n"); - #endif +#endif return kret; } static kern_return_t write_memory(mach_port_t task, mach_vm_address_t addr, mach_vm_address_t src, int size) { - kern_return_t kret = mach_vm_protect(task, addr, size, 0, VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE); - EXIT_ON_MACH_ERROR(kret,"Fatal error: failed to acquire write permission!"); + kern_return_t kret; + + /* + * Cross-process memory patching on ARM64 requires W^X compliance. + * mach_vm_protect cannot set WRITE+EXECUTE together regardless of JIT entitlements. + * Strategy: try direct write first, then toggle RW->write->RX for code pages. + */ + /* First try direct write without changing protection */ kret = mach_vm_write(task, addr, src, size); - EXIT_ON_MACH_ERROR(kret,"Fatal error: failed to write to traced process memory!"); + if(kret == KERN_SUCCESS) { + DEBUG_PRINT_VERBOSE("wrote %i bytes to %p (direct)", size, (void*)addr); + return KERN_SUCCESS; + } + + DEBUG_PRINT("Direct write failed, trying with protection change..."); + /* Remove execute, add write (W^X compliant) */ + kret = mach_vm_protect(task, addr, size, 0, VM_PROT_READ | VM_PROT_WRITE | VM_PROT_COPY); + if(kret != KERN_SUCCESS) { + DEBUG_PRINT("Failed to set write permission at %p: %s", (void*)addr, mach_error_string(kret)); + return kret; + } + + kret = mach_vm_write(task, addr, src, size); + if(kret != KERN_SUCCESS) { + DEBUG_PRINT("Failed to write to process memory at %p: %s", (void*)addr, mach_error_string(kret)); + /* Try to restore original protection */ + mach_vm_protect(task, addr, size, 0, VM_PROT_READ | VM_PROT_EXECUTE); + return kret; + } + + /* Restore execute permission */ kret = mach_vm_protect(task, addr, size, 0, VM_PROT_READ | VM_PROT_EXECUTE); - EXIT_ON_MACH_ERROR(kret,"Fatal error: failed to reset write permission!"); + if(kret != KERN_SUCCESS) { + DEBUG_PRINT("Failed to restore execute permission at %p: %s", (void*)addr, mach_error_string(kret)); + } - DEBUG_PRINT_VERBOSE("wrote %i bytes to %p",size, addr); - #if MDBG_DEBUG && MDBG_LOG_LEVEL - log_buffer(src, size); - printf("\n\n"); - #endif - return kret; + DEBUG_PRINT_VERBOSE("wrote %i bytes to %p", size, (void*)addr); +#if MDBG_DEBUG && MDBG_LOG_LEVEL + log_buffer((unsigned char*)src, size); + printf("\n\n"); +#endif + return KERN_SUCCESS; } @@ -486,79 +585,75 @@ static mach_port_t get_thread(mach_port_t mach_task, uint thread_id) { kern_return_t kret = task_threads(mach_task, &threadList, &threadCount); if (kret != KERN_SUCCESS) { DEBUG_PRINT("get_thread() failed with message %s!\n", mach_error_string(kret)); - exit(0); + return 0; } - for(int i=0;ithread_id; + uint64_t tid = tinfo->thread_id; + free(tinfo); + return tid; } -static x86_thread_state64_t* get_thread_state(thread_t mach_thread) { - - x86_thread_state64_t* state; - mach_msg_type_number_t stateCount = x86_THREAD_STATE64_COUNT; +static arm_thread_state64_t* 
get_thread_state(thread_t mach_thread) { + arm_thread_state64_t* state; + mach_msg_type_number_t stateCount = ARM_THREAD_STATE64_COUNT; - state = safe_malloc(sizeof(x86_thread_state64_t)); - kern_return_t kret = thread_get_state( mach_thread, x86_THREAD_STATE64, (thread_state_t)state, &stateCount); + state = safe_malloc(sizeof(arm_thread_state64_t)); + kern_return_t kret = thread_get_state(mach_thread, ARM_THREAD_STATE64, (thread_state_t)state, &stateCount); if (kret != KERN_SUCCESS) { - DEBUG_PRINT("Error failed with message %s!\n", mach_error_string(kret)); - exit(0); + DEBUG_PRINT("get_thread_state failed with message %s!\n", mach_error_string(kret)); + free(state); + return NULL; } return state; } -static kern_return_t set_thread_state(thread_t mach_thread, x86_thread_state64_t *break_state) { - - kern_return_t kret = thread_set_state(mach_thread, x86_THREAD_STATE64, (thread_state_t)break_state, x86_THREAD_STATE64_COUNT); +static kern_return_t set_thread_state(thread_t mach_thread, arm_thread_state64_t *break_state) { + kern_return_t kret = thread_set_state(mach_thread, ARM_THREAD_STATE64, (thread_state_t)break_state, ARM_THREAD_STATE64_COUNT); if (kret != KERN_SUCCESS) { - DEBUG_PRINT("Error failed with message %s!\n", mach_error_string(kret)); - exit(0); + DEBUG_PRINT("set_thread_state failed with message %s!\n", mach_error_string(kret)); } return kret; } +static arm_debug_state64_t* get_debug_state(thread_t mach_thread) { + arm_debug_state64_t* state; + mach_msg_type_number_t stateCount = ARM_DEBUG_STATE64_COUNT; -// Debug register state - -static x86_debug_state64_t* get_debug_state(thread_t mach_thread) { - - x86_debug_state64_t* state; - mach_msg_type_number_t stateCount = x86_DEBUG_STATE64_COUNT; - - state = safe_malloc(sizeof(x86_debug_state64_t)); - kern_return_t kret = thread_get_state( mach_thread, x86_DEBUG_STATE64, (thread_state_t)state, &stateCount); + state = safe_malloc(sizeof(arm_debug_state64_t)); + kern_return_t kret = thread_get_state(mach_thread, ARM_DEBUG_STATE64, (thread_state_t)state, &stateCount); if (kret != KERN_SUCCESS) { - DEBUG_PRINT("Error failed with message %s!\n", mach_error_string(kret)); - exit(0); + DEBUG_PRINT("get_debug_state failed with message %s!\n", mach_error_string(kret)); + free(state); + return NULL; } return state; } -static kern_return_t set_debug_state(thread_t mach_thread, x86_debug_state64_t *break_state) { - - kern_return_t kret = thread_set_state(mach_thread, x86_DEBUG_STATE64, (thread_state_t)break_state, x86_DEBUG_STATE64_COUNT); +static kern_return_t set_debug_state(thread_t mach_thread, arm_debug_state64_t *break_state) { + kern_return_t kret = thread_set_state(mach_thread, ARM_DEBUG_STATE64, (thread_state_t)break_state, ARM_DEBUG_STATE64_COUNT); if (kret != KERN_SUCCESS) { - DEBUG_PRINT("Error failed with message %s!\n", mach_error_string(kret)); - exit(0); + DEBUG_PRINT("set_debug_state failed with message %s!\n", mach_error_string(kret)); } return kret; } @@ -567,7 +662,7 @@ static kern_return_t set_debug_state(thread_t mach_thread, x86_debug_state64_t * #pragma mark Exception ports static kern_return_t save_exception_ports(task_t task, exception_ports_info *info) { - info->count = (sizeof (info->ports) / sizeof (info->ports[0])); + info->count = (sizeof(info->ports) / sizeof(info->ports[0])); return task_get_exception_ports(task, EXC_MASK_ALL, info->masks, &info->count, info->ports, info->behaviors, info->flavors); } @@ -595,7 +690,7 @@ static mach_port_t get_task(pid_t pid) { mach_port_t task; kern_return_t kret = 
task_for_pid(mach_task_self(), pid, &task); - EXIT_ON_MACH_ERROR(kret,"Fatal error: failed to get task for pid %i",pid); + EXIT_ON_MACH_ERROR(kret,"Fatal error: failed to get task for pid %i", pid); return task; } @@ -603,7 +698,7 @@ static mach_port_t get_task(pid_t pid) { static kern_return_t attach_to_task(mach_port_t task, pid_t pid) { if(find_session(task) != NULL) { - DEBUG_PRINT("Warning already attached to task (%i). Not attaching again!",task); + DEBUG_PRINT("Warning already attached to task (%i). Not attaching again!", task); return KERN_SUCCESS; } debug_session *sess = create_debug_session(task, pid); @@ -620,12 +715,12 @@ static kern_return_t attach_to_task(mach_port_t task, pid_t pid) { // store current exception ports save_exception_ports(task, (exception_ports_info*)sess->old_exception_ports); - kret = task_set_exception_ports(task, EXC_MASK_ALL, sess->exception_port, EXCEPTION_STATE_IDENTITY|MACH_EXCEPTION_CODES, x86_THREAD_STATE64); + kret = task_set_exception_ports(task, EXC_MASK_ALL, sess->exception_port, EXCEPTION_STATE_IDENTITY|MACH_EXCEPTION_CODES, ARM_THREAD_STATE64); RETURN_ON_MACH_ERROR(kret,"task_set_exception_ports failed"); // launch mach exception port thread // - err = pthread_create(&sess->exception_handler_thread, NULL, (void *(*)(void*))task_exception_server, (void *(*)(void*))(unsigned long long)sess->exception_port); - EXIT_ON_MACH_ERROR(err,"can't create *task_exception_server* thread :[%s]",strerror(err)); + err = pthread_create(&sess->exception_handler_thread, NULL, task_exception_server_thread, (void*)(uintptr_t)sess->exception_port); + EXIT_ON_MACH_ERROR(err,"can't create *task_exception_server* thread :[%s]", strerror(err)); DEBUG_PRINT("successfully created mach exception port thread %d\n", 0); @@ -639,7 +734,7 @@ static kern_return_t attach_to_pid(pid_t pid) { } static kern_return_t detach_from_pid(pid_t pid) { - debug_session *sess = find_session_by_pid( pid ); + debug_session *sess = find_session_by_pid(pid); if(sess != NULL) { DEBUG_PRINT("cleaning up debug session..."); @@ -661,12 +756,12 @@ static kern_return_t detach_from_pid(pid_t pid) { extern kern_return_t catch_mach_exception_raise /* stub – will not be called */ ( - mach_port_t exception_port, - mach_port_t thread, - mach_port_t task, - exception_type_t exception, - mach_exception_data_t code, - mach_msg_type_number_t codeCnt + mach_port_t exception_port, + mach_port_t thread, + mach_port_t task, + exception_type_t exception, + mach_exception_data_t code, + mach_msg_type_number_t codeCnt ) { DEBUG_PRINT("this handler should not be called"); return MACH_RCV_INVALID_TYPE; @@ -674,48 +769,57 @@ extern kern_return_t catch_mach_exception_raise /* stub – will not be called * extern kern_return_t catch_mach_exception_raise_state /* stub – will not be called */ ( - mach_port_t exception_port, - exception_type_t exception, - const mach_exception_data_t code, - mach_msg_type_number_t codeCnt, - int *flavor, - const thread_state_t old_state, - mach_msg_type_number_t old_stateCnt, - thread_state_t new_state, - mach_msg_type_number_t *new_stateCnt + mach_port_t exception_port, + exception_type_t exception, + const mach_exception_data_t code, + mach_msg_type_number_t codeCnt, + int *flavor, + const thread_state_t old_state, + mach_msg_type_number_t old_stateCnt, + thread_state_t new_state, + mach_msg_type_number_t *new_stateCnt ) { DEBUG_PRINT("this handler should not be called"); return MACH_RCV_INVALID_TYPE; } extern kern_return_t catch_mach_exception_raise_state_identity( - mach_port_t 
exception_port, - mach_port_t thread, - mach_port_t task, - exception_type_t exception, - exception_data_t code, - mach_msg_type_number_t codeCnt, - int * flavor, - thread_state_t old_state, - mach_msg_type_number_t old_stateCnt, - thread_state_t new_state, - mach_msg_type_number_t *new_stateCnt + mach_port_t exception_port, + mach_port_t thread, + mach_port_t task, + exception_type_t exception, + exception_data_t code, + mach_msg_type_number_t codeCnt, + int * flavor, + thread_state_t old_state, + mach_msg_type_number_t old_stateCnt, + thread_state_t new_state, + mach_msg_type_number_t *new_stateCnt ) { + DEBUG_PRINT(">>> ENTER catch_mach_exception_raise_state_identity"); + DEBUG_PRINT("exception=%d, codeCnt=%d, flavor=%d, old_stateCnt=%d", + exception, codeCnt, flavor ? *flavor : -1, old_stateCnt); + + arm_thread_state64_t *state = (arm_thread_state64_t *)old_state; + arm_thread_state64_t *newState = (arm_thread_state64_t *)new_state; - x86_thread_state64_t *state = (x86_thread_state64_t *) old_state; - x86_thread_state64_t *newState = (x86_thread_state64_t *) new_state; + DEBUG_PRINT("state=%p, newState=%p", (void*)state, (void*)newState); debug_session *sess = find_session(task); - sess->current_thread = get_thread_id(thread); /* set system-wide thread id */ + if(sess == NULL) { + DEBUG_PRINT("No session found for task!"); + return KERN_FAILURE; + } - DEBUG_PRINT("exception occured on thread (%i): %s",sess->current_thread, exception_to_string(exception)); - DEBUG_PRINT("stack address: 0x%02lx", state->__rip); + sess->current_thread = get_thread_id(thread); + DEBUG_PRINT("exception occurred on thread (%llu): %s", sess->current_thread, exception_to_string(exception)); + DEBUG_PRINT("PC address: 0x%016llx", state->__pc); if (exception == EXC_SOFTWARE && code[0] == EXC_SOFT_SIGNAL) { // handling UNIX soft signal int subcode = code[2]; - DEBUG_PRINT("EXC_SOFTWARE signal: %s",get_signal_name(code[2])); + DEBUG_PRINT("EXC_SOFTWARE signal: %s", get_signal_name(code[2])); if (subcode == SIGSTOP || subcode == SIGTRAP) { // clear signal to prevent default OS handling // @@ -737,36 +841,45 @@ extern kern_return_t catch_mach_exception_raise_state_identity( }*/ } else if(exception == EXC_BREAKPOINT) { + DEBUG_PRINT("*** EXC_BREAKPOINT caught! PC=0x%016llx, code[0]=%d ***", state->__pc, codeCnt > 0 ? code[0] : -1); task_suspend(sess->task); - // check if single step mode - if(state->__rflags & SINGLESTEP_TRAP) { - state->__rflags &= ~SINGLESTEP_TRAP; // clear single-step + /* ARM64 EXC_BREAKPOINT codes: + * EXC_ARM_BREAKPOINT (1) - BRK instruction + * EXC_ARM_SINGLE_STEP (2) - Software single-step + * EXC_ARM_HW_BREAKPOINT (3) - Hardware breakpoint + */ + if(codeCnt > 0 && code[0] == EXC_ARM_SINGLE_STEP) { sess->process_status = STATUS_SINGLESTEP; DEBUG_PRINT("SINGLE STEP"); } else { sess->process_status = STATUS_BREAKPOINT; + DEBUG_PRINT("BREAKPOINT HIT (code=%d)", codeCnt > 0 ? 
code[0] : -1); } // move past breakpoint by setting old to new thread state *newState = *state; *new_stateCnt = old_stateCnt; - *flavor = x86_THREAD_STATE64; + *flavor = ARM_THREAD_STATE64; semaphore_signal(sess->wait_sem); return KERN_SUCCESS; } else if(exception == EXC_BAD_INSTRUCTION) { - task_suspend(sess->task); - sess->process_status = STATUS_BREAKPOINT; + task_suspend(sess->task); + sess->process_status = STATUS_BREAKPOINT; - return KERN_SUCCESS; + semaphore_signal(sess->wait_sem); + + return KERN_SUCCESS; } else if(exception == EXC_BAD_ACCESS) { task_suspend(sess->task); sess->process_status = STATUS_ERROR; + semaphore_signal(sess->wait_sem); + return KERN_SUCCESS; } else { @@ -776,7 +889,8 @@ extern kern_return_t catch_mach_exception_raise_state_identity( return KERN_FAILURE; } -static void* task_exception_server (mach_port_t exception_port) { +static void* task_exception_server_thread(void* arg) { + mach_port_t exception_port = (mach_port_t)(uintptr_t)arg; mach_msg_return_t rt; mach_msg_header_t *msg; mach_msg_header_t *reply; @@ -788,13 +902,13 @@ static void* task_exception_server (mach_port_t exception_port) { int i = 0; while (1) { - DEBUG_PRINT("waiting for next exception (%i)...",i); + DEBUG_PRINT("waiting for next exception (%i)...", i); i++; rt = mach_msg(msg, MACH_RCV_MSG, 0, sizeof(union __RequestUnion__mach_exc_subsystem), exception_port, 0, MACH_PORT_NULL); - if (rt!= MACH_MSG_SUCCESS) { - DEBUG_PRINT("MACH_RCV_MSG stopped, exit from task_exception_server thread :%d\n", 1); + if (rt != MACH_MSG_SUCCESS) { + DEBUG_PRINT("MACH_RCV_MSG stopped, exit from task_exception_server thread: %d\n", rt); return "MACH_RCV_MSG_FAILURE"; } /* @@ -810,7 +924,7 @@ static void* task_exception_server (mach_port_t exception_port) { // Send the now-initialized reply rt = mach_msg(reply, MACH_SEND_MSG, reply->msgh_size, 0, MACH_PORT_NULL, 0, MACH_PORT_NULL); - if (rt!= MACH_MSG_SUCCESS) { + if (rt != MACH_MSG_SUCCESS) { return "MACH_SEND_MSG_FAILURE"; } } @@ -819,7 +933,7 @@ static void* task_exception_server (mach_port_t exception_port) { static void wait_for_exception(debug_session *sess, int timeout /*in millis*/) { DEBUG_PRINT("waiting for next exception..."); - kern_return_t kret = semaphore_timedwait(sess->wait_sem, (struct mach_timespec){0,timeout * 1000000}); + kern_return_t kret = semaphore_timedwait(sess->wait_sem, (struct mach_timespec){0, timeout * 1000000}); if(kret == KERN_OPERATION_TIMED_OUT) { sess->process_status = STATUS_TIMEOUT; DEBUG_PRINT("wait timed out!"); @@ -831,20 +945,20 @@ static void wait_for_exception(debug_session *sess, int timeout /*in millis*/) { #pragma mark Debug API -status_t MDBG_API(session_attach)( pid_t pid ) { +status_t MDBG_API(session_attach)(pid_t pid) { return attach_to_pid(pid) == KERN_SUCCESS; } -status_t MDBG_API(session_detach)( pid_t pid ) { +status_t MDBG_API(session_detach)(pid_t pid) { return detach_from_pid(pid) == KERN_SUCCESS; } -status_t MDBG_API(session_pause)( pid_t pid ) { +status_t MDBG_API(session_pause)(pid_t pid) { return kill(pid, SIGTRAP) == 0; } -int MDBG_API(session_wait)( pid_t pid, int *thread, int timeout ) { - debug_session *sess = find_session_by_pid( pid ); +int MDBG_API(session_wait)(pid_t pid, int *thread, int timeout) { + debug_session *sess = find_session_by_pid(pid); if(sess != NULL) { wait_for_exception(sess, timeout); *thread = sess->current_thread; @@ -854,8 +968,8 @@ int MDBG_API(session_wait)( pid_t pid, int *thread, int timeout ) { return 4; } -status_t MDBG_API(session_resume)( pid_t pid ) { - 
debug_session *sess = find_session_by_pid( pid ); +status_t MDBG_API(session_resume)(pid_t pid) { + debug_session *sess = find_session_by_pid(pid); if(sess != NULL) { sess->process_status = STATUS_HANDLED; task_resume(sess->task); @@ -865,24 +979,24 @@ status_t MDBG_API(session_resume)( pid_t pid ) { return false; } -debug_session *MDBG_API(session_get)( pid_t pid ) { - return find_session_by_pid( pid ); +debug_session *MDBG_API(session_get)(pid_t pid) { + return find_session_by_pid(pid); } -status_t MDBG_API(read_memory)( pid_t pid, unsigned char* addr, unsigned char* dest, int size ) { - return read_memory( get_task(pid), (mach_vm_address_t)addr, (mach_vm_address_t)dest, size ) == KERN_SUCCESS; +status_t MDBG_API(read_memory)(pid_t pid, unsigned char* addr, unsigned char* dest, int size) { + return read_memory(get_task(pid), (mach_vm_address_t)addr, (mach_vm_address_t)dest, size) == KERN_SUCCESS; } -status_t MDBG_API(write_memory)( pid_t pid, unsigned char* addr, unsigned char* src, int size ) { - return write_memory( get_task(pid), (mach_vm_address_t)addr, (mach_vm_address_t)src, size ) == KERN_SUCCESS; +status_t MDBG_API(write_memory)(pid_t pid, unsigned char* addr, unsigned char* src, int size) { + return write_memory(get_task(pid), (mach_vm_address_t)addr, (mach_vm_address_t)src, size) == KERN_SUCCESS; } -void* MDBG_API(read_register)( pid_t pid, int thread, int reg, bool is64 ) { - return (void*)read_register( get_task(pid), thread, reg, is64 ); +void* MDBG_API(read_register)(pid_t pid, int thread, int reg, bool is64) { + return (void*)read_register(get_task(pid), thread, reg, is64); } -status_t MDBG_API(write_register)( pid_t pid, int thread, int reg, void *value, bool is64 ) { - return write_register( get_task(pid), thread, reg, value, is64 ) == KERN_SUCCESS; +status_t MDBG_API(write_register)(pid_t pid, int thread, int reg, void *value, bool is64) { + return write_register(get_task(pid), thread, reg, value, is64) == KERN_SUCCESS; } #endif \ No newline at end of file diff --git a/libs/sdl/CMakeLists.txt b/libs/sdl/CMakeLists.txt index c49ef7552..9b4578d2e 100644 --- a/libs/sdl/CMakeLists.txt +++ b/libs/sdl/CMakeLists.txt @@ -56,7 +56,22 @@ if(ANDROID) target_link_libraries(sdl.hdll GLESv3) endif() -if((APPLE OR UNIX) AND NOT ANDROID) +# ARM Linux with OpenGL ES 3.1 (e.g., Asahi, Raspberry Pi) +option(USE_GLES31 "Use OpenGL ES 3.1 instead of Desktop OpenGL" OFF) + +if(USE_GLES31 AND UNIX AND NOT ANDROID AND NOT APPLE) + find_package(PkgConfig REQUIRED) + pkg_check_modules(GLES REQUIRED glesv2) + target_include_directories(sdl.hdll + PRIVATE + ${GLES_INCLUDE_DIRS} + ) + target_link_libraries(sdl.hdll + libhl + ${GLES_LIBRARIES} + ) + target_compile_definitions(sdl.hdll PRIVATE HL_GLES31=1) +elseif((APPLE OR UNIX) AND NOT ANDROID) find_package(OpenGL REQUIRED) target_include_directories(sdl.hdll PRIVATE diff --git a/libs/sdl/GLImports.h b/libs/sdl/GLImports.h index 82ed26f2d..e6d5bed5c 100644 --- a/libs/sdl/GLImports.h +++ b/libs/sdl/GLImports.h @@ -118,7 +118,9 @@ GL_IMPORT(glGetProgramResourceIndex, GETPROGRAMRESOURCEINDEX); GL_IMPORT(glShaderStorageBlockBinding, SHADERSTORAGEBLOCKBINDING); GL_IMPORT(glMultiDrawElementsIndirect, MULTIDRAWELEMENTSINDIRECT); +#if !defined(HL_GLES31) GL_IMPORT(glColorMaski, COLORMASKI); +#endif GL_IMPORT(glTexStorage2D, TEXSTORAGE2D); GL_IMPORT(glTexStorage3D, TEXSTORAGE3D); @@ -126,7 +128,7 @@ GL_IMPORT(glTexStorage3D, TEXSTORAGE3D); GL_IMPORT(glDebugMessageCallback, DEBUGMESSAGECALLBACK); GL_IMPORT(glDebugMessageControl, DEBUGMESSAGECONTROL); -#if 
!defined(HL_MESA) +#if !defined(HL_MESA) && !defined(HL_GLES31) GL_IMPORT(glGetQueryObjectui64v, GETQUERYOBJECTUI64V); GL_IMPORT(glQueryCounter, QUERYCOUNTER); #endif diff --git a/libs/sdl/gl.c b/libs/sdl/gl.c index 086cd24c1..30a32a2fb 100644 --- a/libs/sdl/gl.c +++ b/libs/sdl/gl.c @@ -28,6 +28,12 @@ # include # include # define HL_GLES +#elif defined(HL_GLES31) +// ARM Linux with OpenGL ES 3.1 (e.g., Asahi, Raspberry Pi) +# include +# include +# include +# define HL_GLES #else # include # include @@ -35,20 +41,42 @@ #ifdef HL_GLES # define GL_IMPORT(fun, t) -# define ES_NOT_SUPPORTED hl_error("Not supported by GLES3") +# define ES_NOT_SUPPORTED hl_error("Not supported by GLES") +// Tier 1: Not available in any GLES version # define glBindFragDataLocation(...) ES_NOT_SUPPORTED -# define glBindImageTexture(...) ES_NOT_SUPPORTED -# define glTexImage2DMultisample(...) ES_NOT_SUPPORTED -# define glFramebufferTexture(...) ES_NOT_SUPPORTED -# define glDispatchCompute(...) ES_NOT_SUPPORTED -# define glMemoryBarrier(...) ES_NOT_SUPPORTED # define glGetBufferSubData(...) ES_NOT_SUPPORTED -# define glShaderStorageBlockBinding(...) ES_NOT_SUPPORTED # define glPolygonMode(face,mode) if( mode != 0x1B02 ) ES_NOT_SUPPORTED # define glGetQueryObjectiv glGetQueryObjectuiv # define glClearDepth glClearDepthf #endif +// Tier 2: Available in GLES 3.1+ but not in GLES 3.0 +#if defined(HL_GLES) && !defined(HL_GLES31) +# define glDispatchCompute(...) ES_NOT_SUPPORTED +# define glMemoryBarrier(...) ES_NOT_SUPPORTED +# define glBindImageTexture(...) ES_NOT_SUPPORTED +# define glGetProgramResourceIndex(...) ES_NOT_SUPPORTED +#endif + +// Not in any GLES version (use layout qualifiers in shaders instead) +#if defined(HL_GLES) +# define glTexImage2DMultisample(...) ES_NOT_SUPPORTED +# define glShaderStorageBlockBinding(...) ES_NOT_SUPPORTED +#endif + +// glFramebufferTexture is GLES 3.2 only - map to layer variant for 3.1 +#if defined(HL_GLES31) +# define glFramebufferTexture(target, attachment, texture, level) \ + glFramebufferTextureLayer(target, attachment, texture, level, 0) +#elif defined(HL_GLES) +# define glFramebufferTexture(...) ES_NOT_SUPPORTED +#endif + +// glColorMaski is GLES 3.2 only +#if defined(HL_GLES31) +# define glColorMaski(...) 
ES_NOT_SUPPORTED +#endif + #if !defined(HL_CONSOLE) && !defined(GL_IMPORT) #define GL_IMPORT(fun, t) PFNGL##t##PROC fun #include "GLImports.h" @@ -679,14 +707,14 @@ HL_PRIM bool HL_NAME(gl_query_result_available)( vdynamic *q ) { HL_PRIM double HL_NAME(gl_query_result)( vdynamic *q ) { GLuint64 v = -1; -# if !defined(HL_MESA) && !defined(HL_MOBILE) +# if !defined(HL_MESA) && !defined(HL_MOBILE) && !defined(HL_GLES31) glGetQueryObjectui64v(q->v.i, GL_QUERY_RESULT, &v); # endif return (double)v; } HL_PRIM void HL_NAME(gl_query_counter)( vdynamic *q, int target ) { -# if !defined(HL_MESA) && !defined(HL_MOBILE) +# if !defined(HL_MESA) && !defined(HL_MOBILE) && !defined(HL_GLES31) glQueryCounter(q->v.i, target); # endif } diff --git a/libs/sdl/sdl.c b/libs/sdl/sdl.c index 98730a50a..206ef8975 100644 --- a/libs/sdl/sdl.c +++ b/libs/sdl/sdl.c @@ -114,10 +114,14 @@ HL_PRIM bool HL_NAME(init_once)() { # endif // default GL parameters if (!isGlOptionsSet) { -#ifdef HL_MOBILE +#if defined(HL_MOBILE) || defined(HL_GLES31) SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_ES); SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 3); +# ifdef HL_GLES31 + SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 1); +# else SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 0); +# endif #else SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE); SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 3); @@ -145,7 +149,7 @@ HL_PRIM void HL_NAME(gl_options)( int major, int minor, int depth, int stencil, else if( flags&8 ) SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_ES); else { -#ifdef HL_MOBILE +#if defined(HL_MOBILE) || defined(HL_GLES31) SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_ES); #else SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE); diff --git a/libs/sdl/sdl/GL.hx b/libs/sdl/sdl/GL.hx index a31e44a96..1ac3a2db7 100644 --- a/libs/sdl/sdl/GL.hx +++ b/libs/sdl/sdl/GL.hx @@ -701,6 +701,7 @@ class GL { public static inline var LUMINANCE_ALPHA = 0x190A; public static inline var BGRA = 0x80E1; + public static inline var RGB8 = 0x8051; public static inline var RGBA8 = 0x8058; public static inline var RGB10_A2 = 0x8059; diff --git a/libs/sdl/sdl/Window.hx b/libs/sdl/sdl/Window.hx index aac77be21..06194c739 100644 --- a/libs/sdl/sdl/Window.hx +++ b/libs/sdl/sdl/Window.hx @@ -96,8 +96,10 @@ class Window { var shaderVersion = 120; if (isOpenGLES) { - if( reg.match(v) ) - shaderVersion = Std.int(Math.min( 100, Math.round( Std.parseFloat(reg.matched(0)) * 100 ) )); + shaderVersion = 100; // GLES 2.0 default + if( reg.match(v) ) { + shaderVersion = Math.round( Std.parseFloat(reg.matched(0)) * 100 ); + } } else { shaderVersion = 130; @@ -109,12 +111,17 @@ class Window { } var vertex = GL.createShader(GL.VERTEX_SHADER); - GL.shaderSource(vertex, ["#version " + shaderVersion, "void main() { gl_Position = vec4(1.0); }"].join("\n")); + if (isOpenGLES) + GL.shaderSource(vertex, ["#version " + shaderVersion + " es", "void main() { gl_Position = vec4(1.0); }"].join("\n")); + else + GL.shaderSource(vertex, ["#version " + shaderVersion, "void main() { gl_Position = vec4(1.0); }"].join("\n")); GL.compileShader(vertex); if( GL.getShaderParameter(vertex, GL.COMPILE_STATUS) != 1 ) throw "Failed to compile VS ("+GL.getShaderInfoLog(vertex)+")"; var fragment = GL.createShader(GL.FRAGMENT_SHADER); - if (isOpenGLES) + if (isOpenGLES && shaderVersion >= 300) + GL.shaderSource(fragment, ["#version " + shaderVersion + 
" es", "precision mediump float;", "out vec4 color; void main() { color = vec4(1.0); }"].join("\n")); + else if (isOpenGLES) GL.shaderSource(fragment, ["#version " + shaderVersion, "lowp vec4 color; void main() { color = vec4(1.0); }"].join("\n")); else GL.shaderSource(fragment, ["#version " + shaderVersion, "out vec4 color; void main() { color = vec4(1.0); }"].join("\n")); diff --git a/other/osx/entitlements.xml b/other/osx/entitlements.xml index c834321b2..292453bf3 100644 --- a/other/osx/entitlements.xml +++ b/other/osx/entitlements.xml @@ -2,7 +2,17 @@ + + com.apple.security.cs.allow-jit + + com.apple.security.get-task-allow + + com.apple.security.cs.allow-unsigned-executable-memory + + + com.apple.security.cs.disable-library-validation + \ No newline at end of file diff --git a/other/tests/Arm64JitTest.hx b/other/tests/Arm64JitTest.hx new file mode 100644 index 000000000..2c12e6473 --- /dev/null +++ b/other/tests/Arm64JitTest.hx @@ -0,0 +1,131 @@ +class Arm64JitTest { + + static function testFloatRegPressure() { + trace("Testing Float (Double) Register Pressure..."); + var v0 = 1.1; + var v1 = 2.2; + var v2 = 3.3; + var v3 = 4.4; + var v4 = 5.5; + var v5 = 6.6; + var v6 = 7.7; + var v7 = 8.8; + var v8 = 9.9; + + var val = Math.random() > 0.5 ? 123.456 : 123.456; + + // Use Float (Double) array + var arr = new hl.NativeArray(10); + arr[0] = 1.0; + + arr[1] = val; + + var sum = v0 + v1 + v2 + v3 + v4 + v5 + v6 + v7 + v8; + var expected = 1.1 + 2.2 + 3.3 + 4.4 + 5.5 + 6.6 + 7.7 + 8.8 + 9.9; + + if (Math.abs(sum - expected) > 0.0001) { + throw "Float register corruption detected! Sum: " + sum + ", Expected: " + expected; + } + + if (arr[1] != 123.456) { + throw "Double Array write failed! Got " + arr[1]; + } + trace("Float (Double) Register Pressure Test Passed"); + } + + static function testSingleRegPressure() { + trace("Testing Single (F32) Register Pressure..."); + // Use Singles + var v0 : Single = 1.5; + var v1 : Single = 2.5; + var v2 : Single = 3.5; + var v3 : Single = 4.5; + var v4 : Single = 5.5; + var v5 : Single = 6.5; + var v6 : Single = 7.5; + var v7 : Single = 8.5; + var v8 : Single = 9.5; + + var val : Single = 123.5; // Exact in float32 + + var arr = new hl.NativeArray(10); + arr[1] = val; + + var sum = v0 + v1 + v2 + v3 + v4 + v5 + v6 + v7 + v8; + var expected = 1.5 + 2.5 + 3.5 + 4.5 + 5.5 + 6.5 + 7.5 + 8.5 + 9.5; + + if (Math.abs(sum - expected) > 0.001) { + throw "Single register corruption detected!"; + } + + if (arr[1] != 123.5) { + throw "Single Array write failed! Got " + arr[1]; + } + trace("Single (F32) Register Pressure Test Passed"); + } + + static function testIntRegPressure() { + trace("Testing Int Register Pressure..."); + var i0 = 10; + var i1 = 11; + var i2 = 12; + var i3 = 13; + var i4 = 14; + var i5 = 15; + var i6 = 16; + var i7 = 17; + var i8 = 18; + var i9 = 19; + + // Use a value calculated at runtime to avoid immediate encoding optimizations if possible + var val = Std.int(Math.random() * 0) + 999; + + var arr = new hl.NativeArray(10); + arr[0] = val; + + var sum = i0 + i1 + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9; + var expected = 10+11+12+13+14+15+16+17+18+19; + + if (sum != expected) { + throw "Int register corruption detected! 
Sum: " + sum + ", Expected: " + expected; + } + + if (arr[0] != 999) { + throw "Int Array write failed"; + } + trace("Int Register Pressure Test Passed"); + } + + static function testMemOps() { + trace("Testing Memory Ops (structs)..."); + // Test struct field access which uses op_get_mem / op_set_mem + var c = new TestClass(); + c.a = 1; + c.b = 2.5; + c.c = 3; + + var val = c.a + Std.int(c.b) + c.c; + if (val != 6) throw "Memory Op test failed"; + trace("Memory Ops Test Passed"); + } + + static function main() { + try { + testFloatRegPressure(); + testSingleRegPressure(); + testIntRegPressure(); + testMemOps(); + trace("All tests passed!"); + } catch(e:Dynamic) { + trace("TEST FAILED: " + e); + Sys.exit(1); + } + } +} + +class TestClass { + public var a : Int; + public var b : Float; + public var c : Int; + public function new() {} +} diff --git a/other/tests/Arm64TrapTypeTest.hx b/other/tests/Arm64TrapTypeTest.hx new file mode 100644 index 000000000..9bc688b20 --- /dev/null +++ b/other/tests/Arm64TrapTypeTest.hx @@ -0,0 +1,30 @@ +class Arm64TrapTypeTest { + static function test() { + trace("Testing OTrap type checking..."); + var caught = false; + try { + throw new MyError("test"); + } catch(e:MyError) { + caught = true; + trace("Caught specific error!"); + } catch(e:Dynamic) { + trace("Caught dynamic error (wrong!)"); + } + + if (!caught) { + trace("FAILED: Did not catch MyError in specific catch block"); + Sys.exit(1); + } else { + trace("PASSED: Caught MyError correctly"); + } + } + + static function main() { + test(); + } +} + +class MyError { + var msg:String; + public function new(m:String) { msg = m; } +} diff --git a/other/tests/TestGlobalTypeCheck.hx b/other/tests/TestGlobalTypeCheck.hx new file mode 100644 index 000000000..625eb45f0 --- /dev/null +++ b/other/tests/TestGlobalTypeCheck.hx @@ -0,0 +1,41 @@ +class CustomError { + public var msg:String; + public function new(m:String) { + msg = m; + } +} + +class TestGlobalTypeCheck { + static function main() { + trace("Starting global type check test..."); + + // Test 1: Try-Catch with specific type + // This exercises the OCatch path in the JIT where it loads the global type + var caught = false; + try { + throw new CustomError("test error"); + } catch( e : CustomError ) { + trace("Caught CustomError successfully: " + e.msg); + caught = true; + } catch( e : Dynamic ) { + trace("Failed to match CustomError, caught as Dynamic: " + e); + } + + if (!caught) { + trace("Test 1 FAILED: Did not catch CustomError"); + Sys.exit(1); + } + + // Test 2: Std.is + // This exercises the OGetGlobal + OCall2 (likely Std.is implementation details) path + var c = new CustomError("check"); + if( Std.isOfType(c, CustomError) ) { + trace("Std.isOfType(c, CustomError) is true"); + } else { + trace("Std.isOfType(c, CustomError) is false (FAILED)"); + Sys.exit(1); + } + + trace("All tests passed"); + } +} diff --git a/other/tests/minimal/Empty.hx b/other/tests/minimal/Empty.hx new file mode 100644 index 000000000..0df354d52 --- /dev/null +++ b/other/tests/minimal/Empty.hx @@ -0,0 +1,4 @@ +// Test 1: Absolutely minimal - empty main +class Empty { + static function main() {} +} diff --git a/other/tests/minimal/FieldAccess.hx b/other/tests/minimal/FieldAccess.hx new file mode 100644 index 000000000..f8f902503 --- /dev/null +++ b/other/tests/minimal/FieldAccess.hx @@ -0,0 +1,16 @@ +// Test 4: Object field access +class Point { + public var x:Int; + public var y:Int; + public function new(x:Int, y:Int) { + this.x = x; + this.y = y; + } +} + +class FieldAccess 
{ + static function main() { + var p = new Point(10, 20); + var sum = p.x + p.y; + } +} diff --git a/other/tests/minimal/FuncCall.hx b/other/tests/minimal/FuncCall.hx new file mode 100644 index 000000000..c6b4285c9 --- /dev/null +++ b/other/tests/minimal/FuncCall.hx @@ -0,0 +1,9 @@ +// Test 3: Function call +class FuncCall { + static function add(x:Int, y:Int):Int { + return x + y; + } + static function main() { + var result = add(3, 4); + } +} diff --git a/other/tests/minimal/IntAdd.hx b/other/tests/minimal/IntAdd.hx new file mode 100644 index 000000000..3d9d6e8ae --- /dev/null +++ b/other/tests/minimal/IntAdd.hx @@ -0,0 +1,8 @@ +// Test 2: Integer addition +class IntAdd { + static function main() { + var a = 1; + var b = 2; + var c = a + b; + } +} diff --git a/other/tests/minimal/Makefile b/other/tests/minimal/Makefile new file mode 100644 index 000000000..5f17a396e --- /dev/null +++ b/other/tests/minimal/Makefile @@ -0,0 +1,2 @@ +# NOTE: When adding a new test, add it to CMakeLists.txt in the project root +# (search for "add_minimal_jit_test") diff --git a/other/tests/minimal/hldump.c b/other/tests/minimal/hldump.c new file mode 100644 index 000000000..ed8669ffd --- /dev/null +++ b/other/tests/minimal/hldump.c @@ -0,0 +1,287 @@ +/* + * Simple HashLink bytecode dumper + * Dumps functions and opcodes from a .hl file + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <hl.h> +#include <hlmodule.h> + +/* Opcode names from opcodes.h */ +static const char *opcode_names[] = { + "OMov", "OInt", "OFloat", "OBool", "OBytes", "OString", "ONull", + "OAdd", "OSub", "OMul", "OSDiv", "OUDiv", "OSMod", "OUMod", + "OShl", "OSShr", "OUShr", "OAnd", "OOr", "OXor", + "ONeg", "ONot", "OIncr", "ODecr", + "OCall0", "OCall1", "OCall2", "OCall3", "OCall4", "OCallN", "OCallMethod", "OCallThis", "OCallClosure", + "OStaticClosure", "OInstanceClosure", "OVirtualClosure", + "OGetGlobal", "OSetGlobal", + "OField", "OSetField", "OGetThis", "OSetThis", + "ODynGet", "ODynSet", + "OJTrue", "OJFalse", "OJNull", "OJNotNull", "OJSLt", "OJSGte", "OJSGt", "OJSLte", "OJULt", "OJUGte", "OJNotLt", "OJNotGte", "OJEq", "OJNotEq", "OJAlways", + "OToDyn", "OToSFloat", "OToUFloat", "OToInt", "OSafeCast", "OUnsafeCast", "OToVirtual", + "OLabel", "ORet", "OThrow", "ORethrow", "OSwitch", "ONullCheck", "OTrap", "OEndTrap", + "OGetI8", "OGetI16", "OGetMem", "OGetArray", "OSetI8", "OSetI16", "OSetMem", "OSetArray", + "ONew", "OArraySize", "OType", "OGetType", "OGetTID", + "ORef", "OUnref", "OSetref", + "OMakeEnum", "OEnumAlloc", "OEnumIndex", "OEnumField", "OSetEnumField", + "OAssert", "ORefData", "ORefOffset", + "ONop", "OPrefetch", "OAsm", "OCatch" +}; + +static const char *type_kind_name(hl_type_kind k) { + switch (k) { + case HVOID: return "void"; + case HUI8: return "u8"; + case HUI16: return "u16"; + case HI32: return "i32"; + case HI64: return "i64"; + case HF32: return "f32"; + case HF64: return "f64"; + case HBOOL: return "bool"; + case HBYTES: return "bytes"; + case HDYN: return "dyn"; + case HFUN: return "fun"; + case HOBJ: return "obj"; + case HARRAY: return "array"; + case HTYPE: return "type"; + case HREF: return "ref"; + case HVIRTUAL: return "virtual"; + case HDYNOBJ: return "dynobj"; + case HABSTRACT: return "abstract"; + case HENUM: return "enum"; + case HNULL: return "null"; + case HMETHOD: return "method"; + case HSTRUCT: return "struct"; + case HPACKED: return "packed"; + default: return "???"; + } +} + +static void print_type(hl_type *t) { + if (!t) { + printf("null"); + return; + } + printf("%s", type_kind_name(t->kind)); + if (t->kind == HOBJ 
&& t->obj && t->obj->name) { + printf("(%ls)", (wchar_t*)t->obj->name); + } else if (t->kind == HFUN && t->fun) { + printf("("); + for (int i = 0; i < t->fun->nargs; i++) { + if (i > 0) printf(","); + print_type(t->fun->args[i]); + } + printf(")->"); + print_type(t->fun->ret); + } +} + +static void dump_function(hl_code *c, hl_function *f, int verbose) { + printf("\n=== Function %d ===\n", f->findex); + printf(" Type: "); + print_type(f->type); + printf("\n"); + printf(" Registers: %d\n", f->nregs); + printf(" Opcodes: %d\n", f->nops); + + if (verbose) { + printf(" Register types:\n"); + for (int i = 0; i < f->nregs && i < 20; i++) { + printf(" r%d: ", i); + print_type(f->regs[i]); + printf("\n"); + } + if (f->nregs > 20) printf(" ... (%d more)\n", f->nregs - 20); + } + + printf(" Code:\n"); + for (int i = 0; i < f->nops; i++) { + hl_opcode *op = &f->ops[i]; + const char *name = (op->op < sizeof(opcode_names)/sizeof(opcode_names[0])) + ? opcode_names[op->op] : "???"; + printf(" %4d: %-16s %d, %d, %d", i, name, op->p1, op->p2, op->p3); + + /* Show extra info for some opcodes */ + switch (op->op) { + case OInt: + if (op->p2 >= 0 && op->p2 < c->nints) + printf(" ; r%d = %d", op->p1, c->ints[op->p2]); + break; + case OString: + if (op->p2 >= 0 && op->p2 < c->nstrings) + printf(" ; r%d = \"%s\"", op->p1, c->strings[op->p2]); + break; + case OBool: + printf(" ; r%d = %s", op->p1, op->p2 ? "true" : "false"); + break; + case OCall0: + case OCall1: + case OCall2: + case OCall3: + case OCall4: + case OCallN: + printf(" ; call F%d", op->p2); + break; + case OJAlways: + printf(" ; goto %d", (i + 1) + op->p1); + break; + case OJTrue: + case OJFalse: + case OJNull: + case OJNotNull: + printf(" ; if r%d goto %d", op->p1, (i + 1) + op->p2); + break; + case OJSLt: + case OJSGte: + case OJEq: + case OJNotEq: + printf(" ; if r%d,r%d goto %d", op->p1, op->p2, (i + 1) + op->p3); + break; + case ORet: + printf(" ; return r%d", op->p1); + break; + case OGetGlobal: + printf(" ; r%d = global[%d]", op->p1, op->p2); + break; + case OSetGlobal: + printf(" ; global[%d] = r%d", op->p2, op->p1); + break; + case OField: + printf(" ; r%d = r%d.field[%d]", op->p1, op->p2, op->p3); + break; + case OSetField: + printf(" ; r%d.field[%d] = r%d", op->p1, op->p2, op->p3); + break; + case ONew: + printf(" ; r%d = new", op->p1); + break; + default: + break; + } + printf("\n"); + } +} + +int main(int argc, char **argv) { + if (argc < 2) { + fprintf(stderr, "Usage: %s <file.hl> [function_index | -a] [-v]\n", argv[0]); + fprintf(stderr, " -a: dump all functions\n"); + fprintf(stderr, " -v: verbose (show register types)\n"); + return 1; + } + + const char *filename = argv[1]; + int target_func = -1; /* -1 means entrypoint only */ + int dump_all = 0; + int verbose = 0; + + for (int i = 2; i < argc; i++) { + if (strcmp(argv[i], "-v") == 0) { + verbose = 1; + } else if (strcmp(argv[i], "-a") == 0) { + dump_all = 1; + } else { + target_func = atoi(argv[i]); + } + } + + /* Initialize HL */ + hl_global_init(); + + /* Load the bytecode */ + FILE *f = fopen(filename, "rb"); + if (!f) { + fprintf(stderr, "Cannot open %s\n", filename); + return 1; + } + + fseek(f, 0, SEEK_END); + int size = ftell(f); + fseek(f, 0, SEEK_SET); + + char *data = malloc(size); + fread(data, 1, size, f); + fclose(f); + + /* Parse bytecode */ + char *error_msg = NULL; + hl_code *code = hl_code_read((unsigned char*)data, size, &error_msg); + free(data); + + if (!code) { + fprintf(stderr, "Failed to parse bytecode: %s\n", error_msg ? 
error_msg : "unknown error"); + return 1; + } + + /* Print summary */ + printf("HashLink Bytecode: %s\n", filename); + printf(" Version: %d\n", code->version); + printf(" Entrypoint: F%d\n", code->entrypoint); + printf(" Types: %d\n", code->ntypes); + printf(" Globals: %d\n", code->nglobals); + printf(" Natives: %d\n", code->nnatives); + printf(" Functions: %d\n", code->nfunctions); + printf(" Strings: %d\n", code->nstrings); + printf(" Ints: %d\n", code->nints); + printf(" Floats: %d\n", code->nfloats); + + /* Print natives */ + if (code->nnatives > 0) { + printf("\n=== Natives ===\n"); + for (int i = 0; i < code->nnatives; i++) { + hl_native *n = &code->natives[i]; + printf(" F%d: %s@%s ", n->findex, n->name, n->lib); + print_type(n->t); + printf("\n"); + } + } + + /* Dump functions */ + if (dump_all) { + /* Dump all functions */ + printf("\n--- All Functions ---\n"); + for (int i = 0; i < code->nfunctions; i++) { + dump_function(code, &code->functions[i], verbose); + } + } else if (target_func >= 0) { + int found = 0; + /* Check if it's a native function first */ + for (int i = 0; i < code->nnatives; i++) { + if (code->natives[i].findex == target_func) { + hl_native *n = &code->natives[i]; + printf("\n=== Native %d ===\n", n->findex); + printf(" Library: %s\n", n->lib); + printf(" Name: %s\n", n->name); + printf(" Type: "); + print_type(n->t); + printf("\n"); + found = 1; + break; + } + } + /* Find and dump specific function */ + for (int i = 0; i < code->nfunctions; i++) { + if (code->functions[i].findex == target_func) { + dump_function(code, &code->functions[i], verbose); + found = 1; + break; + } + } + if (!found) { + printf("\nFunction F%d not found\n", target_func); + } + } else { + /* Dump entrypoint function */ + printf("\n--- Entrypoint Function ---\n"); + for (int i = 0; i < code->nfunctions; i++) { + if (code->functions[i].findex == code->entrypoint) { + dump_function(code, &code->functions[i], verbose); + break; + } + } + } + + return 0; +} diff --git a/other/tests/minimal/test_array_ops.c b/other/tests/minimal/test_array_ops.c new file mode 100644 index 000000000..02f7afa53 --- /dev/null +++ b/other/tests/minimal/test_array_ops.c @@ -0,0 +1,384 @@ +/* + * Test array operations for HashLink AArch64 JIT + * + * Tests: OGetArray, OSetArray, OArraySize + * + * OGetArray: dst = array[index] + * OSetArray: array[index] = value + * OArraySize: dst = array.length + */ +#include "test_harness.h" + +/* Helper to create an array type */ +static hl_type *create_array_type(hl_code *c, hl_type *elem_type) { + if (c->ntypes >= MAX_TYPES) { + fprintf(stderr, "Too many types\n"); + return NULL; + } + + int idx = c->ntypes++; + hl_type *t = &c->types[idx]; + memset(t, 0, sizeof(hl_type)); + + t->kind = HARRAY; + t->tparam = elem_type; + + return t; +} + +/* + * Test: OSetArray and OGetArray with i32 elements + * + * array = alloc_array(i32, 3) + * array[0] = 10 + * array[1] = 20 + * array[2] = 12 + * r0 = array[0] + array[1] + array[2] ; 10 + 20 + 12 = 42 + * return r0 + */ +TEST(array_i32_basic) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 3, 0, 1, 2, 10, 20, 12 }; + test_init_ints(c, 7, ints); + + /* Create array type: Array<i32> */ + hl_type *array_i32 = create_array_type(c, &c->types[T_I32]); + + /* Native: hl_alloc_array(type, size) -> array */ + hl_type *alloc_args[] = { &c->types[T_TYPE], &c->types[T_I32] }; /* type pointer */ + hl_type *alloc_fn_type = test_alloc_fun_type(c, array_i32, 2, alloc_args); + test_add_native(c, 1,
"std", "alloc_array", alloc_fn_type, (void*)hl_alloc_array); + + /* Function type: () -> i32 */ + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + /* + * Registers: + * r0: type pointer (for alloc) + * r1: size (3) + * r2: array + * r3-r5: indices (0, 1, 2) + * r6-r8: values (10, 20, 12) + * r9-r11: read values + * r12: sum + */ + hl_type *regs[] = { + &c->types[T_TYPE], /* r0 = type pointer */ + &c->types[T_I32], /* r1 = size */ + array_i32, /* r2 = array */ + &c->types[T_I32], /* r3 = idx 0 */ + &c->types[T_I32], /* r4 = idx 1 */ + &c->types[T_I32], /* r5 = idx 2 */ + &c->types[T_I32], /* r6 = val 10 */ + &c->types[T_I32], /* r7 = val 20 */ + &c->types[T_I32], /* r8 = val 12 */ + &c->types[T_I32], /* r9 = read[0] */ + &c->types[T_I32], /* r10 = read[1] */ + &c->types[T_I32], /* r11 = read[2] */ + &c->types[T_I32], /* r12 = sum */ + }; + + /* OType loads type at given index into register */ + hl_opcode ops[] = { + OP2(OType, 0, T_I32), /* r0 = type for i32 */ + OP2(OInt, 1, 0), /* r1 = 3 (size) */ + OP4_CALL2(OCall2, 2, 1, 0, 1), /* r2 = alloc_array(r0, r1) */ + OP2(OInt, 3, 1), /* r3 = 0 */ + OP2(OInt, 4, 2), /* r4 = 1 */ + OP2(OInt, 5, 3), /* r5 = 2 */ + OP2(OInt, 6, 4), /* r6 = 10 */ + OP2(OInt, 7, 5), /* r7 = 20 */ + OP2(OInt, 8, 6), /* r8 = 12 */ + OP3(OSetArray, 2, 3, 6), /* array[0] = 10 */ + OP3(OSetArray, 2, 4, 7), /* array[1] = 20 */ + OP3(OSetArray, 2, 5, 8), /* array[2] = 12 */ + OP3(OGetArray, 9, 2, 3), /* r9 = array[0] */ + OP3(OGetArray, 10, 2, 4), /* r10 = array[1] */ + OP3(OGetArray, 11, 2, 5), /* r11 = array[2] */ + OP3(OAdd, 12, 9, 10), /* r12 = r9 + r10 */ + OP3(OAdd, 12, 12, 11), /* r12 = r12 + r11 */ + OP1(ORet, 12), + }; + + test_alloc_function(c, 0, fn_type, 13, regs, 18, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + int expected = 10 + 20 + 12; + if (ret != expected) { + fprintf(stderr, " Expected %d, got %d\n", expected, ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: OArraySize + * + * array = alloc_array(i32, 5) + * return array_size(array) ; should be 5 + */ +TEST(array_size) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 5 }; + test_init_ints(c, 1, ints); + + hl_type *array_i32 = create_array_type(c, &c->types[T_I32]); + + hl_type *alloc_args[] = { &c->types[T_TYPE], &c->types[T_I32] }; + hl_type *alloc_fn_type = test_alloc_fun_type(c, array_i32, 2, alloc_args); + test_add_native(c, 1, "std", "alloc_array", alloc_fn_type, (void*)hl_alloc_array); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + hl_type *regs[] = { + &c->types[T_TYPE], /* r0 = type pointer */ + &c->types[T_I32], /* r1 = size */ + array_i32, /* r2 = array */ + &c->types[T_I32], /* r3 = array_size */ + }; + + hl_opcode ops[] = { + OP2(OType, 0, T_I32), /* r0 = type for i32 */ + OP2(OInt, 1, 0), /* r1 = 5 (size) */ + OP4_CALL2(OCall2, 2, 1, 0, 1), /* r2 = alloc_array(r0, r1) */ + OP2(OArraySize, 3, 2), /* r3 = array_size(r2) */ + OP1(ORet, 3), + }; + + test_alloc_function(c, 0, fn_type, 4, regs, 5, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 5) { + fprintf(stderr, " Expected 5, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: OSetArray and OGetArray with i64 elements + */ +TEST(array_i64_basic) { + 
test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 2, 0, 1, 1000, 2000 }; + test_init_ints(c, 5, ints); + + hl_type *array_i64 = create_array_type(c, &c->types[T_I64]); + + hl_type *alloc_args[] = { &c->types[T_TYPE], &c->types[T_I32] }; + hl_type *alloc_fn_type = test_alloc_fun_type(c, array_i64, 2, alloc_args); + test_add_native(c, 1, "std", "alloc_array", alloc_fn_type, (void*)hl_alloc_array); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I64], 0, NULL); + + hl_type *regs[] = { + &c->types[T_TYPE], /* r0 = type pointer */ + &c->types[T_I32], /* r1 = size */ + array_i64, /* r2 = array */ + &c->types[T_I32], /* r3 = idx 0 */ + &c->types[T_I32], /* r4 = idx 1 */ + &c->types[T_I64], /* r5 = val 1000 */ + &c->types[T_I64], /* r6 = val 2000 */ + &c->types[T_I64], /* r7 = read[0] */ + &c->types[T_I64], /* r8 = read[1] */ + &c->types[T_I64], /* r9 = sum */ + }; + + hl_opcode ops[] = { + OP2(OType, 0, T_I64), /* r0 = type for i64 */ + OP2(OInt, 1, 0), /* r1 = 2 (size) */ + OP4_CALL2(OCall2, 2, 1, 0, 1), /* r2 = alloc_array(r0, r1) */ + OP2(OInt, 3, 1), /* r3 = 0 */ + OP2(OInt, 4, 2), /* r4 = 1 */ + OP2(OInt, 5, 3), /* r5 = 1000 */ + OP2(OInt, 6, 4), /* r6 = 2000 */ + OP3(OSetArray, 2, 3, 5), /* array[0] = 1000 */ + OP3(OSetArray, 2, 4, 6), /* array[1] = 2000 */ + OP3(OGetArray, 7, 2, 3), /* r7 = array[0] */ + OP3(OGetArray, 8, 2, 4), /* r8 = array[1] */ + OP3(OAdd, 9, 7, 8), /* r9 = r7 + r8 */ + OP1(ORet, 9), + }; + + test_alloc_function(c, 0, fn_type, 10, regs, 13, ops); + + int result; + int64_t (*fn)(void) = (int64_t(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int64_t ret = fn(); + int64_t expected = 1000 + 2000; + if (ret != expected) { + fprintf(stderr, " Expected %ld, got %ld\n", (long)expected, (long)ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: OSetArray and OGetArray with f64 elements + */ +TEST(array_f64_basic) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 2, 0, 1 }; + test_init_ints(c, 3, ints); + + double floats[] = { 1.5, 2.5 }; + test_init_floats(c, 2, floats); + + hl_type *array_f64 = create_array_type(c, &c->types[T_F64]); + + hl_type *alloc_args[] = { &c->types[T_TYPE], &c->types[T_I32] }; + hl_type *alloc_fn_type = test_alloc_fun_type(c, array_f64, 2, alloc_args); + test_add_native(c, 1, "std", "alloc_array", alloc_fn_type, (void*)hl_alloc_array); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_F64], 0, NULL); + + hl_type *regs[] = { + &c->types[T_TYPE], /* r0 = type pointer */ + &c->types[T_I32], /* r1 = size */ + array_f64, /* r2 = array */ + &c->types[T_I32], /* r3 = idx 0 */ + &c->types[T_I32], /* r4 = idx 1 */ + &c->types[T_F64], /* r5 = val 1.5 */ + &c->types[T_F64], /* r6 = val 2.5 */ + &c->types[T_F64], /* r7 = read[0] */ + &c->types[T_F64], /* r8 = read[1] */ + &c->types[T_F64], /* r9 = sum */ + }; + + hl_opcode ops[] = { + OP2(OType, 0, T_F64), /* r0 = type for f64 */ + OP2(OInt, 1, 0), /* r1 = 2 (size) */ + OP4_CALL2(OCall2, 2, 1, 0, 1), /* r2 = alloc_array(r0, r1) */ + OP2(OInt, 3, 1), /* r3 = 0 */ + OP2(OInt, 4, 2), /* r4 = 1 */ + OP2(OFloat, 5, 0), /* r5 = 1.5 */ + OP2(OFloat, 6, 1), /* r6 = 2.5 */ + OP3(OSetArray, 2, 3, 5), /* array[0] = 1.5 */ + OP3(OSetArray, 2, 4, 6), /* array[1] = 2.5 */ + OP3(OGetArray, 7, 2, 3), /* r7 = array[0] */ + OP3(OGetArray, 8, 2, 4), /* r8 = array[1] */ + OP3(OAdd, 9, 7, 8), /* r9 = r7 + r8 */ + OP1(ORet, 9), + }; + + 
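+ /* 10 registers, 13 opcodes; unlike the integer variants, the OAdd here should lower to a floating-point add on the f64 registers. */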
test_alloc_function(c, 0, fn_type, 10, regs, 13, ops); + + int result; + double (*fn)(void) = (double(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + double ret = fn(); + double expected = 1.5 + 2.5; + double diff = ret - expected; + if (diff < 0) diff = -diff; + if (diff > 0.0001) { + fprintf(stderr, " Expected %f, got %f\n", expected, ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Array with dynamic indices (not compile-time constants) + */ +TEST(array_dynamic_index) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 10, 0, 42, 1 }; /* size, idx0, value, idx_offset */ + test_init_ints(c, 4, ints); + + hl_type *array_i32 = create_array_type(c, &c->types[T_I32]); + + hl_type *alloc_args[] = { &c->types[T_TYPE], &c->types[T_I32] }; + hl_type *alloc_fn_type = test_alloc_fun_type(c, array_i32, 2, alloc_args); + test_add_native(c, 1, "std", "alloc_array", alloc_fn_type, (void*)hl_alloc_array); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + hl_type *regs[] = { + &c->types[T_TYPE], /* r0 = type pointer */ + &c->types[T_I32], /* r1 = size */ + array_i32, /* r2 = array */ + &c->types[T_I32], /* r3 = idx (computed) */ + &c->types[T_I32], /* r4 = value */ + &c->types[T_I32], /* r5 = idx_offset */ + &c->types[T_I32], /* r6 = computed idx */ + &c->types[T_I32], /* r7 = read value */ + }; + + /* Store at index 0, then compute index 0+1-1=0 to read back */ + hl_opcode ops[] = { + OP2(OType, 0, T_I32), /* r0 = type */ + OP2(OInt, 1, 0), /* r1 = 10 */ + OP4_CALL2(OCall2, 2, 1, 0, 1), /* r2 = alloc_array(r0, r1) */ + OP2(OInt, 3, 1), /* r3 = 0 */ + OP2(OInt, 4, 2), /* r4 = 42 */ + OP3(OSetArray, 2, 3, 4), /* array[0] = 42 */ + OP2(OInt, 5, 3), /* r5 = 1 */ + OP3(OAdd, 6, 3, 5), /* r6 = r3 + r5 = 1 */ + OP3(OSub, 6, 6, 5), /* r6 = r6 - r5 = 0 */ + OP3(OGetArray, 7, 2, 6), /* r7 = array[r6] = array[0] */ + OP1(ORet, 7), + }; + + test_alloc_function(c, 0, fn_type, 8, regs, 11, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* Test list */ +static test_entry_t tests[] = { + TEST_ENTRY(array_i32_basic), + TEST_ENTRY(array_size), + TEST_ENTRY(array_i64_basic), + TEST_ENTRY(array_f64_basic), + TEST_ENTRY(array_dynamic_index), +}; + +int main(int argc, char **argv) { + printf("HashLink AArch64 JIT - Array Operation Tests\n"); + return run_tests(tests, sizeof(tests) / sizeof(tests[0])); +} diff --git a/other/tests/minimal/test_binop_inplace.c b/other/tests/minimal/test_binop_inplace.c new file mode 100644 index 000000000..03fc559b8 --- /dev/null +++ b/other/tests/minimal/test_binop_inplace.c @@ -0,0 +1,670 @@ +/* + * Test in-place binary operations followed by spill + * + * Tests the bug where in-place binops like r0 = r0 << r1 don't properly + * update the register binding, causing the old (pre-operation) value to + * be spilled instead of the new value. + * + * Bug scenario: + * 1. r0 = 21 + * 2. r1 = 1 + * 3. r0 = r0 << r1 ; in-place shift, result should be 42 + * 4. call fn() ; triggers spill_regs - BUG: spills old r0 (21) instead of new (42) + * 5. 
return r0 ; BUG: returns 21 instead of 42 + */ +#include "test_harness.h" + +/* Helper to allocate multiple functions at once */ +static void test_alloc_functions(hl_code *c, int count) { + c->functions = (hl_function*)calloc(count, sizeof(hl_function)); + c->nfunctions = 0; +} + +static hl_function *test_add_function(hl_code *c, int findex, hl_type *type, + int nregs, hl_type **regs, + int nops, hl_opcode *ops) { + hl_function *f = &c->functions[c->nfunctions++]; + f->findex = findex; + f->type = type; + f->nregs = nregs; + f->nops = nops; + + f->regs = (hl_type**)malloc(sizeof(hl_type*) * nregs); + memcpy(f->regs, regs, sizeof(hl_type*) * nregs); + + f->ops = (hl_opcode*)malloc(sizeof(hl_opcode) * nops); + memcpy(f->ops, ops, sizeof(hl_opcode) * nops); + + f->debug = NULL; + f->obj = NULL; + f->field.ref = NULL; + f->ref = 0; + + return f; +} + +/* + * Test: In-place left shift followed by function call + * + * This is the minimal reproduction of the string concat bug where: + * OShl r5, r5, r6 ; in-place shift + * OCallN ... ; triggers spill, but spills the OLD r5 value + * + * fn0: () -> i32 { return 0; } ; dummy function to trigger spill + * fn1: () -> i32 { ; entry point + * r0 = 21 + * r1 = 1 + * r0 = r0 << r1 ; r0 should become 42 + * call fn0() ; triggers spill - bug causes old r0 (21) to be saved + * return r0 ; should return 42, but returns 21 if bug present + * } + */ +TEST(shl_inplace_then_call) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 21, 1, 0 }; + test_init_ints(c, 3, ints); + + /* Function types */ + hl_type *fn_type_i32 = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + test_alloc_functions(c, 2); + + /* fn0: findex=0, returns 0 (dummy to trigger spill) */ + { + hl_type *regs[] = { &c->types[T_I32] }; + hl_opcode ops[] = { + OP2(OInt, 0, 2), /* r0 = 0 */ + OP1(ORet, 0), + }; + test_add_function(c, 0, fn_type_i32, 1, regs, 2, ops); + } + + /* fn1: findex=1, does in-place shift then calls fn0 (entry point) */ + { + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 21 */ + OP2(OInt, 1, 1), /* r1 = 1 */ + OP3(OShl, 0, 0, 1), /* r0 = r0 << r1 (in-place! 
dst == src) */ + OP2(OCall0, 2, 0), /* r2 = call fn0() - triggers spill */ + OP1(ORet, 0), /* return r0 - should be 42 */ + }; + test_add_function(c, 1, fn_type_i32, 3, regs, 5, ops); + } + + c->entrypoint = 1; + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + fprintf(stderr, " (Bug: in-place shift value not properly spilled)\n"); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: In-place add followed by function call + * Same bug pattern but with OAdd instead of OShl + */ +TEST(add_inplace_then_call) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 21, 0 }; + test_init_ints(c, 2, ints); + + hl_type *fn_type_i32 = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + test_alloc_functions(c, 2); + + /* fn0: returns 0 */ + { + hl_type *regs[] = { &c->types[T_I32] }; + hl_opcode ops[] = { + OP2(OInt, 0, 1), + OP1(ORet, 0), + }; + test_add_function(c, 0, fn_type_i32, 1, regs, 2, ops); + } + + /* fn1: r0 = r0 + r0 (21 + 21 = 42), then call, then return r0 */ + { + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32] }; + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 21 */ + OP3(OAdd, 0, 0, 0), /* r0 = r0 + r0 (in-place!) */ + OP2(OCall0, 1, 0), /* r1 = call fn0() */ + OP1(ORet, 0), /* return r0 - should be 42 */ + }; + test_add_function(c, 1, fn_type_i32, 2, regs, 4, ops); + } + + c->entrypoint = 1; + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + fprintf(stderr, " (Bug: in-place add value not properly spilled)\n"); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: In-place multiply followed by function call + */ +TEST(mul_inplace_then_call) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 6, 7, 0 }; + test_init_ints(c, 3, ints); + + hl_type *fn_type_i32 = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + test_alloc_functions(c, 2); + + /* fn0: returns 0 */ + { + hl_type *regs[] = { &c->types[T_I32] }; + hl_opcode ops[] = { + OP2(OInt, 0, 2), + OP1(ORet, 0), + }; + test_add_function(c, 0, fn_type_i32, 1, regs, 2, ops); + } + + /* fn1: r0 = 6, r1 = 7, r0 = r0 * r1, call, return r0 */ + { + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 6 */ + OP2(OInt, 1, 1), /* r1 = 7 */ + OP3(OMul, 0, 0, 1), /* r0 = r0 * r1 (in-place!) 
*/ + OP2(OCall0, 2, 0), /* r2 = call fn0() */ + OP1(ORet, 0), /* return r0 - should be 42 */ + }; + test_add_function(c, 1, fn_type_i32, 3, regs, 5, ops); + } + + c->entrypoint = 1; + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + fprintf(stderr, " (Bug: in-place mul value not properly spilled)\n"); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Chain of in-place operations then call + * This is closer to the real-world string concat scenario + */ +TEST(chain_inplace_then_call) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 10, 1, 0 }; + test_init_ints(c, 3, ints); + + hl_type *fn_type_i32 = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + test_alloc_functions(c, 2); + + /* fn0: returns 0 */ + { + hl_type *regs[] = { &c->types[T_I32] }; + hl_opcode ops[] = { + OP2(OInt, 0, 2), + OP1(ORet, 0), + }; + test_add_function(c, 0, fn_type_i32, 1, regs, 2, ops); + } + + /* fn1: r0 = 10, r1 = 1, r0 = r0 << r1, r0 = r0 + r0, r0 = r0 + r1 + r1, call, return r0 + * 10 << 1 = 20, 20 + 20 = 40, 40 + 1 + 1 = 42 + */ + { + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 10 */ + OP2(OInt, 1, 1), /* r1 = 1 */ + OP3(OShl, 0, 0, 1), /* r0 = r0 << r1 = 20 */ + OP3(OAdd, 0, 0, 0), /* r0 = r0 + r0 = 40 */ + OP3(OAdd, 0, 0, 1), /* r0 = r0 + r1 = 41 */ + OP3(OAdd, 0, 0, 1), /* r0 = r0 + r1 = 42 */ + OP2(OCall0, 2, 0), /* r2 = call fn0() */ + OP1(ORet, 0), /* return r0 - should be 42 */ + }; + test_add_function(c, 1, fn_type_i32, 3, regs, 8, ops); + } + + c->entrypoint = 1; + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + fprintf(stderr, " (Bug: chain of in-place ops not properly spilled)\n"); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Simulates the actual string concat bug pattern more closely + * + * The real bug occurs in this sequence (from function 20): + * OField r5, r1, 1 ; r5 = load from object field + * OInt r6, 1 ; r6 = 1 + * OShl r5, r5, r6 ; r5 = r5 << r6 (in-place) + * ... more ops ... + * OCallN ; triggers spill - BUG: spills old r5 + * + * The key is that r5 comes from OField (not OInt), so fetch() loads it + * into a register. Then OShl does in-place shift, allocating a NEW + * register for the result. But the old register still thinks it holds r5. + * + * We simulate this with OCall0 to produce a value, then shift it in-place. + */ +TEST(shl_inplace_arg_then_call) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 21, 1, 0 }; /* 21 for fn0, 1 for the shift amount, 0 for fn1 */ + test_init_ints(c, 3, ints); + + /* Function type */ + hl_type *fn_type_i32 = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + test_alloc_functions(c, 3); + + /* fn0: returns 21 (simulates loading a value like OField does) */ + { + hl_type *regs[] = { &c->types[T_I32] }; + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = ints[0] = 21 */ + OP1(ORet, 0), + }; + test_add_function(c, 0, fn_type_i32, 1, regs, 2, ops); + } + + /* fn1: returns 0 (dummy to trigger second spill) */ + { + hl_type *regs[] = { &c->types[T_I32] }; + hl_opcode ops[] = { + OP2(OInt, 0, 2), /* r0 = ints[2] = 0 */ + OP1(ORet, 0), + }; + test_add_function(c, 1, fn_type_i32, 1, regs, 2, ops); + } + + /* fn2: entry point + * r0 = call fn0() ; r0 = 21 (value comes from call, like OField) + * r1 = 1 + * r0 = r0 << r1 ; in-place shift, r0 should be 42 + * r2 = call fn1() ; triggers spill + * return r0 ; should be 42 + */ + { + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + hl_opcode ops[] = { + OP2(OCall0, 0, 0), /* r0 = call fn0() = 21 */ + OP2(OInt, 1, 1), /* r1 = ints[1] = 1 */ + OP3(OShl, 0, 0, 1), /* r0 = r0 << r1 = 42 (in-place!) */ + OP2(OCall0, 2, 1), /* r2 = call fn1() - triggers spill */ + OP1(ORet, 0), /* return r0 - should be 42 */ + }; + test_add_function(c, 2, fn_type_i32, 3, regs, 5, ops); + } + + c->entrypoint = 2; + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + fprintf(stderr, " (Bug: value from call not properly spilled after in-place shift)\n"); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Multiple registers active, simulating the string concat pattern + * + * This more closely matches the real bug: + * r4 = get length1 + * r5 = 1 + * r4 = r4 << r5 ; first shift + * r5 = get length2 ; r5 REUSED for different value + * r6 = 1 + * r5 = r5 << r6 ; second shift (in-place) - THIS IS WHERE BUG OCCURS + * r6 = r4 + r5 ; need both shifted values + * call(...) ; spill - r5 gets wrong value + */ +TEST(string_concat_pattern) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + /* ints: 13 (length1), 1 and 1 (length2 and shift amounts), 0 (fn0 return) */ + int ints[] = { 13, 1, 1, 0 }; + test_init_ints(c, 4, ints); + + hl_type *fn_type_i32 = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + test_alloc_functions(c, 2); + + /* fn0: returns 0 */ + { + hl_type *regs[] = { &c->types[T_I32] }; + hl_opcode ops[] = { + OP2(OInt, 0, 3), /* r0 = 0 */ + OP1(ORet, 0), + }; + test_add_function(c, 0, fn_type_i32, 1, regs, 2, ops); + } + + /* fn1: entry - simulates string concat length calculation + * r0 = 13 ; length1 (chars) + * r1 = 1 ; shift amount + * r0 = r0 << r1 ; length1 in bytes = 26 + * r2 = 1 ; length2 (chars) - reusing pattern + * r3 = 1 ; shift amount + * r2 = r2 << r3 ; length2 in bytes = 2 (in-place!)
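+ * (if the in-place shift fails to rebind r2 to its new machine register, the stale pre-shift value survives the spill and the sum comes back wrong)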
+ * r4 = r0 + r2 ; total = 28 + * call fn0() ; triggers spill + * return r4 ; should be 28 + */ + { + hl_type *regs[] = { + &c->types[T_I32], &c->types[T_I32], &c->types[T_I32], + &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] + }; + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 13 */ + OP2(OInt, 1, 1), /* r1 = 1 */ + OP3(OShl, 0, 0, 1), /* r0 = r0 << r1 = 26 */ + OP2(OInt, 2, 1), /* r2 = 1 */ + OP2(OInt, 3, 2), /* r3 = 1 */ + OP3(OShl, 2, 2, 3), /* r2 = r2 << r3 = 2 (in-place!) */ + OP3(OAdd, 4, 0, 2), /* r4 = r0 + r2 = 28 */ + OP2(OCall0, 5, 0), /* r5 = call fn0() - triggers spill */ + OP1(ORet, 4), /* return r4 = 28 */ + }; + test_add_function(c, 1, fn_type_i32, 6, regs, 9, ops); + } + + c->entrypoint = 1; + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 28) { + fprintf(stderr, " Expected 28, got %d\n", ret); + fprintf(stderr, " (Bug: in-place shift in multi-register scenario failed)\n"); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Force pd < pa scenario + * + * THE REAL BUG: The bug only manifests when the RESULT register (pd) has a + * LOWER index than the SOURCE register (pa). In spill_regs(), registers are + * processed from X0 to X17. If pd < pa: + * 1. pd is spilled first (correct value stored) + * 2. pa is spilled later (OLD value overwrites correct value!) + * + * To trigger this, we need: + * 1. Allocate several low-numbered registers (X0, X1, X2, ...) + * 2. Free a low register (X0) + * 3. Load a value into a high register (X5 say) + * 4. Do in-place operation - result goes to freed X0, source stays in X5 + * 5. Call function - spill_regs processes X0 first, then X5 overwrites! + */ +TEST(force_pd_less_than_pa) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 0, 1, 2, 3, 4, 21, 1 }; + test_init_ints(c, 7, ints); + + hl_type *fn_type_i32 = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + test_alloc_functions(c, 2); + + /* fn0: returns 0 */ + { + hl_type *regs[] = { &c->types[T_I32] }; + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 0 */ + OP1(ORet, 0), + }; + test_add_function(c, 0, fn_type_i32, 1, regs, 2, ops); + } + + /* fn1: Entry point + * Strategy: Allocate registers 0-4, then use r5 for the actual value. + * When we do the in-place op on r5, the result register will be + * allocated to a lower number (after we stop using r0-r4). + * + * r0 = 0 (uses X0) + * r1 = 1 (uses X1) + * r2 = 2 (uses X2) + * r3 = 3 (uses X3) + * r4 = 4 (uses X4) + * r5 = 21 (uses X5) + * r6 = 1 (uses X6) + * ; Now we "forget" r0-r4 by doing operations that don't involve them + * ; (The registers will be evicted when new ones are needed) + * r5 = r5 << r6 ; In-place shift. 
pd might be X0-X4 if they get freed + * ; Actually, let's force it by having the MOV instruction cause eviction + * r0 = r5 ; This forces r5's value to r0 + * call fn0() ; Spill + * return r0 ; Should be 42 + */ + { + hl_type *regs[] = { + &c->types[T_I32], &c->types[T_I32], &c->types[T_I32], + &c->types[T_I32], &c->types[T_I32], &c->types[T_I32], + &c->types[T_I32], &c->types[T_I32] + }; + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 0 - allocates X0 */ + OP2(OInt, 1, 1), /* r1 = 1 - allocates X1 */ + OP2(OInt, 2, 2), /* r2 = 2 - allocates X2 */ + OP2(OInt, 3, 3), /* r3 = 3 - allocates X3 */ + OP2(OInt, 4, 4), /* r4 = 4 - allocates X4 */ + OP2(OInt, 5, 5), /* r5 = 21 - allocates X5 */ + OP2(OInt, 6, 6), /* r6 = 1 - allocates X6 */ + OP3(OShl, 5, 5, 6), /* r5 = r5 << r6 = 42 (in-place!) */ + OP2(OMov, 0, 5), /* r0 = r5 (moves 42 to r0) */ + OP2(OCall0, 7, 0), /* r7 = call fn0() - triggers spill */ + OP1(ORet, 0), /* return r0 - should be 42 */ + }; + test_add_function(c, 1, fn_type_i32, 8, regs, 11, ops); + } + + c->entrypoint = 1; + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + fprintf(stderr, " (Bug: pd < pa causes old value to overwrite new in spill_regs)\n"); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Direct reproduction of the hello.hl bug scenario + * + * In hello.hl, the bug occurred in function 20 (String.__add): + * - OField loads string length into r5 (ends up in X5) + * - OShl shifts r5 by 1 (to convert chars to bytes) + * - Result goes into a LOWER register (X2) + * - OCallN triggers spill + * - X2 is spilled first (correct value) + * - X5 is spilled later (OLD value overwrites!) + * + * We can't easily reproduce OField in our minimal test, but we CAN + * reproduce the scenario by: + * 1. Getting a value via function call (forces it into return register X0) + * 2. Moving it to a higher-numbered vreg + * 3. Doing in-place shift + */ +TEST(hello_hl_scenario) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 13, 1, 0 }; + test_init_ints(c, 3, ints); + + hl_type *fn_type_i32 = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + test_alloc_functions(c, 3); + + /* fn0: returns 13 (simulates OField loading string length) */ + { + hl_type *regs[] = { &c->types[T_I32] }; + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 13 */ + OP1(ORet, 0), + }; + test_add_function(c, 0, fn_type_i32, 1, regs, 2, ops); + } + + /* fn1: returns 0 (dummy to trigger second spill) */ + { + hl_type *regs[] = { &c->types[T_I32] }; + hl_opcode ops[] = { + OP2(OInt, 0, 2), /* r0 = 0 */ + OP1(ORet, 0), + }; + test_add_function(c, 1, fn_type_i32, 1, regs, 2, ops); + } + + /* fn2: Entry point - simulates String.__add length calculation + * r0 = call fn0() ; Get length (13), result in X0, then stored to r0 + * r1 = 1 ; Shift amount + * r0 = r0 << r1 ; r0 = 13 << 1 = 26 (in-place!) + * r2 = call fn1() ; Triggers spill - BUG: old r0 may overwrite new + * return r0 ; Should be 26 + */ + { + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + hl_opcode ops[] = { + OP2(OCall0, 0, 0), /* r0 = call fn0() = 13 */ + OP2(OInt, 1, 1), /* r1 = 1 */ + OP3(OShl, 0, 0, 1), /* r0 = r0 << r1 = 26 (in-place!) 
*/ + OP2(OCall0, 2, 1), /* r2 = call fn1() - triggers spill */ + OP1(ORet, 0), /* return r0 - should be 26 */ + }; + test_add_function(c, 2, fn_type_i32, 3, regs, 5, ops); + } + + c->entrypoint = 2; + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 26) { + fprintf(stderr, " Expected 26, got %d\n", ret); + fprintf(stderr, " (Bug: String.__add pattern - in-place shift corrupted by spill)\n"); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* Test list */ +static test_entry_t tests[] = { + TEST_ENTRY(shl_inplace_then_call), + TEST_ENTRY(add_inplace_then_call), + TEST_ENTRY(mul_inplace_then_call), + TEST_ENTRY(chain_inplace_then_call), + TEST_ENTRY(shl_inplace_arg_then_call), + TEST_ENTRY(string_concat_pattern), + TEST_ENTRY(force_pd_less_than_pa), + TEST_ENTRY(hello_hl_scenario), +}; + +int main(int argc, char **argv) { + printf("HashLink AArch64 JIT - In-place Binary Op + Spill Tests\n"); + printf("(Tests for register binding bug in op_binop)\n"); + return run_tests(tests, sizeof(tests) / sizeof(tests[0])); +} diff --git a/other/tests/minimal/test_bool_ops.c b/other/tests/minimal/test_bool_ops.c new file mode 100644 index 000000000..a8f0425bf --- /dev/null +++ b/other/tests/minimal/test_bool_ops.c @@ -0,0 +1,288 @@ +/* + * Test boolean operations for HashLink AArch64 JIT + * + * Tests: OBool, ONot + */ +#include "test_harness.h" + +/* + * Test: Return true + */ +TEST(return_true) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_BOOL], 0, NULL); + hl_type *regs[] = { &c->types[T_BOOL] }; + + hl_opcode ops[] = { + OP2(OBool, 0, 1), /* r0 = true (p2=1 means true) */ + OP1(ORet, 0), + }; + + test_alloc_function(c, 0, fn_type, 1, regs, 2, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 1) { + fprintf(stderr, " Expected 1 (true), got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Return false + */ +TEST(return_false) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_BOOL], 0, NULL); + hl_type *regs[] = { &c->types[T_BOOL] }; + + hl_opcode ops[] = { + OP2(OBool, 0, 0), /* r0 = false (p2=0 means false) */ + OP1(ORet, 0), + }; + + test_alloc_function(c, 0, fn_type, 1, regs, 2, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 0) { + fprintf(stderr, " Expected 0 (false), got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: NOT true = false + */ +TEST(not_true) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_BOOL], 0, NULL); + hl_type *regs[] = { &c->types[T_BOOL], &c->types[T_BOOL] }; + + hl_opcode ops[] = { + OP2(OBool, 0, 1), /* r0 = true */ + OP2(ONot, 1, 0), /* r1 = !r0 */ + OP1(ORet, 1), + }; + + test_alloc_function(c, 0, fn_type, 2, regs, 3, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 0) { + fprintf(stderr, " Expected 0 (false), got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: 
NOT false = true + */ +TEST(not_false) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_BOOL], 0, NULL); + hl_type *regs[] = { &c->types[T_BOOL], &c->types[T_BOOL] }; + + hl_opcode ops[] = { + OP2(OBool, 0, 0), /* r0 = false */ + OP2(ONot, 1, 0), /* r1 = !r0 */ + OP1(ORet, 1), + }; + + test_alloc_function(c, 0, fn_type, 2, regs, 3, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 1) { + fprintf(stderr, " Expected 1 (true), got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Double NOT: !!true = true + */ +TEST(double_not_true) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_BOOL], 0, NULL); + hl_type *regs[] = { &c->types[T_BOOL], &c->types[T_BOOL], &c->types[T_BOOL] }; + + hl_opcode ops[] = { + OP2(OBool, 0, 1), /* r0 = true */ + OP2(ONot, 1, 0), /* r1 = !r0 = false */ + OP2(ONot, 2, 1), /* r2 = !r1 = true */ + OP1(ORet, 2), + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 4, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 1) { + fprintf(stderr, " Expected 1 (true), got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: NOT on bool (false) -> true + * Note: ONot is only valid for boolean operands (0 or 1); + * OBool with 0 produces false, which is represented as 0. + */ +TEST(not_bool_false) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_BOOL], 0, NULL); + hl_type *regs[] = { &c->types[T_BOOL], &c->types[T_BOOL] }; + + hl_opcode ops[] = { + OP2(OBool, 0, 0), /* r0 = false (0) */ + OP2(ONot, 1, 0), /* r1 = !r0 = true */ + OP1(ORet, 1), + }; + + test_alloc_function(c, 0, fn_type, 2, regs, 3, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 1) { + fprintf(stderr, " Expected 1 (true), got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: NOT on bool (true) -> false + */ +TEST(not_bool_true_explicit) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_BOOL], 0, NULL); + hl_type *regs[] = { &c->types[T_BOOL], &c->types[T_BOOL] }; + + hl_opcode ops[] = { + OP2(OBool, 0, 1), /* r0 = true (1) */ + OP2(ONot, 1, 0), /* r1 = !r0 = false */ + OP1(ORet, 1), + }; + + test_alloc_function(c, 0, fn_type, 2, regs, 3, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 0) { + fprintf(stderr, " Expected 0 (false), got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Move bool register + */ +TEST(mov_bool) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_BOOL], 0, NULL); + hl_type *regs[] = { &c->types[T_BOOL], &c->types[T_BOOL] }; + + hl_opcode ops[] = { + OP2(OBool, 0, 1), /* r0 = true */ + OP2(OMov, 1, 0), /* r1 = r0 */ + OP1(ORet, 1), + }; + + test_alloc_function(c, 0,
fn_type, 2, regs, 3, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 1) { + fprintf(stderr, " Expected 1 (true), got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* Test list */ +static test_entry_t tests[] = { + TEST_ENTRY(return_true), + TEST_ENTRY(return_false), + TEST_ENTRY(not_true), + TEST_ENTRY(not_false), + TEST_ENTRY(double_not_true), + TEST_ENTRY(not_bool_false), + TEST_ENTRY(not_bool_true_explicit), + TEST_ENTRY(mov_bool), +}; + +int main(int argc, char **argv) { + printf("HashLink AArch64 JIT - Boolean Operations Tests\n"); + return run_tests(tests, sizeof(tests) / sizeof(tests[0])); +} diff --git a/other/tests/minimal/test_callbacks.c b/other/tests/minimal/test_callbacks.c new file mode 100644 index 000000000..6f1e23a48 --- /dev/null +++ b/other/tests/minimal/test_callbacks.c @@ -0,0 +1,518 @@ +/* + * Test C-to-HL callback mechanism for HashLink AArch64 JIT + * + * Tests the callback_c2hl and jit_c2hl trampoline by: + * 1. JIT compiling a function with arguments + * 2. Calling it through hl_dyn_call (which uses callback_c2hl) + * + * This exercises the path: hl_dyn_call -> hl_call_method -> callback_c2hl -> jit_c2hl -> JIT code + */ +#include "test_harness.h" + +/* hl_dyn_call declaration from hl.h */ +extern vdynamic *hl_dyn_call(vclosure *c, vdynamic **args, int nargs); +extern vdynamic *hl_alloc_dynamic(hl_type *t); + +/* + * Test: Simple function call through callback (no arguments) + * + * JIT function: () -> i32 { return 42 } + * Call through hl_dyn_call and verify result + */ +TEST(callback_no_args) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 42 }; + test_init_ints(c, 1, ints); + + /* Function type: () -> i32 */ + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { &c->types[T_I32] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 42 */ + OP1(ORet, 0), + }; + + test_alloc_function(c, 0, fn_type, 1, regs, 2, ops); + + int result; + void *fn_ptr = test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + /* Create a closure for the function */ + vclosure cl; + memset(&cl, 0, sizeof(cl)); + cl.t = fn_type; + cl.fun = fn_ptr; + cl.hasValue = 0; + + /* Call through hl_dyn_call */ + vdynamic *ret = hl_dyn_call(&cl, NULL, 0); + + if (ret == NULL) { + fprintf(stderr, " hl_dyn_call returned NULL\n"); + return TEST_FAIL; + } + + if (ret->v.i != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret->v.i); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Function with one i32 argument + * + * JIT function: (i32 x) -> i32 { return x + 10 } + */ +TEST(callback_one_int_arg) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 10 }; + test_init_ints(c, 1, ints); + + /* Function type: (i32) -> i32 */ + hl_type *arg_types[] = { &c->types[T_I32] }; + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 1, arg_types); + + /* r0 = arg (i32), r1 = result (i32), r2 = const 10 */ + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + + hl_opcode ops[] = { + OP2(OInt, 2, 0), /* r2 = 10 */ + OP3(OAdd, 1, 0, 2), /* r1 = r0 + r2 */ + OP1(ORet, 1), + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 3, ops); + + int result; + void *fn_ptr = test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + /* Create a closure */ + 
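+ /* A stack-allocated vclosure with hasValue = 0 wraps the raw JIT entry point; hl_dyn_call is expected to box the i32 result into the returned vdynamic. */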
vclosure cl; + memset(&cl, 0, sizeof(cl)); + cl.t = fn_type; + cl.fun = fn_ptr; + cl.hasValue = 0; + + /* Create argument: i32 value = 32 */ + vdynamic arg_val; + arg_val.t = &c->types[T_I32]; + arg_val.v.i = 32; + vdynamic *args[] = { &arg_val }; + + /* Call through hl_dyn_call */ + vdynamic *ret = hl_dyn_call(&cl, args, 1); + + if (ret == NULL) { + fprintf(stderr, " hl_dyn_call returned NULL\n"); + return TEST_FAIL; + } + + /* Expected: 32 + 10 = 42 */ + if (ret->v.i != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret->v.i); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Function with two i32 arguments + * + * JIT function: (i32 a, i32 b) -> i32 { return a + b } + */ +TEST(callback_two_int_args) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + /* Function type: (i32, i32) -> i32 */ + hl_type *arg_types[] = { &c->types[T_I32], &c->types[T_I32] }; + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 2, arg_types); + + /* r0 = arg0, r1 = arg1, r2 = result */ + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + + hl_opcode ops[] = { + OP3(OAdd, 2, 0, 1), /* r2 = r0 + r1 */ + OP1(ORet, 2), + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 2, ops); + + int result; + void *fn_ptr = test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + /* Create a closure */ + vclosure cl; + memset(&cl, 0, sizeof(cl)); + cl.t = fn_type; + cl.fun = fn_ptr; + cl.hasValue = 0; + + /* Create arguments: 10 + 32 = 42 */ + vdynamic arg0, arg1; + arg0.t = &c->types[T_I32]; + arg0.v.i = 10; + arg1.t = &c->types[T_I32]; + arg1.v.i = 32; + vdynamic *args[] = { &arg0, &arg1 }; + + /* Call through hl_dyn_call */ + vdynamic *ret = hl_dyn_call(&cl, args, 2); + + if (ret == NULL) { + fprintf(stderr, " hl_dyn_call returned NULL\n"); + return TEST_FAIL; + } + + if (ret->v.i != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret->v.i); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Function with i64 argument + * + * JIT function: (i64 x) -> i64 { return x } + */ +TEST(callback_i64_arg) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + /* Function type: (i64) -> i64 */ + hl_type *arg_types[] = { &c->types[T_I64] }; + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I64], 1, arg_types); + + /* r0 = arg (i64) */ + hl_type *regs[] = { &c->types[T_I64] }; + + hl_opcode ops[] = { + OP1(ORet, 0), /* return r0 */ + }; + + test_alloc_function(c, 0, fn_type, 1, regs, 1, ops); + + int result; + void *fn_ptr = test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + /* Create a closure */ + vclosure cl; + memset(&cl, 0, sizeof(cl)); + cl.t = fn_type; + cl.fun = fn_ptr; + cl.hasValue = 0; + + /* Create argument: i64 value = 0x123456789ABCDEF0 */ + vdynamic arg_val; + arg_val.t = &c->types[T_I64]; + arg_val.v.i64 = 0x123456789ABCDEF0LL; + vdynamic *args[] = { &arg_val }; + + /* Call through hl_dyn_call */ + vdynamic *ret = hl_dyn_call(&cl, args, 1); + + if (ret == NULL) { + fprintf(stderr, " hl_dyn_call returned NULL\n"); + return TEST_FAIL; + } + + if (ret->v.i64 != 0x123456789ABCDEF0LL) { + fprintf(stderr, " Expected 0x123456789ABCDEF0, got 0x%llx\n", + (unsigned long long)ret->v.i64); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Function with f64 argument + * + * JIT function: (f64 x) -> f64 { return x } + */ +TEST(callback_f64_arg) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + 
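+ /* Under the AArch64 calling convention, an f64 argument travels in a floating-point register (d0), so this test checks that the callback marshals FP values into the right register class. */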
test_init_base_types(c); + + /* Function type: (f64) -> f64 */ + hl_type *arg_types[] = { &c->types[T_F64] }; + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_F64], 1, arg_types); + + /* r0 = arg (f64) */ + hl_type *regs[] = { &c->types[T_F64] }; + + hl_opcode ops[] = { + OP1(ORet, 0), /* return r0 */ + }; + + test_alloc_function(c, 0, fn_type, 1, regs, 1, ops); + + int result; + void *fn_ptr = test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + /* Create a closure */ + vclosure cl; + memset(&cl, 0, sizeof(cl)); + cl.t = fn_type; + cl.fun = fn_ptr; + cl.hasValue = 0; + + /* Create argument: f64 value = 3.14159 */ + vdynamic arg_val; + arg_val.t = &c->types[T_F64]; + arg_val.v.d = 3.14159; + vdynamic *args[] = { &arg_val }; + + /* Call through hl_dyn_call */ + vdynamic *ret = hl_dyn_call(&cl, args, 1); + + if (ret == NULL) { + fprintf(stderr, " hl_dyn_call returned NULL\n"); + return TEST_FAIL; + } + + double diff = ret->v.d - 3.14159; + if (diff < -0.00001 || diff > 0.00001) { + fprintf(stderr, " Expected 3.14159, got %f\n", ret->v.d); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Mixed int and float arguments + * + * JIT function: (i32 a, f64 b, i32 c) -> i32 { return a + c } + * Tests that arguments are marshaled to correct registers + */ +TEST(callback_mixed_args) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + /* Function type: (i32, f64, i32) -> i32 */ + hl_type *arg_types[] = { &c->types[T_I32], &c->types[T_F64], &c->types[T_I32] }; + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 3, arg_types); + + /* r0 = a (i32), r1 = b (f64), r2 = c (i32), r3 = result (i32) */ + hl_type *regs[] = { &c->types[T_I32], &c->types[T_F64], &c->types[T_I32], &c->types[T_I32] }; + + hl_opcode ops[] = { + OP3(OAdd, 3, 0, 2), /* r3 = r0 + r2 */ + OP1(ORet, 3), + }; + + test_alloc_function(c, 0, fn_type, 4, regs, 2, ops); + + int result; + void *fn_ptr = test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + /* Create a closure */ + vclosure cl; + memset(&cl, 0, sizeof(cl)); + cl.t = fn_type; + cl.fun = fn_ptr; + cl.hasValue = 0; + + /* Create arguments: a=10, b=99.9, c=32 -> result = 42 */ + vdynamic arg0, arg1, arg2; + arg0.t = &c->types[T_I32]; + arg0.v.i = 10; + arg1.t = &c->types[T_F64]; + arg1.v.d = 99.9; + arg2.t = &c->types[T_I32]; + arg2.v.i = 32; + vdynamic *args[] = { &arg0, &arg1, &arg2 }; + + /* Call through hl_dyn_call */ + vdynamic *ret = hl_dyn_call(&cl, args, 3); + + if (ret == NULL) { + fprintf(stderr, " hl_dyn_call returned NULL\n"); + return TEST_FAIL; + } + + if (ret->v.i != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret->v.i); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Many arguments (stress test register allocation) + * + * JIT function: (i32 a, i32 b, i32 c, i32 d, i32 e, i32 f) -> i32 + * { return a + b + c + d + e + f } + */ +TEST(callback_many_int_args) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + /* Function type: (i32, i32, i32, i32, i32, i32) -> i32 */ + hl_type *arg_types[] = { + &c->types[T_I32], &c->types[T_I32], &c->types[T_I32], + &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] + }; + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 6, arg_types); + + /* r0-r5 = args, r6 = temp, r7 = result */ + hl_type *regs[] = { + &c->types[T_I32], &c->types[T_I32], &c->types[T_I32], + &c->types[T_I32], &c->types[T_I32], &c->types[T_I32], + &c->types[T_I32], 
&c->types[T_I32] + }; + + hl_opcode ops[] = { + OP3(OAdd, 6, 0, 1), /* r6 = r0 + r1 */ + OP3(OAdd, 6, 6, 2), /* r6 = r6 + r2 */ + OP3(OAdd, 6, 6, 3), /* r6 = r6 + r3 */ + OP3(OAdd, 6, 6, 4), /* r6 = r6 + r4 */ + OP3(OAdd, 7, 6, 5), /* r7 = r6 + r5 */ + OP1(ORet, 7), + }; + + test_alloc_function(c, 0, fn_type, 8, regs, 6, ops); + + int result; + void *fn_ptr = test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + /* Create a closure */ + vclosure cl; + memset(&cl, 0, sizeof(cl)); + cl.t = fn_type; + cl.fun = fn_ptr; + cl.hasValue = 0; + + /* Create arguments: 1 + 2 + 3 + 4 + 5 + 27 = 42 */ + vdynamic arg0, arg1, arg2, arg3, arg4, arg5; + arg0.t = &c->types[T_I32]; arg0.v.i = 1; + arg1.t = &c->types[T_I32]; arg1.v.i = 2; + arg2.t = &c->types[T_I32]; arg2.v.i = 3; + arg3.t = &c->types[T_I32]; arg3.v.i = 4; + arg4.t = &c->types[T_I32]; arg4.v.i = 5; + arg5.t = &c->types[T_I32]; arg5.v.i = 27; + vdynamic *args[] = { &arg0, &arg1, &arg2, &arg3, &arg4, &arg5 }; + + /* Call through hl_dyn_call */ + vdynamic *ret = hl_dyn_call(&cl, args, 6); + + if (ret == NULL) { + fprintf(stderr, " hl_dyn_call returned NULL\n"); + return TEST_FAIL; + } + + if (ret->v.i != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret->v.i); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Pointer argument (bytes) + * + * JIT function: (bytes ptr) -> bytes { return ptr } + */ +TEST(callback_ptr_arg) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + /* Function type: (bytes) -> bytes */ + hl_type *arg_types[] = { &c->types[T_BYTES] }; + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_BYTES], 1, arg_types); + + /* r0 = arg (bytes) */ + hl_type *regs[] = { &c->types[T_BYTES] }; + + hl_opcode ops[] = { + OP1(ORet, 0), /* return r0 */ + }; + + test_alloc_function(c, 0, fn_type, 1, regs, 1, ops); + + int result; + void *fn_ptr = test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + /* Create a closure */ + vclosure cl; + memset(&cl, 0, sizeof(cl)); + cl.t = fn_type; + cl.fun = fn_ptr; + cl.hasValue = 0; + + /* Create argument: pointer to a test value */ + static char test_data[] = "hello"; + vdynamic arg_val; + arg_val.t = &c->types[T_BYTES]; + arg_val.v.ptr = test_data; + vdynamic *args[] = { &arg_val }; + + /* Call through hl_dyn_call */ + vdynamic *ret = hl_dyn_call(&cl, args, 1); + + if (ret == NULL) { + fprintf(stderr, " hl_dyn_call returned NULL\n"); + return TEST_FAIL; + } + + if (ret->v.ptr != test_data) { + fprintf(stderr, " Expected %p, got %p\n", (void*)test_data, ret->v.ptr); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* Test list */ +static test_entry_t tests[] = { + TEST_ENTRY(callback_no_args), + TEST_ENTRY(callback_one_int_arg), + TEST_ENTRY(callback_two_int_args), + TEST_ENTRY(callback_i64_arg), + TEST_ENTRY(callback_f64_arg), + TEST_ENTRY(callback_mixed_args), + TEST_ENTRY(callback_many_int_args), + TEST_ENTRY(callback_ptr_arg), +}; + +int main(int argc, char **argv) { + printf("HashLink AArch64 JIT - C-to-HL Callback Tests\n"); + return run_tests(tests, sizeof(tests) / sizeof(tests[0])); +} diff --git a/other/tests/minimal/test_calls.c b/other/tests/minimal/test_calls.c new file mode 100644 index 000000000..bb0171173 --- /dev/null +++ b/other/tests/minimal/test_calls.c @@ -0,0 +1,446 @@ +/* + * Test function call operations for HashLink AArch64 JIT + * + * Tests: OCall0, OCall1, OCall2, OCall3 + * + * These tests require multiple functions in the hl_code structure. 
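+ * Each test hand-builds its hl_code with test_add_function and sets c->entrypoint to the caller, so the JIT has to resolve cross-function call targets.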
+ */ +#include "test_harness.h" +#include <math.h> + +/* Helper to allocate multiple functions at once */ +static void test_alloc_functions(hl_code *c, int count) { + c->functions = (hl_function*)calloc(count, sizeof(hl_function)); + c->nfunctions = 0; /* Will be incremented as we add */ +} + +/* Add a function to existing array */ +static hl_function *test_add_function(hl_code *c, int findex, hl_type *type, + int nregs, hl_type **regs, + int nops, hl_opcode *ops) { + hl_function *f = &c->functions[c->nfunctions++]; + f->findex = findex; + f->type = type; + f->nregs = nregs; + f->nops = nops; + + f->regs = (hl_type**)malloc(sizeof(hl_type*) * nregs); + memcpy(f->regs, regs, sizeof(hl_type*) * nregs); + + f->ops = (hl_opcode*)malloc(sizeof(hl_opcode) * nops); + memcpy(f->ops, ops, sizeof(hl_opcode) * nops); + + f->debug = NULL; + f->obj = NULL; + f->field.ref = NULL; + f->ref = 0; + + return f; +} + +/* + * Test: Call function with 0 arguments + * + * fn0: () -> i32 { return 42; } + * fn1: () -> i32 { return call0(fn0); } <- entry point + */ +TEST(call0_simple) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 42 }; + test_init_ints(c, 1, ints); + + /* Function types */ + hl_type *fn_type_i32 = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + /* Pre-allocate function array */ + test_alloc_functions(c, 2); + + /* fn0: findex=0, returns 42 */ + { + hl_type *regs[] = { &c->types[T_I32] }; + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 42 */ + OP1(ORet, 0), + }; + test_add_function(c, 0, fn_type_i32, 1, regs, 2, ops); + } + + /* fn1: findex=1, calls fn0 (entry point) */ + { + hl_type *regs[] = { &c->types[T_I32] }; + hl_opcode ops[] = { + OP2(OCall0, 0, 0), /* r0 = call fn0() */ + OP1(ORet, 0), + }; + test_add_function(c, 1, fn_type_i32, 1, regs, 2, ops); + } + + c->entrypoint = 1; /* fn1 is entry */ + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Call function with 1 argument + * + * fn0: (i32) -> i32 { return arg + 10; } + * fn1: () -> i32 { return call1(fn0, 32); } <- entry point + */ +TEST(call1_simple) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 10, 32 }; + test_init_ints(c, 2, ints); + + /* Function types */ + hl_type *arg_types[] = { &c->types[T_I32] }; + hl_type *fn_type_i32_i32 = test_alloc_fun_type(c, &c->types[T_I32], 1, arg_types); + hl_type *fn_type_i32 = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + test_alloc_functions(c, 2); + + /* fn0: findex=0, returns arg0 + 10 */ + { + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + hl_opcode ops[] = { + /* r0 = first argument (passed in) */ + OP2(OInt, 1, 0), /* r1 = 10 */ + OP3(OAdd, 2, 0, 1), /* r2 = r0 + r1 */ + OP1(ORet, 2), + }; + test_add_function(c, 0, fn_type_i32_i32, 3, regs, 3, ops); + } + + /* fn1: findex=1, calls fn0(32) */ + { + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32] }; + hl_opcode ops[] = { + OP2(OInt, 1, 1), /* r1 = 32 */ + OP3(OCall1, 0, 0, 1), /* r0 = call fn0(r1) */ + OP1(ORet, 0), + }; + test_add_function(c, 1, fn_type_i32, 2, regs, 3, ops); + } + + c->entrypoint = 1; + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) {
/* 32 + 10 = 42 */ + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Call function with 2 arguments + * + * fn0: (i32, i32) -> i32 { return a + b; } + * fn1: () -> i32 { return call2(fn0, 10, 32); } + */ +TEST(call2_simple) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 10, 32 }; + test_init_ints(c, 2, ints); + + /* Function types */ + hl_type *arg_types[] = { &c->types[T_I32], &c->types[T_I32] }; + hl_type *fn_type_i32_i32_i32 = test_alloc_fun_type(c, &c->types[T_I32], 2, arg_types); + hl_type *fn_type_i32 = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + test_alloc_functions(c, 2); + + /* fn0: findex=0, returns arg0 + arg1 */ + { + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + hl_opcode ops[] = { + /* r0 = arg0, r1 = arg1 */ + OP3(OAdd, 2, 0, 1), /* r2 = r0 + r1 */ + OP1(ORet, 2), + }; + test_add_function(c, 0, fn_type_i32_i32_i32, 3, regs, 2, ops); + } + + /* fn1: findex=1, calls fn0(10, 32) */ + { + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + hl_opcode ops[] = { + OP2(OInt, 1, 0), /* r1 = 10 */ + OP2(OInt, 2, 1), /* r2 = 32 */ + OP4_CALL2(OCall2, 0, 0, 1, 2), /* r0 = call fn0(r1, r2) */ + OP1(ORet, 0), + }; + test_add_function(c, 1, fn_type_i32, 3, regs, 4, ops); + } + + c->entrypoint = 1; + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { /* 10 + 32 = 42 */ + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Nested calls + * + * fn0: (i32) -> i32 { return arg * 2; } + * fn1: (i32) -> i32 { return call1(fn0, arg); } + * fn2: () -> i32 { return call1(fn1, 21); } <- entry (21 * 2 = 42) + */ +TEST(nested_calls) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 2, 21 }; + test_init_ints(c, 2, ints); + + hl_type *arg_types[] = { &c->types[T_I32] }; + hl_type *fn_type_i32_i32 = test_alloc_fun_type(c, &c->types[T_I32], 1, arg_types); + hl_type *fn_type_i32 = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + test_alloc_functions(c, 3); + + /* fn0: findex=0, returns arg * 2 */ + { + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + hl_opcode ops[] = { + OP2(OInt, 1, 0), /* r1 = 2 */ + OP3(OMul, 2, 0, 1), /* r2 = r0 * 2 */ + OP1(ORet, 2), + }; + test_add_function(c, 0, fn_type_i32_i32, 3, regs, 3, ops); + } + + /* fn1: findex=1, returns call1(fn0, arg) */ + { + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32] }; + hl_opcode ops[] = { + OP3(OCall1, 1, 0, 0), /* r1 = call fn0(r0) */ + OP1(ORet, 1), + }; + test_add_function(c, 1, fn_type_i32_i32, 2, regs, 2, ops); + } + + /* fn2: findex=2, entry point */ + { + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32] }; + hl_opcode ops[] = { + OP2(OInt, 1, 1), /* r1 = 21 */ + OP3(OCall1, 0, 1, 1), /* r0 = call fn1(r1) */ + OP1(ORet, 0), + }; + test_add_function(c, 2, fn_type_i32, 2, regs, 3, ops); + } + + c->entrypoint = 2; + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { /* 21 * 2 = 42 */ + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Recursive call (factorial) + * + * fn0:
(i32) -> i32 {
+ * if (n <= 1) return 1;
+ * return n * call1(fn0, n-1);
+ * }
+ * fn1: () -> i32 { return call1(fn0, 5); } <- 5! = 120
+ *
+ * Note: unlike the other tests, the expected value here is 120 (5!)
+ * rather than 42, since no factorial (and no small recursive sum)
+ * lands exactly on 42; we accept 120 as the test value.
+ */
+TEST(recursive_factorial) {
+ test_init_runtime();
+
+ hl_code *c = test_alloc_code();
+ test_init_base_types(c);
+
+ int ints[] = { 1, 5 };
+ test_init_ints(c, 2, ints);
+
+ hl_type *arg_types[] = { &c->types[T_I32] };
+ hl_type *fn_type_i32_i32 = test_alloc_fun_type(c, &c->types[T_I32], 1, arg_types);
+ hl_type *fn_type_i32 = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL);
+
+ test_alloc_functions(c, 2);
+
+ /* fn0: findex=0, factorial(n)
+ * r0 = n
+ * r1 = 1 (constant)
+ * r2 = temp
+ * r3 = n-1
+ * r4 = result of recursive call
+ */
+ {
+ hl_type *regs[] = {
+ &c->types[T_I32],
+ &c->types[T_I32],
+ &c->types[T_I32],
+ &c->types[T_I32],
+ &c->types[T_I32],
+ };
+ hl_opcode ops[] = {
+ OP2(OInt, 1, 0), /* op0: r1 = 1 */
+ OP3(OJSLte, 0, 1, 2), /* op1: if n <= 1 goto op4 (target = 2+2 = 4) */
+ OP1(OJAlways, 3), /* op2: else goto op6 (skip return 1) */
+ /* return 1 path */
+ OP0(OLabel), /* op3: label */
+ OP1(ORet, 1), /* op4: return 1 */
+ /* recursive path */
+ OP0(OLabel), /* op5: label */
+ OP3(OSub, 3, 0, 1), /* op6: r3 = n - 1 */
+ OP3(OCall1, 4, 0, 3), /* op7: r4 = factorial(n-1) */
+ OP3(OMul, 2, 0, 4), /* op8: r2 = n * r4 */
+ OP1(ORet, 2), /* op9: return r2 */
+ };
+ test_add_function(c, 0, fn_type_i32_i32, 5, regs, 10, ops);
+ }
+
+ /* fn1: findex=1, entry point */
+ {
+ hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32] };
+ hl_opcode ops[] = {
+ OP2(OInt, 1, 1), /* r1 = 5 */
+ OP3(OCall1, 0, 0, 1), /* r0 = factorial(5) */
+ OP1(ORet, 0),
+ };
+ test_add_function(c, 1, fn_type_i32, 2, regs, 3, ops);
+ }
+
+ c->entrypoint = 1;
+
+ int result;
+ int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result);
+ if (result != TEST_PASS) return result;
+
+ int ret = fn();
+ if (ret != 120) { /* 5! 
= 120 */ + fprintf(stderr, " Expected 120, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Call with float argument + */ +TEST(call1_float) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + double floats[] = { 10.5, 31.5 }; + test_init_floats(c, 2, floats); + + hl_type *arg_types[] = { &c->types[T_F64] }; + hl_type *fn_type_f64_f64 = test_alloc_fun_type(c, &c->types[T_F64], 1, arg_types); + hl_type *fn_type_f64 = test_alloc_fun_type(c, &c->types[T_F64], 0, NULL); + + test_alloc_functions(c, 2); + + /* fn0: findex=0, returns arg + 10.5 */ + { + hl_type *regs[] = { &c->types[T_F64], &c->types[T_F64], &c->types[T_F64] }; + hl_opcode ops[] = { + OP2(OFloat, 1, 0), /* r1 = 10.5 */ + OP3(OAdd, 2, 0, 1), /* r2 = r0 + r1 */ + OP1(ORet, 2), + }; + test_add_function(c, 0, fn_type_f64_f64, 3, regs, 3, ops); + } + + /* fn1: findex=1, calls fn0(31.5) */ + { + hl_type *regs[] = { &c->types[T_F64], &c->types[T_F64] }; + hl_opcode ops[] = { + OP2(OFloat, 1, 1), /* r1 = 31.5 */ + OP3(OCall1, 0, 0, 1), /* r0 = call fn0(r1) */ + OP1(ORet, 0), + }; + test_add_function(c, 1, fn_type_f64, 2, regs, 3, ops); + } + + c->entrypoint = 1; + + int result; + double (*fn)(void) = (double(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + double ret = fn(); + if (fabs(ret - 42.0) > 1e-9) { /* 31.5 + 10.5 = 42.0 */ + fprintf(stderr, " Expected 42.0, got %f\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* Test list */ +static test_entry_t tests[] = { + TEST_ENTRY(call0_simple), + TEST_ENTRY(call1_simple), + TEST_ENTRY(call2_simple), + TEST_ENTRY(nested_calls), + TEST_ENTRY(recursive_factorial), + TEST_ENTRY(call1_float), +}; + +int main(int argc, char **argv) { + printf("HashLink AArch64 JIT - Function Call Tests\n"); + return run_tests(tests, sizeof(tests) / sizeof(tests[0])); +} diff --git a/other/tests/minimal/test_closures.c b/other/tests/minimal/test_closures.c new file mode 100644 index 000000000..921329c36 --- /dev/null +++ b/other/tests/minimal/test_closures.c @@ -0,0 +1,280 @@ +/* + * Test closure operations for HashLink AArch64 JIT + * + * Tests: OStaticClosure, OCallClosure + * + * These are key opcodes used in hello.hl's main function. 
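+ *
+ * For orientation, a closure value at runtime is a vclosure; the sketch
+ * below is from memory of hl.h and is illustrative rather than
+ * authoritative (the 64-bit build adds extra fields/padding):
+ *
+ *   typedef struct {
+ *       hl_type *t;   // function type of the closure
+ *       void *fun;    // native code pointer of the target function
+ *       int hasValue; // 0 for static closures (no captured value)
+ *       void *value;  // captured value, passed as arg0 when hasValue != 0
+ *   } vclosure;
+ *
+ * So OStaticClosure materializes a pointer to a prebuilt vclosure with
+ * hasValue = 0, and OCallClosure loads `fun` from it and calls through.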
+ */ +#include "test_harness.h" + +/* + * Test: Create a static closure and call it with no args + * + * fn0: () -> i32 { return 42; } + * fn1: () -> i32 { + * r0 = static_closure(fn0) ; OStaticClosure + * r1 = call_closure(r0) ; OCallClosure with 0 args + * return r1 + * } + */ +TEST(static_closure_call0) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 42 }; + test_init_ints(c, 1, ints); + + /* Function type: () -> i32 */ + hl_type *fn_type_void_i32 = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + /* We need a closure type for the register holding the closure */ + /* For now, use the function type directly */ + + /* Pre-allocate function array */ + c->functions = (hl_function*)calloc(MAX_FUNCTIONS, sizeof(hl_function)); + c->nfunctions = 0; + + /* fn0: findex=0, returns 42 */ + { + hl_type *regs[] = { &c->types[T_I32] }; + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 42 */ + OP1(ORet, 0), + }; + hl_function *f = &c->functions[c->nfunctions++]; + f->findex = 0; + f->type = fn_type_void_i32; + f->nregs = 1; + f->nops = 2; + f->regs = (hl_type**)malloc(sizeof(hl_type*) * 1); + f->regs[0] = &c->types[T_I32]; + f->ops = (hl_opcode*)malloc(sizeof(hl_opcode) * 2); + memcpy(f->ops, ops, sizeof(ops)); + } + + /* fn1: findex=1, creates closure and calls it */ + { + /* r0 = closure (pointer type), r1 = result */ + hl_type *regs[] = { fn_type_void_i32, &c->types[T_I32] }; + + /* OCallClosure: p1=dst, p2=closure_reg, p3=nargs, extra=args */ + hl_opcode ops[] = { + OP2(OStaticClosure, 0, 0), /* r0 = closure pointing to fn0 */ + {OCallClosure, 1, 0, 0, NULL}, /* r1 = call_closure(r0) with 0 args */ + OP1(ORet, 1), + }; + + hl_function *f = &c->functions[c->nfunctions++]; + f->findex = 1; + f->type = fn_type_void_i32; + f->nregs = 2; + f->nops = 3; + f->regs = (hl_type**)malloc(sizeof(hl_type*) * 2); + memcpy(f->regs, regs, sizeof(regs)); + f->ops = (hl_opcode*)malloc(sizeof(hl_opcode) * 3); + memcpy(f->ops, ops, sizeof(ops)); + } + + c->entrypoint = 1; + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Static closure with one argument + * + * fn0: (i32) -> i32 { return arg + 10; } + * fn1: () -> i32 { + * r0 = static_closure(fn0) + * r1 = 32 + * r2 = call_closure(r0, r1) ; 32 + 10 = 42 + * return r2 + * } + */ +TEST(static_closure_call1) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 10, 32 }; + test_init_ints(c, 2, ints); + + /* Function types */ + hl_type *arg_types[] = { &c->types[T_I32] }; + hl_type *fn_type_i32_i32 = test_alloc_fun_type(c, &c->types[T_I32], 1, arg_types); + hl_type *fn_type_void_i32 = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + c->functions = (hl_function*)calloc(MAX_FUNCTIONS, sizeof(hl_function)); + c->nfunctions = 0; + + /* fn0: findex=0, returns arg + 10 */ + { + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + hl_opcode ops[] = { + OP2(OInt, 1, 0), /* r1 = 10 */ + OP3(OAdd, 2, 0, 1), /* r2 = r0 + r1 */ + OP1(ORet, 2), + }; + hl_function *f = &c->functions[c->nfunctions++]; + f->findex = 0; + f->type = fn_type_i32_i32; + f->nregs = 3; + f->nops = 3; + f->regs = (hl_type**)malloc(sizeof(hl_type*) * 3); + memcpy(f->regs, regs, sizeof(regs)); + f->ops = (hl_opcode*)malloc(sizeof(hl_opcode) * 
3); + memcpy(f->ops, ops, sizeof(ops)); + } + + /* fn1: findex=1, creates closure and calls with arg */ + { + hl_type *regs[] = { fn_type_i32_i32, &c->types[T_I32], &c->types[T_I32] }; + + /* OCallClosure with 1 arg: extra[0] = arg register */ + static int extra[] = { 1 }; /* r1 is the argument */ + hl_opcode ops[] = { + OP2(OStaticClosure, 0, 0), /* r0 = closure pointing to fn0 */ + OP2(OInt, 1, 1), /* r1 = 32 */ + {OCallClosure, 2, 0, 1, extra}, /* r2 = call_closure(r0, r1) */ + OP1(ORet, 2), + }; + + hl_function *f = &c->functions[c->nfunctions++]; + f->findex = 1; + f->type = fn_type_void_i32; + f->nregs = 3; + f->nops = 4; + f->regs = (hl_type**)malloc(sizeof(hl_type*) * 3); + memcpy(f->regs, regs, sizeof(regs)); + f->ops = (hl_opcode*)malloc(sizeof(hl_opcode) * 4); + memcpy(f->ops, ops, sizeof(ops)); + } + + c->entrypoint = 1; + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Static closure with two arguments + * + * fn0: (i32, i32) -> i32 { return arg0 + arg1; } + * fn1: () -> i32 { + * r0 = static_closure(fn0) + * r1 = 10 + * r2 = 32 + * r3 = call_closure(r0, r1, r2) ; 10 + 32 = 42 + * return r3 + * } + * + * This matches the pattern used in hello.hl's F27. + */ +TEST(static_closure_call2) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 10, 32 }; + test_init_ints(c, 2, ints); + + /* Function types */ + hl_type *arg_types[] = { &c->types[T_I32], &c->types[T_I32] }; + hl_type *fn_type_i32_i32_i32 = test_alloc_fun_type(c, &c->types[T_I32], 2, arg_types); + hl_type *fn_type_void_i32 = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + c->functions = (hl_function*)calloc(MAX_FUNCTIONS, sizeof(hl_function)); + c->nfunctions = 0; + + /* fn0: findex=0, returns arg0 + arg1 */ + { + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + hl_opcode ops[] = { + OP3(OAdd, 2, 0, 1), /* r2 = r0 + r1 */ + OP1(ORet, 2), + }; + hl_function *f = &c->functions[c->nfunctions++]; + f->findex = 0; + f->type = fn_type_i32_i32_i32; + f->nregs = 3; + f->nops = 2; + f->regs = (hl_type**)malloc(sizeof(hl_type*) * 3); + memcpy(f->regs, regs, sizeof(regs)); + f->ops = (hl_opcode*)malloc(sizeof(hl_opcode) * 2); + memcpy(f->ops, ops, sizeof(ops)); + } + + /* fn1: findex=1, creates closure and calls with 2 args */ + { + hl_type *regs[] = { fn_type_i32_i32_i32, &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + + /* OCallClosure with 2 args: extra[0] = arg0 reg, extra[1] = arg1 reg */ + static int extra[] = { 1, 2 }; /* r1 and r2 are the arguments */ + hl_opcode ops[] = { + OP2(OStaticClosure, 0, 0), /* r0 = closure pointing to fn0 */ + OP2(OInt, 1, 0), /* r1 = 10 */ + OP2(OInt, 2, 1), /* r2 = 32 */ + {OCallClosure, 3, 0, 2, extra}, /* r3 = call_closure(r0, r1, r2) */ + OP1(ORet, 3), + }; + + hl_function *f = &c->functions[c->nfunctions++]; + f->findex = 1; + f->type = fn_type_void_i32; + f->nregs = 4; + f->nops = 5; + f->regs = (hl_type**)malloc(sizeof(hl_type*) * 4); + memcpy(f->regs, regs, sizeof(regs)); + f->ops = (hl_opcode*)malloc(sizeof(hl_opcode) * 5); + memcpy(f->ops, ops, sizeof(ops)); + } + + c->entrypoint = 1; + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " 
Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* Test list */ +static test_entry_t tests[] = { + TEST_ENTRY(static_closure_call0), + TEST_ENTRY(static_closure_call1), + TEST_ENTRY(static_closure_call2), +}; + +int main(int argc, char **argv) { + printf("HashLink AArch64 JIT - Closure Tests\n"); + return run_tests(tests, sizeof(tests) / sizeof(tests[0])); +} diff --git a/other/tests/minimal/test_control_flow.c b/other/tests/minimal/test_control_flow.c new file mode 100644 index 000000000..e854eb37e --- /dev/null +++ b/other/tests/minimal/test_control_flow.c @@ -0,0 +1,560 @@ +/* + * Test control flow operations for HashLink AArch64 JIT + * + * Tests: OLabel, OJAlways, OJTrue, OJFalse, OJSLt, OJSGte, OJEq, OJNotEq + * + * Jump offset semantics: target = (currentOpIndex + 1) + offset + * Example: at op1 with offset=1 -> target = (1+1)+1 = 3 + */ +#include "test_harness.h" + +/* + * Test: Unconditional jump - skip one instruction + * + * op0: int r0, 0 ; r0 = 42 + * op1: jalways +1 ; jump to op3 (target = 2+1 = 3) + * op2: int r0, 1 ; r0 = 100 (SKIPPED) + * op3: ret r0 ; return 42 + */ +TEST(jump_always_skip) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 42, 100 }; + test_init_ints(c, 2, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { &c->types[T_I32] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* op0: r0 = 42 */ + OP1(OJAlways, 1), /* op1: jump to op3 (target = 2+1 = 3) */ + OP2(OInt, 0, 1), /* op2: r0 = 100 (skipped) */ + OP1(ORet, 0), /* op3: return r0 */ + }; + + test_alloc_function(c, 0, fn_type, 1, regs, 4, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Jump if true (taken) + * + * op0: bool r0, 1 ; r0 = true + * op1: int r1, 0 ; r1 = 42 + * op2: jtrue r0, +1 ; if r0 goto op4 (target = 3+1 = 4) + * op3: int r1, 1 ; r1 = 100 (skipped) + * op4: ret r1 ; return 42 + */ +TEST(jump_true_taken) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 42, 100 }; + test_init_ints(c, 2, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { &c->types[T_BOOL], &c->types[T_I32] }; + + hl_opcode ops[] = { + OP2(OBool, 0, 1), /* op0: r0 = true */ + OP2(OInt, 1, 0), /* op1: r1 = 42 */ + OP2(OJTrue, 0, 1), /* op2: if r0 goto op4 (target = 3+1 = 4) */ + OP2(OInt, 1, 1), /* op3: r1 = 100 (skipped) */ + OP1(ORet, 1), /* op4: return r1 */ + }; + + test_alloc_function(c, 0, fn_type, 2, regs, 5, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Jump if true (not taken) + */ +TEST(jump_true_not_taken) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 42, 100 }; + test_init_ints(c, 2, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { &c->types[T_BOOL], &c->types[T_I32] }; + + hl_opcode ops[] = { + OP2(OBool, 0, 0), /* op0: r0 = false */ + OP2(OInt, 1, 0), /* op1: r1 = 42 */ + OP2(OJTrue, 0, 1), /* 
op2: if r0 goto op4 (not taken) */ + OP2(OInt, 1, 1), /* op3: r1 = 100 */ + OP1(ORet, 1), /* op4: return r1 */ + }; + + test_alloc_function(c, 0, fn_type, 2, regs, 5, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 100) { + fprintf(stderr, " Expected 100, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Jump if false (taken) + */ +TEST(jump_false_taken) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 42, 100 }; + test_init_ints(c, 2, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { &c->types[T_BOOL], &c->types[T_I32] }; + + hl_opcode ops[] = { + OP2(OBool, 0, 0), /* op0: r0 = false */ + OP2(OInt, 1, 0), /* op1: r1 = 42 */ + OP2(OJFalse, 0, 1), /* op2: if !r0 goto op4 (target = 3+1 = 4) */ + OP2(OInt, 1, 1), /* op3: r1 = 100 (skipped) */ + OP1(ORet, 1), /* op4: return r1 */ + }; + + test_alloc_function(c, 0, fn_type, 2, regs, 5, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Jump if false (not taken) + */ +TEST(jump_false_not_taken) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 42, 100 }; + test_init_ints(c, 2, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { &c->types[T_BOOL], &c->types[T_I32] }; + + hl_opcode ops[] = { + OP2(OBool, 0, 1), /* op0: r0 = true */ + OP2(OInt, 1, 0), /* op1: r1 = 42 */ + OP2(OJFalse, 0, 1), /* op2: if !r0 goto op4 (not taken) */ + OP2(OInt, 1, 1), /* op3: r1 = 100 */ + OP1(ORet, 1), /* op4: return r1 */ + }; + + test_alloc_function(c, 0, fn_type, 2, regs, 5, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 100) { + fprintf(stderr, " Expected 100, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Jump if signed less than (taken): 5 < 10 + */ +TEST(jump_slt_taken) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 5, 10, 42, 100 }; + test_init_ints(c, 4, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* op0: r0 = 5 */ + OP2(OInt, 1, 1), /* op1: r1 = 10 */ + OP2(OInt, 2, 2), /* op2: r2 = 42 */ + OP3(OJSLt, 0, 1, 1), /* op3: if r0 < r1 goto op5 (target = 4+1 = 5) */ + OP2(OInt, 2, 3), /* op4: r2 = 100 (skipped) */ + OP1(ORet, 2), /* op5: return r2 */ + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 6, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Jump if signed less than (not taken): 10 < 5 + */ +TEST(jump_slt_not_taken) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 10, 5, 42, 100 }; + test_init_ints(c, 4, ints); + + hl_type 
*fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* op0: r0 = 10 */ + OP2(OInt, 1, 1), /* op1: r1 = 5 */ + OP2(OInt, 2, 2), /* op2: r2 = 42 */ + OP3(OJSLt, 0, 1, 1), /* op3: if r0 < r1 goto op5 (not taken) */ + OP2(OInt, 2, 3), /* op4: r2 = 100 */ + OP1(ORet, 2), /* op5: return r2 */ + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 6, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 100) { + fprintf(stderr, " Expected 100, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Jump if signed greater-or-equal (taken): 10 >= 5 + */ +TEST(jump_sgte_taken) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 10, 5, 42, 100 }; + test_init_ints(c, 4, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* op0: r0 = 10 */ + OP2(OInt, 1, 1), /* op1: r1 = 5 */ + OP2(OInt, 2, 2), /* op2: r2 = 42 */ + OP3(OJSGte, 0, 1, 1), /* op3: if r0 >= r1 goto op5 (target = 4+1 = 5) */ + OP2(OInt, 2, 3), /* op4: r2 = 100 (skipped) */ + OP1(ORet, 2), /* op5: return r2 */ + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 6, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Jump if equal (taken): 42 == 42 + */ +TEST(jump_eq_taken) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 42, 100 }; + test_init_ints(c, 2, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* op0: r0 = 42 */ + OP2(OInt, 1, 0), /* op1: r1 = 42 */ + OP2(OInt, 2, 0), /* op2: r2 = 42 */ + OP3(OJEq, 0, 1, 1), /* op3: if r0 == r1 goto op5 */ + OP2(OInt, 2, 1), /* op4: r2 = 100 (skipped) */ + OP1(ORet, 2), /* op5: return r2 */ + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 6, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Jump if equal (not taken): 42 == 100 + */ +TEST(jump_eq_not_taken) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 42, 100 }; + test_init_ints(c, 2, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* op0: r0 = 42 */ + OP2(OInt, 1, 1), /* op1: r1 = 100 */ + OP2(OInt, 2, 0), /* op2: r2 = 42 */ + OP3(OJEq, 0, 1, 1), /* op3: if r0 == r1 goto op5 (not taken) */ + OP2(OInt, 2, 1), /* op4: r2 = 100 */ + OP1(ORet, 2), /* op5: return r2 */ + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 6, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if 
(result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 100) { + fprintf(stderr, " Expected 100, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Jump if not equal (taken): 42 != 100 + */ +TEST(jump_neq_taken) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 42, 100 }; + test_init_ints(c, 2, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* op0: r0 = 42 */ + OP2(OInt, 1, 1), /* op1: r1 = 100 */ + OP2(OInt, 2, 0), /* op2: r2 = 42 */ + OP3(OJNotEq, 0, 1, 1), /* op3: if r0 != r1 goto op5 */ + OP2(OInt, 2, 1), /* op4: r2 = 100 (skipped) */ + OP1(ORet, 2), /* op5: return r2 */ + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 6, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Simple loop - sum 1 to 5 = 15 + * + * r0 = counter (starts at 1) + * r1 = sum (starts at 0) + * r2 = limit (5) + * + * loop: + * sum += counter + * counter++ + * if counter <= limit goto loop + * return sum + */ +TEST(simple_loop_sum) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 1, 0, 5 }; + test_init_ints(c, 3, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { + &c->types[T_I32], /* r0: counter */ + &c->types[T_I32], /* r1: sum */ + &c->types[T_I32], /* r2: limit */ + }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* op0: r0 = 1 (counter) */ + OP2(OInt, 1, 1), /* op1: r1 = 0 (sum) */ + OP2(OInt, 2, 2), /* op2: r2 = 5 (limit) */ + /* loop body starts at op3 */ + OP0(OLabel), /* op3: loop target */ + OP3(OAdd, 1, 1, 0), /* op4: sum += counter */ + OP1(OIncr, 0), /* op5: counter++ */ + OP3(OJSLte, 0, 2, -4), /* op6: if counter <= limit goto op3 (target = 7-4 = 3) */ + OP1(ORet, 1), /* op7: return sum */ + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 8, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 15) { /* 1+2+3+4+5 = 15 */ + fprintf(stderr, " Expected 15, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Signed comparison with negative numbers: -5 < 5 + */ +TEST(jump_slt_negative) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { -5, 5, 42, 100 }; + test_init_ints(c, 4, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* op0: r0 = -5 */ + OP2(OInt, 1, 1), /* op1: r1 = 5 */ + OP2(OInt, 2, 2), /* op2: r2 = 42 */ + OP3(OJSLt, 0, 1, 1), /* op3: if r0 < r1 goto op5 (target = 4+1 = 5) */ + OP2(OInt, 2, 3), /* op4: r2 = 100 (skipped) */ + OP1(ORet, 2), /* op5: return r2 */ + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 6, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return 
TEST_PASS; +} + +/* Test list */ +static test_entry_t tests[] = { + TEST_ENTRY(jump_always_skip), + TEST_ENTRY(jump_true_taken), + TEST_ENTRY(jump_true_not_taken), + TEST_ENTRY(jump_false_taken), + TEST_ENTRY(jump_false_not_taken), + TEST_ENTRY(jump_slt_taken), + TEST_ENTRY(jump_slt_not_taken), + TEST_ENTRY(jump_sgte_taken), + TEST_ENTRY(jump_eq_taken), + TEST_ENTRY(jump_eq_not_taken), + TEST_ENTRY(jump_neq_taken), + TEST_ENTRY(simple_loop_sum), + TEST_ENTRY(jump_slt_negative), +}; + +int main(int argc, char **argv) { + printf("HashLink AArch64 JIT - Control Flow Tests\n"); + return run_tests(tests, sizeof(tests) / sizeof(tests[0])); +} diff --git a/other/tests/minimal/test_dynamic.c b/other/tests/minimal/test_dynamic.c new file mode 100644 index 000000000..4a2cb6281 --- /dev/null +++ b/other/tests/minimal/test_dynamic.c @@ -0,0 +1,294 @@ +/* + * Test dynamic object operations for HashLink AArch64 JIT + * + * Tests: ODynGet, ODynSet, OToVirtual, OToDyn + * + * These are key opcodes used in hello.hl for dynamic field access. + */ +#include "test_harness.h" + +/* Helper to create a HDYN type */ +static hl_type *get_dyn_type(hl_code *c) { + if (c->ntypes >= MAX_TYPES) return NULL; + int idx = c->ntypes++; + hl_type *t = &c->types[idx]; + memset(t, 0, sizeof(hl_type)); + t->kind = HDYN; + return t; +} + +/* Helper to create a virtual type with fields */ +static hl_type *create_virtual_type(hl_code *c, int nfields, const char **field_names, hl_type **field_types) { + if (c->ntypes >= MAX_TYPES) return NULL; + + int idx = c->ntypes++; + hl_type *t = &c->types[idx]; + memset(t, 0, sizeof(hl_type)); + + t->kind = HVIRTUAL; + t->virt = (hl_type_virtual*)calloc(1, sizeof(hl_type_virtual)); + t->virt->nfields = nfields; + + if (nfields > 0) { + t->virt->fields = (hl_obj_field*)calloc(nfields, sizeof(hl_obj_field)); + for (int i = 0; i < nfields; i++) { + t->virt->fields[i].name = (uchar*)field_names[i]; + t->virt->fields[i].t = field_types[i]; + t->virt->fields[i].hashed_name = hl_hash_gen(hl_get_ustring(c, 0), true); /* placeholder */ + } + } + + return t; +} + +/* + * Test: Convert i32 to dynamic with OToDyn + * + * r0 = 42 + * r1 = to_dyn(r0) + * return r0 ; just verify we don't crash + */ +TEST(to_dyn_i32) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 42 }; + test_init_ints(c, 1, ints); + + hl_type *dyn_type = get_dyn_type(c); + if (!dyn_type) return TEST_FAIL; + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { &c->types[T_I32], dyn_type }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 42 */ + OP2(OToDyn, 1, 0), /* r1 = to_dyn(r0) */ + OP1(ORet, 0), /* return r0 */ + }; + + test_alloc_function(c, 0, fn_type, 2, regs, 3, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: OMov with various types + * + * r0 = 42 + * r1 = mov r0 + * return r1 + */ +TEST(mov_i32) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 42 }; + test_init_ints(c, 1, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 42 */ + OP2(OMov, 1, 0), /* r1 = r0 */ + OP1(ORet, 1), + }; + + 
test_alloc_function(c, 0, fn_type, 2, regs, 3, ops);
+
+ int result;
+ int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result);
+ if (result != TEST_PASS) return result;
+
+ int ret = fn();
+ if (ret != 42) {
+ fprintf(stderr, " Expected 42, got %d\n", ret);
+ return TEST_FAIL;
+ }
+
+ return TEST_PASS;
+}
+
+/*
+ * Test: ONull - load null pointer
+ *
+ * r0 = null
+ * r1 = 42
+ * return r1 ; just verify null doesn't crash us
+ */
+TEST(null_load) {
+ test_init_runtime();
+
+ hl_code *c = test_alloc_code();
+ test_init_base_types(c);
+
+ int ints[] = { 42 };
+ test_init_ints(c, 1, ints);
+
+ hl_type *dyn_type = get_dyn_type(c);
+ if (!dyn_type) return TEST_FAIL;
+
+ hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL);
+ hl_type *regs[] = { dyn_type, &c->types[T_I32] };
+
+ hl_opcode ops[] = {
+ OP1(ONull, 0), /* r0 = null */
+ OP2(OInt, 1, 0), /* r1 = 42 */
+ OP1(ORet, 1),
+ };
+
+ test_alloc_function(c, 0, fn_type, 2, regs, 3, ops);
+
+ int result;
+ int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result);
+ if (result != TEST_PASS) return result;
+
+ int ret = fn();
+ if (ret != 42) {
+ fprintf(stderr, " Expected 42, got %d\n", ret);
+ return TEST_FAIL;
+ }
+
+ return TEST_PASS;
+}
+
+/*
+ * Test: OJNull / OJNotNull - null check branches
+ *
+ * r0 = null
+ * if r0 == null goto L1
+ * r1 = 0 ; should not reach here
+ * jmp L2
+ * L1:
+ * r1 = 42 ; should reach here
+ * L2:
+ * return r1
+ */
+TEST(jnull_branch) {
+ test_init_runtime();
+
+ hl_code *c = test_alloc_code();
+ test_init_base_types(c);
+
+ int ints[] = { 0, 42 };
+ test_init_ints(c, 2, ints);
+
+ hl_type *dyn_type = get_dyn_type(c);
+ if (!dyn_type) return TEST_FAIL;
+
+ hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL);
+ hl_type *regs[] = { dyn_type, &c->types[T_I32] };
+
+ hl_opcode ops[] = {
+ OP1(ONull, 0), /* op0: r0 = null */
+ OP2(OJNull, 0, 2), /* op1: if r0 == null goto op4 (1+1+2=4) */
+ OP2(OInt, 1, 0), /* op2: r1 = 0 (not reached) */
+ OP1(OJAlways, 2), /* op3: goto op6/L2 (3+1+2=6) */
+ OP0(OLabel), /* op4: label */
+ OP2(OInt, 1, 1), /* op5: r1 = 42 */
+ OP0(OLabel), /* op6: label */
+ OP1(ORet, 1), /* op7: return r1 */
+ };
+
+ test_alloc_function(c, 0, fn_type, 2, regs, 8, ops);
+
+ int result;
+ int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result);
+ if (result != TEST_PASS) return result;
+
+ int ret = fn();
+ if (ret != 42) {
+ fprintf(stderr, " Expected 42, got %d\n", ret);
+ return TEST_FAIL;
+ }
+
+ return TEST_PASS;
+}
+
+/*
+ * Test: OJNotNull branch
+ *
+ * r0 = "x" (a non-null pointer, obtained via OString)
+ * if r0 != null goto L1
+ * r1 = 0
+ * jmp L2
+ * L1:
+ * r1 = 42
+ * L2:
+ * return r1
+ */
+TEST(jnotnull_branch) {
+ test_init_runtime();
+
+ hl_code *c = test_alloc_code();
+ test_init_base_types(c);
+
+ int ints[] = { 0, 42 };
+ test_init_ints(c, 2, ints);
+
+ hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL);
+ /* Use BYTES type for the "pointer" register */
+ hl_type *regs[] = { &c->types[T_BYTES], &c->types[T_I32] };
+
+ /* We'll use OString to get a non-null pointer */
+ c->nstrings = 1;
+ c->strings = (char**)malloc(sizeof(char*));
+ c->strings[0] = "x";
+ c->strings_lens = (int*)malloc(sizeof(int));
+ c->strings_lens[0] = 1;
+ c->ustrings = (uchar**)calloc(1, sizeof(uchar*));
+
+ hl_opcode ops[] = {
+ OP2(OString, 0, 0), /* op0: r0 = "x" (non-null) */
+ OP2(OJNotNull, 0, 2), /* op1: if r0 != null goto op4 */
+ OP2(OInt, 1, 0), /* op2: r1 = 0 (not reached) */
+ OP1(OJAlways, 2), /* op3: goto op6/L2 */
+ OP0(OLabel), /* op4: 
label */ + OP2(OInt, 1, 1), /* op5: r1 = 42 */ + OP0(OLabel), /* op6: label */ + OP1(ORet, 1), /* op7: return r1 */ + }; + + test_alloc_function(c, 0, fn_type, 2, regs, 8, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* Test list */ +static test_entry_t tests[] = { + TEST_ENTRY(to_dyn_i32), + TEST_ENTRY(mov_i32), + TEST_ENTRY(null_load), + TEST_ENTRY(jnull_branch), + TEST_ENTRY(jnotnull_branch), +}; + +int main(int argc, char **argv) { + printf("HashLink AArch64 JIT - Dynamic/Null Operations Tests\n"); + return run_tests(tests, sizeof(tests) / sizeof(tests[0])); +} diff --git a/other/tests/minimal/test_enum.c b/other/tests/minimal/test_enum.c new file mode 100644 index 000000000..255e565b6 --- /dev/null +++ b/other/tests/minimal/test_enum.c @@ -0,0 +1,327 @@ +/* + * Test enum operations for HashLink AArch64 JIT + * + * Tests: OEnumAlloc, OEnumField, OSetEnumField, OEnumIndex, OMakeEnum + */ +#include "test_harness.h" + +/* + * Helper to create an enum type with a single construct that has pointer fields. + * This is similar to how Option or similar sum types work. + * + * Construct 0: has `nfields` pointer-sized fields at 8-byte offsets starting at offset 8 + * (offset 0 is typically the enum tag/index) + */ +static hl_type *create_enum_type(hl_code *c, const char *name, int nfields) { + if (c->ntypes >= MAX_TYPES) { + fprintf(stderr, "Too many types\n"); + return NULL; + } + + int idx = c->ntypes++; + hl_type *t = &c->types[idx]; + memset(t, 0, sizeof(hl_type)); + + t->kind = HENUM; + t->tenum = (hl_type_enum*)calloc(1, sizeof(hl_type_enum)); + t->tenum->name = (const uchar*)name; + t->tenum->nconstructs = 1; + t->tenum->constructs = (hl_enum_construct*)calloc(1, sizeof(hl_enum_construct)); + + hl_enum_construct *cons = &t->tenum->constructs[0]; + cons->name = (const uchar*)"Cons"; + cons->nparams = nfields; + cons->hasptr = true; + + if (nfields > 0) { + cons->params = (hl_type**)calloc(nfields, sizeof(hl_type*)); + cons->offsets = (int*)calloc(nfields, sizeof(int)); + for (int i = 0; i < nfields; i++) { + cons->params[i] = &c->types[T_I64]; /* Use i64/pointer type */ + cons->offsets[i] = 8 + i * 8; /* Fields start at offset 8 (after tag) */ + } + } + + /* Size = 8 (tag) + nfields * 8 */ + cons->size = 8 + nfields * 8; + + return t; +} + +/* + * Test: OEnumField - extract a field from an enum, then use it + * + * This test specifically targets the bug where OEnumField doesn't clear + * the destination register binding, causing stale values to be used. + * + * The pattern is: + * r1 = alloc_enum ; allocate enum + * set_enum_field r1, 0, r0 ; store a value (42) into field 0 + * r2 = enum_field r1, 0 ; extract field 0 -> should be 42 + * return r2 ; return extracted value + * + * If the register binding bug exists, r2 might return garbage instead of 42. 
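+ *
+ * For reference, the construct allocated by OEnumAlloc follows the layout
+ * that create_enum_type() above sets up (a sketch under those assumptions,
+ * not the authoritative definition from hl.h):
+ *
+ *   offset 0:       construct index/tag
+ *   offset 8 + i*8: field i (pointer-sized, per cons->offsets[i])
+ *
+ * so OEnumField with extra=k amounts to a load from enum_ptr + offsets[k],
+ * and OSetEnumField to the matching store.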
+ */ +TEST(enum_field_basic) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + /* Create enum type with 1 field */ + hl_type *enum_t = create_enum_type(c, "TestEnum", 1); + if (!enum_t) return TEST_FAIL; + + /* Function: () -> i64 */ + hl_type *ret_type = &c->types[T_I64]; + hl_type *fn_type = test_alloc_fun_type(c, ret_type, 0, NULL); + + /* Registers: + * r0: i64 (temp for value 42) + * r1: enum (the allocated enum) + * r2: i64 (extracted field value) + */ + hl_type *regs[] = { &c->types[T_I64], enum_t, &c->types[T_I64] }; + + /* Integer constants */ + int ints[] = { 42 }; + test_init_ints(c, 1, ints); + + /* Opcodes */ + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 42 */ + OP2(OEnumAlloc, 1, 0), /* r1 = alloc enum (construct 0) */ + OP3(OSetEnumField, 1, 0, 0), /* r1.field[0] = r0 (42) */ + { OEnumField, 2, 1, 0, (int*)(intptr_t)0 }, /* r2 = r1.field[0] (extra=0) */ + OP1(ORet, 2), /* return r2 */ + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 5, ops); + + int result; + int64_t (*func)(void) = (int64_t (*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int64_t ret = func(); + if (ret != 42) { + printf("\n Expected 42, got %ld\n", (long)ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: OEnumField with multiple fields and uses + * + * This test more closely matches the uvsample crash pattern: + * - Multiple OEnumField extractions + * - The extracted values are then used as function arguments + * + * Pattern: + * r0 = 100 + * r1 = 200 + * r2 = alloc_enum + * set_enum_field r2, 0, r0 ; field 0 = 100 + * set_enum_field r2, 1, r1 ; field 1 = 200 + * r3 = enum_field r2, 0 ; r3 = 100 + * r4 = enum_field r2, 1 ; r4 = 200 + * r5 = r3 + r4 ; r5 = 300 + * return r5 + */ +TEST(enum_field_multiple) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + /* Create enum type with 2 fields */ + hl_type *enum_t = create_enum_type(c, "TestEnum2", 2); + if (!enum_t) return TEST_FAIL; + + /* Function: () -> i64 */ + hl_type *ret_type = &c->types[T_I64]; + hl_type *fn_type = test_alloc_fun_type(c, ret_type, 0, NULL); + + /* Registers: + * r0: i64 (value 100) + * r1: i64 (value 200) + * r2: enum + * r3: i64 (extracted field 0) + * r4: i64 (extracted field 1) + * r5: i64 (sum) + */ + hl_type *regs[] = { + &c->types[T_I64], &c->types[T_I64], enum_t, + &c->types[T_I64], &c->types[T_I64], &c->types[T_I64] + }; + + /* Integer constants */ + int ints[] = { 100, 200 }; + test_init_ints(c, 2, ints); + + /* Opcodes */ + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 100 */ + OP2(OInt, 1, 1), /* r1 = 200 */ + OP2(OEnumAlloc, 2, 0), /* r2 = alloc enum */ + OP3(OSetEnumField, 2, 0, 0), /* r2.field[0] = r0 */ + OP3(OSetEnumField, 2, 1, 1), /* r2.field[1] = r1 */ + { OEnumField, 3, 2, 0, (int*)(intptr_t)0 }, /* r3 = r2.field[0] */ + { OEnumField, 4, 2, 0, (int*)(intptr_t)1 }, /* r4 = r2.field[1] */ + OP3(OAdd, 5, 3, 4), /* r5 = r3 + r4 */ + OP1(ORet, 5), /* return r5 */ + }; + + test_alloc_function(c, 0, fn_type, 6, regs, 9, ops); + + int result; + int64_t (*func)(void) = (int64_t (*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int64_t ret = func(); + if (ret != 300) { + printf("\n Expected 300, got %ld\n", (long)ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: OEnumField followed by function call + * + * This is the exact pattern that causes the uvsample crash: + * - Extract a field from enum + * - Pass it as argument to a 
function call + * + * If dst register binding isn't cleared, the call might use a stale value. + * + * Pattern: + * r0 = 42 + * r1 = alloc_enum + * set_enum_field r1, 0, r0 + * r2 = enum_field r1, 0 ; extract 42 + * r3 = call identity(r2) ; call function with extracted value + * return r3 + */ + +/* Native identity function for testing */ +static int64_t native_identity(int64_t x) { + return x; +} + +TEST(enum_field_then_call) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + /* Create enum type with 1 field */ + hl_type *enum_t = create_enum_type(c, "TestEnum3", 1); + if (!enum_t) return TEST_FAIL; + + /* Native function type: (i64) -> i64 */ + hl_type *i64_t = &c->types[T_I64]; + hl_type *native_args[] = { i64_t }; + hl_type *native_fn_type = test_alloc_fun_type(c, i64_t, 1, native_args); + + /* Add native function at findex 1 */ + test_add_native(c, 1, "test", "identity", native_fn_type, (void*)native_identity); + + /* Main function type: () -> i64 */ + hl_type *fn_type = test_alloc_fun_type(c, i64_t, 0, NULL); + + /* Registers: + * r0: i64 (value 42) + * r1: enum + * r2: i64 (extracted field) + * r3: i64 (call result) + */ + hl_type *regs[] = { i64_t, enum_t, i64_t, i64_t }; + + /* Integer constants */ + int ints[] = { 42 }; + test_init_ints(c, 1, ints); + + /* Opcodes */ + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 42 */ + OP2(OEnumAlloc, 1, 0), /* r1 = alloc enum */ + OP3(OSetEnumField, 1, 0, 0), /* r1.field[0] = r0 */ + { OEnumField, 2, 1, 0, (int*)(intptr_t)0 }, /* r2 = r1.field[0] */ + OP3(OCall1, 3, 1, 2), /* r3 = call F1(r2) - native identity */ + OP1(ORet, 3), /* return r3 */ + }; + + test_alloc_function(c, 0, fn_type, 4, regs, 6, ops); + + int result; + int64_t (*func)(void) = (int64_t (*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int64_t ret = func(); + if (ret != 42) { + printf("\n Expected 42, got %ld\n", (long)ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: OEnumIndex - get the construct index of an enum value + */ +TEST(enum_index) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + /* Create enum type */ + hl_type *enum_t = create_enum_type(c, "TestEnum4", 1); + if (!enum_t) return TEST_FAIL; + + /* Function: () -> i32 */ + hl_type *ret_type = &c->types[T_I32]; + hl_type *fn_type = test_alloc_fun_type(c, ret_type, 0, NULL); + + /* Registers: + * r0: enum + * r1: i32 (index result) + */ + hl_type *regs[] = { enum_t, &c->types[T_I32] }; + + /* Opcodes */ + hl_opcode ops[] = { + OP2(OEnumAlloc, 0, 0), /* r0 = alloc enum (construct 0) */ + OP2(OEnumIndex, 1, 0), /* r1 = index of r0 (should be 0) */ + OP1(ORet, 1), /* return r1 */ + }; + + test_alloc_function(c, 0, fn_type, 2, regs, 3, ops); + + int result; + int (*func)(void) = (int (*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = func(); + if (ret != 0) { + printf("\n Expected 0, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* Test registry */ +int main(int argc, char **argv) { + test_entry_t tests[] = { + TEST_ENTRY(enum_field_basic), + TEST_ENTRY(enum_field_multiple), + TEST_ENTRY(enum_field_then_call), + TEST_ENTRY(enum_index), + }; + + return run_tests(tests, sizeof(tests) / sizeof(tests[0])); +} diff --git a/other/tests/minimal/test_exceptions.c b/other/tests/minimal/test_exceptions.c new file mode 100644 index 000000000..f0a18155b --- /dev/null +++ b/other/tests/minimal/test_exceptions.c @@ -0,0 +1,291 
@@
+/*
+ * Test exception operations for HashLink AArch64 JIT
+ *
+ * Tests: OThrow, ORethrow, OTrap, OEndTrap, OCatch
+ *
+ * Exception handling in HashLink uses setjmp/longjmp.
+ * OTrap: set up exception handler (like try {)
+ * OEndTrap: tear down exception handler (end of try block)
+ * OThrow: throw an exception
+ * ORethrow: rethrow current exception
+ * OCatch: marks catch block (informational, no code generated)
+ */
+#include "test_harness.h"
+
+/*
+ * Test: OTrap and OEndTrap - basic try block without exception
+ *
+ * try {
+ * r0 = 42
+ * }
+ * return r0
+ *
+ * This tests that trap setup/teardown works without throwing.
+ */
+TEST(trap_no_exception) {
+ test_init_runtime();
+
+ hl_code *c = test_alloc_code();
+ test_init_base_types(c);
+
+ int ints[] = { 42 };
+ test_init_ints(c, 1, ints);
+
+ hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL);
+
+ hl_type *regs[] = {
+ &c->types[T_I32], /* r0 = result */
+ &c->types[T_VOID], /* r1 = exception (unused here) */
+ };
+
+ /*
+ * Layout:
+ * 0: OTrap r1, 3 ; setup trap, if exception goto +3 (catch block)
+ * 1: OInt r0, $0 ; r0 = 42 (try body)
+ * 2: OEndTrap ; end try block
+ * 3: ORet r0 ; return r0 (after try or from catch)
+ *
+ * Catch block would be at opcode 4 (1+3), but we don't have one.
+ */
+ hl_opcode ops[] = {
+ OP2(OTrap, 1, 3), /* trap -> catch at +3 */
+ OP2(OInt, 0, 0), /* r0 = 42 */
+ OP1(OEndTrap, 1), /* end trap */
+ OP1(ORet, 0),
+ };
+
+ test_alloc_function(c, 0, fn_type, 2, regs, 4, ops);
+
+ int result;
+ int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result);
+ if (result != TEST_PASS) return result;
+
+ int ret = fn();
+ if (ret != 42) {
+ fprintf(stderr, " Expected 42, got %d\n", ret);
+ return TEST_FAIL;
+ }
+
+ return TEST_PASS;
+}
+
+/*
+ * Test: OThrow - throw and catch exception
+ *
+ * try {
+ * throw null
+ * r0 = 10 ; should not execute
+ * } catch (e) {
+ * r0 = 42 ; should execute
+ * }
+ * return r0
+ */
+TEST(throw_catch_basic) {
+ test_init_runtime();
+
+ hl_code *c = test_alloc_code();
+ test_init_base_types(c);
+
+ int ints[] = { 10, 42 };
+ test_init_ints(c, 2, ints);
+
+ hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL);
+
+ /* For throwing, we need a dynamic value.
+ * The simplest throwable is null, so we just load null and throw it. 
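+ *
+ * (In the C runtime this pattern corresponds roughly to the hl_trap /
+ * hl_throw helpers in hl.h: OTrap pushes a trap context and setjmp()s
+ * into it, OThrow longjmp()s back to the innermost context, and OEndTrap
+ * pops the context. The helper names are cited from memory; the emitted
+ * code only needs equivalent behavior.)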
*/
+ hl_type *regs[] = {
+ &c->types[T_I32], /* r0 = result */
+ &c->types[T_VOID], /* r1 = caught exception */
+ &c->types[T_VOID], /* r2 = exception to throw */
+ };
+
+ /*
+ * Layout:
+ * 0: OTrap r1, 4 ; setup trap, if exception goto +4 (opcode 5)
+ * 1: ONull r2 ; create null for throw (simplest throwable)
+ * 2: OThrow r2 ; throw
+ * 3: OInt r0, $0 ; r0 = 10 (should NOT execute)
+ * 4: OEndTrap ; end trap (won't reach if thrown)
+ * 5: OCatch ; catch marker
+ * 6: OInt r0, $1 ; r0 = 42 (catch body)
+ * 7: ORet r0
+ */
+ hl_opcode ops[] = {
+ OP2(OTrap, 1, 4), /* trap -> catch at op 5 (0+1+4) */
+ OP1(ONull, 2), /* r2 = null */
+ OP1(OThrow, 2), /* throw r2 */
+ OP2(OInt, 0, 0), /* r0 = 10 (unreachable) */
+ OP1(OEndTrap, 1), /* end trap (unreachable) */
+ OP1(OCatch, 0), /* catch marker */
+ OP2(OInt, 0, 1), /* r0 = 42 (catch body) */
+ OP1(ORet, 0),
+ };
+
+ test_alloc_function(c, 0, fn_type, 3, regs, 8, ops);
+
+ int result;
+ int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result);
+ if (result != TEST_PASS) return result;
+
+ int ret = fn();
+ if (ret != 42) {
+ fprintf(stderr, " Expected 42 (catch block), got %d\n", ret);
+ return TEST_FAIL;
+ }
+
+ return TEST_PASS;
+}
+
+/*
+ * Test: Nested try blocks
+ *
+ * try {
+ * try {
+ * throw
+ * } catch {
+ * r0 = 10
+ * }
+ * r0 = r0 + 32 ; 10 + 32 = 42
+ * } catch {
+ * r0 = 99 ; should not reach
+ * }
+ * return r0
+ */
+TEST(nested_trap) {
+ test_init_runtime();
+
+ hl_code *c = test_alloc_code();
+ test_init_base_types(c);
+
+ int ints[] = { 10, 32, 99 };
+ test_init_ints(c, 3, ints);
+
+ hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL);
+
+ hl_type *regs[] = {
+ &c->types[T_I32], /* r0 = result */
+ &c->types[T_VOID], /* r1 = outer exception */
+ &c->types[T_VOID], /* r2 = inner exception */
+ &c->types[T_VOID], /* r3 = throw value */
+ &c->types[T_I32], /* r4 = temp */
+ };
+
+ /*
+ * Outer try: 0-11
+ * Inner try: 1-6
+ * Inner catch: 7-8
+ * Continue outer (merge point): 9-11
+ * Outer catch: 13-15
+ *
+ * Note: OLabel is required at merge points (op 9, op 16) because:
+ * - Op 9 is reached via OJAlways from op 6 AND via fallthrough from op 8
+ * - Op 16 is reached via OJAlways from op 13 AND via fallthrough from op 15
+ * At runtime, spill_regs() before jumps puts values on stack,
+ * but the generated code must use discard_regs() at labels to ensure
+ * subsequent ops load from stack rather than assuming register bindings. 
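+ *
+ * A minimal sketch of that rule, using the spill_regs/discard_regs names
+ * from the note above (the real helpers in jit_aarch64.c may be shaped
+ * differently):
+ *
+ *   case OJAlways:
+ *       spill_regs(ctx);   // flush live vregs to their stack slots
+ *       emit_jump(ctx, target);
+ *       break;
+ *   case OLabel:
+ *       discard_regs(ctx); // forget bindings; predecessors may disagree
+ *       break;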
+ */
+ hl_opcode ops[] = {
+ OP2(OTrap, 1, 13), /* 0: outer trap -> catch at 14 (0+1+13) */
+ OP2(OTrap, 2, 5), /* 1: inner trap -> catch at 7 (1+1+5) */
+ OP1(ONull, 3), /* 2: r3 = null */
+ OP1(OThrow, 3), /* 3: throw */
+ OP2(OInt, 0, 2), /* 4: unreachable */
+ OP1(OEndTrap, 2), /* 5: end inner trap (unreachable) */
+ OP2(OJAlways, 2, 0), /* 6: skip catch -> goto 9 (6+1+2) */
+ OP1(OCatch, 0), /* 7: inner catch marker */
+ OP2(OInt, 0, 0), /* 8: r0 = 10 */
+ OP0(OLabel), /* 9: merge point for op 6 jump and fallthrough */
+ OP2(OInt, 4, 1), /* 10: r4 = 32 */
+ OP3(OAdd, 0, 0, 4), /* 11: r0 = r0 + 32 = 42 */
+ OP1(OEndTrap, 1), /* 12: end outer trap */
+ OP2(OJAlways, 2, 0), /* 13: skip outer catch -> goto 16 (13+1+2) */
+ OP1(OCatch, 0), /* 14: outer catch marker */
+ OP2(OInt, 0, 2), /* 15: r0 = 99 */
+ OP0(OLabel), /* 16: merge point for op 13 jump and fallthrough */
+ OP1(ORet, 0), /* 17: return */
+ };
+
+ test_alloc_function(c, 0, fn_type, 5, regs, 18, ops);
+
+ int result;
+ int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result);
+ if (result != TEST_PASS) return result;
+
+ int ret = fn();
+ if (ret != 42) {
+ fprintf(stderr, " Expected 42, got %d\n", ret);
+ return TEST_FAIL;
+ }
+
+ return TEST_PASS;
+}
+
+/*
+ * Test: OEndTrap without exception cleans up properly
+ *
+ * Multiple sequential try blocks that don't throw.
+ */
+TEST(multiple_traps_no_throw) {
+ test_init_runtime();
+
+ hl_code *c = test_alloc_code();
+ test_init_base_types(c);
+
+ int ints[] = { 10, 20, 12 };
+ test_init_ints(c, 3, ints);
+
+ hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL);
+
+ hl_type *regs[] = {
+ &c->types[T_I32],
+ &c->types[T_VOID],
+ &c->types[T_I32],
+ &c->types[T_I32],
+ };
+
+ hl_opcode ops[] = {
+ /* First try block */
+ OP2(OTrap, 1, 3), /* trap */
+ OP2(OInt, 0, 0), /* r0 = 10 */
+ OP1(OEndTrap, 1), /* end trap */
+ /* Second try block */
+ OP2(OTrap, 1, 3), /* trap */
+ OP2(OInt, 2, 1), /* r2 = 20 */
+ OP1(OEndTrap, 1), /* end trap */
+ /* Third try block */
+ OP2(OTrap, 1, 3), /* trap */
+ OP2(OInt, 3, 2), /* r3 = 12 */
+ OP1(OEndTrap, 1), /* end trap */
+ /* Combine */
+ OP3(OAdd, 0, 0, 2), /* r0 = r0 + r2 = 30 */
+ OP3(OAdd, 0, 0, 3), /* r0 = r0 + r3 = 42 */
+ OP1(ORet, 0),
+ };
+
+ test_alloc_function(c, 0, fn_type, 4, regs, 12, ops);
+
+ int result;
+ int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result);
+ if (result != TEST_PASS) return result;
+
+ int ret = fn();
+ if (ret != 42) {
+ fprintf(stderr, " Expected 42, got %d\n", ret);
+ return TEST_FAIL;
+ }
+
+ return TEST_PASS;
+}
+
+/* Test list */
+static test_entry_t tests[] = {
+ TEST_ENTRY(trap_no_exception),
+ TEST_ENTRY(throw_catch_basic),
+ TEST_ENTRY(nested_trap),
+ TEST_ENTRY(multiple_traps_no_throw),
+};
+
+int main(int argc, char **argv) {
+ printf("HashLink AArch64 JIT - Exception Tests\n");
+ return run_tests(tests, sizeof(tests) / sizeof(tests[0]));
+}
diff --git a/other/tests/minimal/test_float_ops.c b/other/tests/minimal/test_float_ops.c
new file mode 100644
index 000000000..6200f3352
--- /dev/null
+++ b/other/tests/minimal/test_float_ops.c
@@ -0,0 +1,511 @@
+/*
+ * Test floating-point operations for HashLink AArch64 JIT
+ *
+ * Tests: OFloat, OAdd/OSub/OMul/OSDiv (f64), ONeg, conversions
+ */
+#include "test_harness.h"
+#include <math.h>
+
+/* Helper to compare floats with epsilon */
+static int float_eq(double a, double b) {
+ double eps = 1e-9;
+ return fabs(a - b) < eps;
+}
+
+/*
+ * Test: Return constant float 3.14159
+ */
+TEST(return_float_constant) {
+ test_init_runtime();
+
+ 
hl_code *c = test_alloc_code(); + test_init_base_types(c); + + /* Float pool */ + double floats[] = { 3.14159 }; + test_init_floats(c, 1, floats); + + /* Function type: () -> f64 */ + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_F64], 0, NULL); + + /* Registers: r0:f64 */ + hl_type *regs[] = { &c->types[T_F64] }; + + hl_opcode ops[] = { + OP2(OFloat, 0, 0), /* r0 = floats[0] */ + OP1(ORet, 0), + }; + + test_alloc_function(c, 0, fn_type, 1, regs, 2, ops); + + int result; + double (*fn)(void) = (double(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + double ret = fn(); + if (!float_eq(ret, 3.14159)) { + fprintf(stderr, " Expected 3.14159, got %f\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Add floats: 1.5 + 2.5 = 4.0 + */ +TEST(add_float_constants) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + double floats[] = { 1.5, 2.5 }; + test_init_floats(c, 2, floats); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_F64], 0, NULL); + hl_type *regs[] = { &c->types[T_F64], &c->types[T_F64], &c->types[T_F64] }; + + hl_opcode ops[] = { + OP2(OFloat, 0, 0), + OP2(OFloat, 1, 1), + OP3(OAdd, 2, 0, 1), + OP1(ORet, 2), + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 4, ops); + + int result; + double (*fn)(void) = (double(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + double ret = fn(); + if (!float_eq(ret, 4.0)) { + fprintf(stderr, " Expected 4.0, got %f\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Subtract floats: 10.5 - 6.5 = 4.0 + */ +TEST(sub_float_constants) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + double floats[] = { 10.5, 6.5 }; + test_init_floats(c, 2, floats); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_F64], 0, NULL); + hl_type *regs[] = { &c->types[T_F64], &c->types[T_F64], &c->types[T_F64] }; + + hl_opcode ops[] = { + OP2(OFloat, 0, 0), + OP2(OFloat, 1, 1), + OP3(OSub, 2, 0, 1), + OP1(ORet, 2), + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 4, ops); + + int result; + double (*fn)(void) = (double(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + double ret = fn(); + if (!float_eq(ret, 4.0)) { + fprintf(stderr, " Expected 4.0, got %f\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Multiply floats: 2.0 * 3.5 = 7.0 + */ +TEST(mul_float_constants) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + double floats[] = { 2.0, 3.5 }; + test_init_floats(c, 2, floats); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_F64], 0, NULL); + hl_type *regs[] = { &c->types[T_F64], &c->types[T_F64], &c->types[T_F64] }; + + hl_opcode ops[] = { + OP2(OFloat, 0, 0), + OP2(OFloat, 1, 1), + OP3(OMul, 2, 0, 1), + OP1(ORet, 2), + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 4, ops); + + int result; + double (*fn)(void) = (double(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + double ret = fn(); + if (!float_eq(ret, 7.0)) { + fprintf(stderr, " Expected 7.0, got %f\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Divide floats: 15.0 / 3.0 = 5.0 + */ +TEST(div_float_constants) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + double floats[] = { 15.0, 3.0 }; + test_init_floats(c, 2, floats); + + hl_type *fn_type = test_alloc_fun_type(c, 
&c->types[T_F64], 0, NULL); + hl_type *regs[] = { &c->types[T_F64], &c->types[T_F64], &c->types[T_F64] }; + + hl_opcode ops[] = { + OP2(OFloat, 0, 0), + OP2(OFloat, 1, 1), + OP3(OSDiv, 2, 0, 1), + OP1(ORet, 2), + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 4, ops); + + int result; + double (*fn)(void) = (double(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + double ret = fn(); + if (!float_eq(ret, 5.0)) { + fprintf(stderr, " Expected 5.0, got %f\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Negate float: -(-3.5) = 3.5 + */ +TEST(neg_float) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + double floats[] = { -3.5 }; + test_init_floats(c, 1, floats); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_F64], 0, NULL); + hl_type *regs[] = { &c->types[T_F64], &c->types[T_F64] }; + + hl_opcode ops[] = { + OP2(OFloat, 0, 0), + OP2(ONeg, 1, 0), + OP1(ORet, 1), + }; + + test_alloc_function(c, 0, fn_type, 2, regs, 3, ops); + + int result; + double (*fn)(void) = (double(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + double ret = fn(); + if (!float_eq(ret, 3.5)) { + fprintf(stderr, " Expected 3.5, got %f\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Move float register + */ +TEST(mov_float_register) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + double floats[] = { 2.718281828 }; + test_init_floats(c, 1, floats); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_F64], 0, NULL); + hl_type *regs[] = { &c->types[T_F64], &c->types[T_F64] }; + + hl_opcode ops[] = { + OP2(OFloat, 0, 0), + OP2(OMov, 1, 0), + OP1(ORet, 1), + }; + + test_alloc_function(c, 0, fn_type, 2, regs, 3, ops); + + int result; + double (*fn)(void) = (double(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + double ret = fn(); + if (!float_eq(ret, 2.718281828)) { + fprintf(stderr, " Expected 2.718281828, got %f\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Convert int to float (signed): 42 -> 42.0 + */ +TEST(int_to_float_signed) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 42 }; + test_init_ints(c, 1, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_F64], 0, NULL); + hl_type *regs[] = { &c->types[T_I32], &c->types[T_F64] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0:i32 = 42 */ + OP2(OToSFloat, 1, 0), /* r1:f64 = (f64)r0 */ + OP1(ORet, 1), + }; + + test_alloc_function(c, 0, fn_type, 2, regs, 3, ops); + + int result; + double (*fn)(void) = (double(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + double ret = fn(); + if (!float_eq(ret, 42.0)) { + fprintf(stderr, " Expected 42.0, got %f\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Convert negative int to float: -42 -> -42.0 + */ +TEST(neg_int_to_float) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { -42 }; + test_init_ints(c, 1, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_F64], 0, NULL); + hl_type *regs[] = { &c->types[T_I32], &c->types[T_F64] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), + OP2(OToSFloat, 1, 0), + OP1(ORet, 1), + }; + + test_alloc_function(c, 0, fn_type, 2, regs, 3, ops); + + int result; + double (*fn)(void) = (double(*)(void))test_jit_compile(c, 
&result); + if (result != TEST_PASS) return result; + + double ret = fn(); + if (!float_eq(ret, -42.0)) { + fprintf(stderr, " Expected -42.0, got %f\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Convert float to int: 42.7 -> 42 + */ +TEST(float_to_int) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + double floats[] = { 42.7 }; + test_init_floats(c, 1, floats); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { &c->types[T_F64], &c->types[T_I32] }; + + hl_opcode ops[] = { + OP2(OFloat, 0, 0), /* r0:f64 = 42.7 */ + OP2(OToInt, 1, 0), /* r1:i32 = (i32)r0 */ + OP1(ORet, 1), + }; + + test_alloc_function(c, 0, fn_type, 2, regs, 3, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Convert negative float to int: -42.7 -> -42 + */ +TEST(neg_float_to_int) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + double floats[] = { -42.7 }; + test_init_floats(c, 1, floats); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { &c->types[T_F64], &c->types[T_I32] }; + + hl_opcode ops[] = { + OP2(OFloat, 0, 0), + OP2(OToInt, 1, 0), + OP1(ORet, 1), + }; + + test_alloc_function(c, 0, fn_type, 2, regs, 3, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != -42) { + fprintf(stderr, " Expected -42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: f32 operations - load and return + */ +TEST(return_f32_constant) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + /* f32 is stored in floats pool as f64, converted on load */ + double floats[] = { 3.14159f }; + test_init_floats(c, 1, floats); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_F32], 0, NULL); + hl_type *regs[] = { &c->types[T_F32] }; + + hl_opcode ops[] = { + OP2(OFloat, 0, 0), + OP1(ORet, 0), + }; + + test_alloc_function(c, 0, fn_type, 1, regs, 2, ops); + + int result; + float (*fn)(void) = (float(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + float ret = fn(); + if (fabsf(ret - 3.14159f) > 1e-5f) { + fprintf(stderr, " Expected ~3.14159, got %f\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: f32 addition + */ +TEST(add_f32_constants) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + double floats[] = { 1.5f, 2.5f }; + test_init_floats(c, 2, floats); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_F32], 0, NULL); + hl_type *regs[] = { &c->types[T_F32], &c->types[T_F32], &c->types[T_F32] }; + + hl_opcode ops[] = { + OP2(OFloat, 0, 0), + OP2(OFloat, 1, 1), + OP3(OAdd, 2, 0, 1), + OP1(ORet, 2), + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 4, ops); + + int result; + float (*fn)(void) = (float(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + float ret = fn(); + if (fabsf(ret - 4.0f) > 1e-5f) { + fprintf(stderr, " Expected 4.0, got %f\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* Test list */ +static test_entry_t tests[] = { + TEST_ENTRY(return_float_constant), + 
TEST_ENTRY(add_float_constants), + TEST_ENTRY(sub_float_constants), + TEST_ENTRY(mul_float_constants), + TEST_ENTRY(div_float_constants), + TEST_ENTRY(neg_float), + TEST_ENTRY(mov_float_register), + TEST_ENTRY(int_to_float_signed), + TEST_ENTRY(neg_int_to_float), + TEST_ENTRY(float_to_int), + TEST_ENTRY(neg_float_to_int), + TEST_ENTRY(return_f32_constant), + TEST_ENTRY(add_f32_constants), +}; + +int main(int argc, char **argv) { + printf("HashLink AArch64 JIT - Floating Point Operations Tests\n"); + return run_tests(tests, sizeof(tests) / sizeof(tests[0])); +} diff --git a/other/tests/minimal/test_fp_pressure.c b/other/tests/minimal/test_fp_pressure.c new file mode 100644 index 000000000..f761eb8b3 --- /dev/null +++ b/other/tests/minimal/test_fp_pressure.c @@ -0,0 +1,229 @@ +/* + * Test floating-point register pressure for HashLink AArch64 JIT + * + * This test verifies that the register allocator correctly handles + * high FP register pressure by spilling to stack, without using + * the callee-saved V8-V15 registers (which aren't saved in our prologue). + * + * We have 24 caller-saved FP registers (V0-V7, V16-V31). + * If we use more than 24 float values simultaneously, the allocator + * must spill some to stack. + */ +#include "test_harness.h" +#include <math.h> + +/* Helper to compare floats with epsilon */ +static int float_eq(double a, double b) { + double eps = 1e-9; + return fabs(a - b) < eps; +} + +/* + * Test: Sum of 10 floats + * Uses moderate register pressure to verify basic allocation works. + * r0-r9: float constants + * r10: accumulator + * Returns sum of 1.0 + 2.0 + ... + 10.0 = 55.0 + */ +TEST(sum_10_floats) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + /* Float pool: 1.0 through 10.0 */ + double floats[] = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 }; + test_init_floats(c, 10, floats); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_F64], 0, NULL); + + /* 11 registers: r0-r9 for constants, r10 for accumulator */ + hl_type *regs[11]; + for (int i = 0; i < 11; i++) regs[i] = &c->types[T_F64]; + + hl_opcode ops[] = { + OP2(OFloat, 0, 0), /* r0 = 1.0 */ + OP2(OFloat, 1, 1), /* r1 = 2.0 */ + OP2(OFloat, 2, 2), /* r2 = 3.0 */ + OP2(OFloat, 3, 3), /* r3 = 4.0 */ + OP2(OFloat, 4, 4), /* r4 = 5.0 */ + OP2(OFloat, 5, 5), /* r5 = 6.0 */ + OP2(OFloat, 6, 6), /* r6 = 7.0 */ + OP2(OFloat, 7, 7), /* r7 = 8.0 */ + OP2(OFloat, 8, 8), /* r8 = 9.0 */ + OP2(OFloat, 9, 9), /* r9 = 10.0 */ + OP3(OAdd, 10, 0, 1), /* r10 = r0 + r1 = 3.0 */ + OP3(OAdd, 10, 10, 2), /* r10 = 3.0 + 3.0 = 6.0 */ + OP3(OAdd, 10, 10, 3), /* r10 = 6.0 + 4.0 = 10.0 */ + OP3(OAdd, 10, 10, 4), /* r10 = 10.0 + 5.0 = 15.0 */ + OP3(OAdd, 10, 10, 5), /* r10 = 15.0 + 6.0 = 21.0 */ + OP3(OAdd, 10, 10, 6), /* r10 = 21.0 + 7.0 = 28.0 */ + OP3(OAdd, 10, 10, 7), /* r10 = 28.0 + 8.0 = 36.0 */ + OP3(OAdd, 10, 10, 8), /* r10 = 36.0 + 9.0 = 45.0 */ + OP3(OAdd, 10, 10, 9), /* r10 = 45.0 + 10.0 = 55.0 */ + OP1(ORet, 10), + }; + + test_alloc_function(c, 0, fn_type, 11, regs, 20, ops); + + int result; + double (*fn)(void) = (double(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + double ret = fn(); + if (!float_eq(ret, 55.0)) { + fprintf(stderr, " Expected 55.0, got %f\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Sum of 25 floats - forces register spilling + * Uses 25 float values, which is more than the 24 available caller-saved + * FP registers (V0-V7, V16-V31). This forces spilling to stack.
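+ * Counting the accumulator, 26 f64 values are live at once against the 24
+ * usable caller-saved registers, so at least two must land in stack slots.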
+ * Returns sum of 1.0 + 2.0 + ... + 25.0 = 325.0 + */ +TEST(sum_25_floats_spill) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + /* Float pool: 1.0 through 25.0 */ + double floats[25]; + for (int i = 0; i < 25; i++) { + floats[i] = (double)(i + 1); + } + test_init_floats(c, 25, floats); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_F64], 0, NULL); + + /* 26 registers: r0-r24 for constants, r25 for accumulator */ + hl_type *regs[26]; + for (int i = 0; i < 26; i++) regs[i] = &c->types[T_F64]; + + /* Build opcodes dynamically */ + hl_opcode ops[52]; /* 25 loads + 1 initial add + 23 adds + 1 ret = 50, plus some slack */ + int op_idx = 0; + + /* Load all 25 float constants */ + for (int i = 0; i < 25; i++) { + ops[op_idx++] = (hl_opcode){ .op = OFloat, .p1 = i, .p2 = i }; + } + + /* Sum them: r25 = r0 + r1, then r25 = r25 + r2, etc. */ + ops[op_idx++] = (hl_opcode){ .op = OAdd, .p1 = 25, .p2 = 0, .p3 = 1 }; + for (int i = 2; i < 25; i++) { + ops[op_idx++] = (hl_opcode){ .op = OAdd, .p1 = 25, .p2 = 25, .p3 = i }; + } + + ops[op_idx++] = (hl_opcode){ .op = ORet, .p1 = 25 }; + + test_alloc_function(c, 0, fn_type, 26, regs, op_idx, ops); + + int result; + double (*fn)(void) = (double(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + double ret = fn(); + double expected = 325.0; /* 1+2+...+25 = 25*26/2 = 325 */ + if (!float_eq(ret, expected)) { + fprintf(stderr, " Expected %f, got %f\n", expected, ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Complex expression with many live floats + * Computes: (a*b + c*d + e*f + g*h) * (i*j + k*l + m*n + o*p) + * This keeps many intermediate values live simultaneously. + */ +TEST(complex_expression_many_live) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + /* 16 input values: a=1, b=2, c=3, d=4, ... 
p=16 */ + double floats[16]; + for (int i = 0; i < 16; i++) { + floats[i] = (double)(i + 1); + } + test_init_floats(c, 16, floats); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_F64], 0, NULL); + + /* 28 registers: + * r0-r15: input values (a-p) + * r16-r23: products (a*b, c*d, e*f, g*h, i*j, k*l, m*n, o*p) + * r24-r25: partial sums (left and right) + * r26-r27: more partial sums + */ + hl_type *regs[28]; + for (int i = 0; i < 28; i++) regs[i] = &c->types[T_F64]; + + hl_opcode ops[] = { + /* Load 16 values */ + OP2(OFloat, 0, 0), OP2(OFloat, 1, 1), OP2(OFloat, 2, 2), OP2(OFloat, 3, 3), + OP2(OFloat, 4, 4), OP2(OFloat, 5, 5), OP2(OFloat, 6, 6), OP2(OFloat, 7, 7), + OP2(OFloat, 8, 8), OP2(OFloat, 9, 9), OP2(OFloat, 10, 10), OP2(OFloat, 11, 11), + OP2(OFloat, 12, 12), OP2(OFloat, 13, 13), OP2(OFloat, 14, 14), OP2(OFloat, 15, 15), + + /* 8 products - all computed before any are consumed */ + OP3(OMul, 16, 0, 1), /* r16 = a*b = 1*2 = 2 */ + OP3(OMul, 17, 2, 3), /* r17 = c*d = 3*4 = 12 */ + OP3(OMul, 18, 4, 5), /* r18 = e*f = 5*6 = 30 */ + OP3(OMul, 19, 6, 7), /* r19 = g*h = 7*8 = 56 */ + OP3(OMul, 20, 8, 9), /* r20 = i*j = 9*10 = 90 */ + OP3(OMul, 21, 10, 11), /* r21 = k*l = 11*12 = 132 */ + OP3(OMul, 22, 12, 13), /* r22 = m*n = 13*14 = 182 */ + OP3(OMul, 23, 14, 15), /* r23 = o*p = 15*16 = 240 */ + + /* Left sum: (a*b + c*d + e*f + g*h) */ + OP3(OAdd, 24, 16, 17), /* r24 = 2 + 12 = 14 */ + OP3(OAdd, 25, 18, 19), /* r25 = 30 + 56 = 86 */ + OP3(OAdd, 24, 24, 25), /* r24 = 14 + 86 = 100 */ + + /* Right sum: (i*j + k*l + m*n + o*p) */ + OP3(OAdd, 26, 20, 21), /* r26 = 90 + 132 = 222 */ + OP3(OAdd, 27, 22, 23), /* r27 = 182 + 240 = 422 */ + OP3(OAdd, 26, 26, 27), /* r26 = 222 + 422 = 644 */ + + /* Final result: left * right */ + OP3(OMul, 24, 24, 26), /* r24 = 100 * 644 = 64400 */ + OP1(ORet, 24), + }; + + test_alloc_function(c, 0, fn_type, 28, regs, sizeof(ops)/sizeof(ops[0]), ops); + + int result; + double (*fn)(void) = (double(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + double ret = fn(); + double expected = 64400.0; + if (!float_eq(ret, expected)) { + fprintf(stderr, " Expected %f, got %f\n", expected, ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test registration + */ +static test_entry_t tests[] = { + TEST_ENTRY(sum_10_floats), + TEST_ENTRY(sum_25_floats_spill), + TEST_ENTRY(complex_expression_many_live), +}; + +int main(int argc, char **argv) { + (void)argc; (void)argv; + return run_tests(tests, sizeof(tests)/sizeof(tests[0])); +} diff --git a/other/tests/minimal/test_globals.c b/other/tests/minimal/test_globals.c new file mode 100644 index 000000000..7704aafc2 --- /dev/null +++ b/other/tests/minimal/test_globals.c @@ -0,0 +1,189 @@ +/* + * Test global variable operations for HashLink AArch64 JIT + * + * Tests: OGetGlobal, OSetGlobal + */ +#include "test_harness.h" + +/* + * Helper to setup globals in the code structure + */ +static void test_init_globals(hl_code *c, int count, hl_type **types) { + c->nglobals = count; + c->globals = (hl_type**)malloc(sizeof(hl_type*) * count); + memcpy(c->globals, types, sizeof(hl_type*) * count); +} + +/* + * Test: Set and get a global integer + * + * op0: int r0, 0 ; r0 = 42 + * op1: setglobal 0, r0 ; global[0] = r0 + * op2: getglobal r1, 0 ; r1 = global[0] + * op3: ret r1 ; return 42 + */ +TEST(global_int_set_get) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 42 }; + test_init_ints(c, 1, ints); + + /* Setup one global of 
type i32 */ + hl_type *global_types[] = { &c->types[T_I32] }; + test_init_globals(c, 1, global_types); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* op0: r0 = 42 */ + OP2(OSetGlobal, 0, 0), /* op1: global[0] = r0 */ + OP2(OGetGlobal, 1, 0), /* op2: r1 = global[0] */ + OP1(ORet, 1), /* op3: return r1 */ + }; + + test_alloc_function(c, 0, fn_type, 2, regs, 4, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Multiple globals + * + * op0: int r0, 0 ; r0 = 10 + * op1: int r1, 1 ; r1 = 20 + * op2: setglobal 0, r0 ; global[0] = 10 + * op3: setglobal 1, r1 ; global[1] = 20 + * op4: getglobal r2, 0 ; r2 = global[0] = 10 + * op5: getglobal r3, 1 ; r3 = global[1] = 20 + * op6: add r4, r2, r3 ; r4 = 10 + 20 = 30 + * op7: ret r4 ; return 30 + */ +TEST(global_multiple) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 10, 20 }; + test_init_ints(c, 2, ints); + + /* Setup two globals of type i32 */ + hl_type *global_types[] = { &c->types[T_I32], &c->types[T_I32] }; + test_init_globals(c, 2, global_types); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { + &c->types[T_I32], &c->types[T_I32], &c->types[T_I32], + &c->types[T_I32], &c->types[T_I32] + }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* op0: r0 = 10 */ + OP2(OInt, 1, 1), /* op1: r1 = 20 */ + OP2(OSetGlobal, 0, 0), /* op2: global[0] = r0 */ + OP2(OSetGlobal, 1, 1), /* op3: global[1] = r1 */ + OP2(OGetGlobal, 2, 0), /* op4: r2 = global[0] */ + OP2(OGetGlobal, 3, 1), /* op5: r3 = global[1] */ + OP3(OAdd, 4, 2, 3), /* op6: r4 = r2 + r3 */ + OP1(ORet, 4), /* op7: return r4 */ + }; + + test_alloc_function(c, 0, fn_type, 5, regs, 8, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 30) { + fprintf(stderr, " Expected 30, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Global persists across calls + * Call function twice - first sets global, second reads it + */ +TEST(global_persists) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 0, 99 }; + test_init_ints(c, 2, ints); + + /* Setup one global of type i32 */ + hl_type *global_types[] = { &c->types[T_I32] }; + test_init_globals(c, 1, global_types); + + /* Function takes an int arg: if arg==0, set global to 99; else return global */ + hl_type *arg_types[] = { &c->types[T_I32] }; + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 1, arg_types); + hl_type *regs[] = { + &c->types[T_I32], /* r0: arg */ + &c->types[T_I32], /* r1: temp */ + }; + + hl_opcode ops[] = { + OP2(OInt, 1, 0), /* op0: r1 = 0 */ + OP3(OJEq, 0, 1, 2), /* op1: if r0 == 0 goto op4 */ + OP2(OGetGlobal, 1, 0), /* op2: r1 = global[0] */ + OP1(ORet, 1), /* op3: return r1 */ + /* setter path */ + OP2(OInt, 1, 1), /* op4: r1 = 99 */ + OP2(OSetGlobal, 0, 1), /* op5: global[0] = 99 */ + OP1(ORet, 1), /* op6: return 99 */ + }; + + test_alloc_function(c, 0, fn_type, 2, regs, 7, ops); + + int result; + int (*fn)(int) = (int(*)(int))test_jit_compile(c, &result); + if (result != 
TEST_PASS) return result; + + /* First call: set global to 99 */ + int ret1 = fn(0); + if (ret1 != 99) { + fprintf(stderr, " First call: expected 99, got %d\n", ret1); + return TEST_FAIL; + } + + /* Second call: read global */ + int ret2 = fn(1); + if (ret2 != 99) { + fprintf(stderr, " Second call: expected 99, got %d\n", ret2); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* Test list */ +static test_entry_t tests[] = { + TEST_ENTRY(global_int_set_get), + TEST_ENTRY(global_multiple), + TEST_ENTRY(global_persists), +}; + +int main(int argc, char **argv) { + printf("HashLink AArch64 JIT - Global Variable Tests\n"); + return run_tests(tests, sizeof(tests) / sizeof(tests[0])); +} diff --git a/other/tests/minimal/test_harness.h b/other/tests/minimal/test_harness.h new file mode 100644 index 000000000..694c814ad --- /dev/null +++ b/other/tests/minimal/test_harness.h @@ -0,0 +1,389 @@ +/* + * Minimal JIT Test Harness for HashLink AArch64 JIT + * + * This provides helpers to construct hl_code structures directly in memory, + * bypassing the bytecode file format. This allows testing individual opcodes + * without pulling in the entire Haxe stdlib. + */ +#ifndef TEST_HARNESS_H +#define TEST_HARNESS_H + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <hl.h> +#include <hlmodule.h> /* hl_module / hl_jit_* declarations used below */ + +/* Test result codes */ +#define TEST_PASS 0 +#define TEST_FAIL 1 +#define TEST_SKIP 2 + +/* Colors for output */ +#define GREEN "\033[32m" +#define RED "\033[31m" +#define YELLOW "\033[33m" +#define RESET "\033[0m" + +/* Helper to create a minimal hl_code structure */ +static hl_code *test_alloc_code(void) { + hl_code *c = (hl_code*)calloc(1, sizeof(hl_code)); + c->version = 5; + hl_alloc_init(&c->alloc); + hl_alloc_init(&c->falloc); + return c; +} + +/* Predefined types - indices into types array */ +#define T_VOID 0 +#define T_I32 1 +#define T_I64 2 +#define T_F32 3 +#define T_F64 4 +#define T_BOOL 5 +#define T_BYTES 6 +#define T_TYPE 7 /* HTYPE - for type pointers, size = pointer size */ + +/* Base types array - common types needed for most tests */ +#define BASE_TYPES_COUNT 8 +#define MAX_TYPES 32 /* Pre-allocate space for additional types */ + +static void test_init_base_types(hl_code *c) { + /* Pre-allocate space for base types + function types */ + c->types = (hl_type*)calloc(MAX_TYPES, sizeof(hl_type)); + c->ntypes = BASE_TYPES_COUNT; + c->types[T_VOID].kind = HVOID; + c->types[T_I32].kind = HI32; + c->types[T_I64].kind = HI64; + c->types[T_F32].kind = HF32; + c->types[T_F64].kind = HF64; + c->types[T_BOOL].kind = HBOOL; + c->types[T_BYTES].kind = HBYTES; + c->types[T_TYPE].kind = HTYPE; +} + +/* Allocate a function type: fun(args...)
-> ret */ +static hl_type *test_alloc_fun_type(hl_code *c, hl_type *ret, int nargs, hl_type **args) { + if (c->ntypes >= MAX_TYPES) { + fprintf(stderr, "Too many types (max %d)\n", MAX_TYPES); + return NULL; + } + + int idx = c->ntypes++; + hl_type *t = &c->types[idx]; + memset(t, 0, sizeof(hl_type)); + + t->kind = HFUN; + t->fun = (hl_type_fun*)calloc(1, sizeof(hl_type_fun)); + t->fun->ret = ret; + t->fun->nargs = nargs; + if (nargs > 0) { + t->fun->args = (hl_type**)malloc(sizeof(hl_type*) * nargs); + memcpy(t->fun->args, args, sizeof(hl_type*) * nargs); + } + return t; +} + +/* Max functions for pre-allocation */ +#define MAX_FUNCTIONS 16 + +/* Allocate a function */ +static hl_function *test_alloc_function(hl_code *c, int findex, hl_type *type, + int nregs, hl_type **regs, + int nops, hl_opcode *ops) { + if (c->functions == NULL) { + c->functions = (hl_function*)calloc(MAX_FUNCTIONS, sizeof(hl_function)); + c->nfunctions = 0; + } + + if (c->nfunctions >= MAX_FUNCTIONS) { + fprintf(stderr, "Too many functions (max %d)\n", MAX_FUNCTIONS); + return NULL; + } + + hl_function *f = &c->functions[c->nfunctions++]; + f->findex = findex; + f->type = type; + f->nregs = nregs; + f->nops = nops; + + f->regs = (hl_type**)malloc(sizeof(hl_type*) * nregs); + memcpy(f->regs, regs, sizeof(hl_type*) * nregs); + + f->ops = (hl_opcode*)malloc(sizeof(hl_opcode) * nops); + memcpy(f->ops, ops, sizeof(hl_opcode) * nops); + + /* No debug info for minimal tests */ + f->debug = NULL; + f->obj = NULL; + f->field.ref = NULL; + f->ref = 0; + + return f; +} + +/* Helper macro for creating opcodes */ +#define OP0(opcode) {opcode, 0, 0, 0, NULL} +#define OP1(opcode, a) {opcode, a, 0, 0, NULL} +#define OP2(opcode, a, b) {opcode, a, b, 0, NULL} +#define OP3(opcode, a, b, c) {opcode, a, b, c, NULL} + +/* + * For OCall2, the extra field stores the 4th parameter as an int cast to pointer. 
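+ * (Opcodes with longer argument lists, such as OCallClosure with explicit
+ * arguments, instead point extra at an array of register indices; see the
+ * static int extra[] arrays in test_instance_closure.c.)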
+ * Usage: OP4_CALL2(OCall2, dst, findex, arg1, arg2) + */ +#define OP4_CALL2(opcode, a, b, c, d) {opcode, a, b, c, (int*)(intptr_t)(d)} + +/* Initialize integers pool */ +static void test_init_ints(hl_code *c, int count, int *values) { + c->nints = count; + c->ints = (int*)malloc(sizeof(int) * count); + memcpy(c->ints, values, sizeof(int) * count); +} + +/* Initialize floats pool */ +static void test_init_floats(hl_code *c, int count, double *values) { + c->nfloats = count; + c->floats = (double*)malloc(sizeof(double) * count); + memcpy(c->floats, values, sizeof(double) * count); +} + +/* Native function pointer registry + * Since hl_native doesn't have a ptr field, we track them separately */ +#define MAX_NATIVE_PTRS 16 +static struct { + int findex; + void *ptr; +} g_native_ptrs[MAX_NATIVE_PTRS]; +static int g_native_ptr_count = 0; + +static void test_register_native_ptr(int findex, void *ptr) { + if (g_native_ptr_count >= MAX_NATIVE_PTRS) { + fprintf(stderr, "Too many native functions (max %d)\n", MAX_NATIVE_PTRS); + return; + } + g_native_ptrs[g_native_ptr_count].findex = findex; + g_native_ptrs[g_native_ptr_count].ptr = ptr; + g_native_ptr_count++; +} + +static void test_clear_native_ptrs(void) { + g_native_ptr_count = 0; +} + +/* Add a native function to the code structure */ +static void test_add_native(hl_code *c, int findex, const char *lib, const char *name, + hl_type *fn_type, void *func_ptr) { + if (c->natives == NULL) { + c->natives = (hl_native*)calloc(MAX_NATIVE_PTRS, sizeof(hl_native)); + c->nnatives = 0; + } + + hl_native *n = &c->natives[c->nnatives++]; + n->findex = findex; + n->lib = lib; + n->name = name; + n->t = fn_type; + + /* Register the function pointer separately */ + test_register_native_ptr(findex, func_ptr); +} + +/* Build and JIT compile the code, returns the function pointer */ +typedef void *(*jit_func_t)(void); + +static void *test_jit_compile(hl_code *c, int *out_result) { + /* Set entrypoint if not set */ + if (c->nfunctions > 0 && c->entrypoint == 0) { + c->entrypoint = c->functions[0].findex; + } + + /* Ensure we have globals array (can be empty) */ + if (c->globals == NULL) { + c->nglobals = 0; + c->globals = NULL; + } + + /* Natives are optional - keep if set */ + if (c->natives == NULL) { + c->nnatives = 0; + } + + /* No constants */ + c->nconstants = 0; + c->constants = NULL; + + /* No strings/bytes for now */ + if (c->strings == NULL) { + c->nstrings = 0; + c->strings = NULL; + c->strings_lens = NULL; + c->ustrings = NULL; + } + c->nbytes = 0; + c->bytes = NULL; + c->bytes_pos = NULL; + + /* No debug */ + c->hasdebug = false; + c->ndebugfiles = 0; + c->debugfiles = NULL; + c->debugfiles_lens = NULL; + + /* Allocate module */ + hl_module *m = hl_module_alloc(c); + if (m == NULL) { + fprintf(stderr, "Failed to allocate module\n"); + *out_result = TEST_FAIL; + return NULL; + } + + /* Setup module context for object types (needed for hl_get_obj_rt allocator) */ + for (int i = 0; i < c->ntypes; i++) { + if (c->types[i].kind == HOBJ && c->types[i].obj != NULL) { + c->types[i].obj->m = &m->ctx; + } + } + + /* Setup function indexes */ + for (int i = 0; i < c->nfunctions; i++) { + hl_function *f = c->functions + i; + m->functions_indexes[f->findex] = i; + m->ctx.functions_types[f->findex] = f->type; + } + + /* Setup native function indexes and pointers */ + for (int i = 0; i < c->nnatives; i++) { + hl_native *n = &c->natives[i]; + m->functions_indexes[n->findex] = i + c->nfunctions; /* natives come after functions */ + m->ctx.functions_types[n->findex] 
= n->t; + } + for (int i = 0; i < g_native_ptr_count; i++) { + m->functions_ptrs[g_native_ptrs[i].findex] = g_native_ptrs[i].ptr; + } + test_clear_native_ptrs(); /* Reset for next test */ + + /* JIT compile */ + jit_ctx *ctx = hl_jit_alloc(); + if (ctx == NULL) { + fprintf(stderr, "Failed to allocate JIT context\n"); + hl_module_free(m); + *out_result = TEST_FAIL; + return NULL; + } + + hl_jit_init(ctx, m); + + for (int i = 0; i < c->nfunctions; i++) { + hl_function *f = c->functions + i; + int fpos = hl_jit_function(ctx, m, f); + if (fpos < 0) { + fprintf(stderr, "Failed to JIT function %d\n", f->findex); + hl_jit_free(ctx, false); + hl_module_free(m); + *out_result = TEST_FAIL; + return NULL; + } + m->functions_ptrs[f->findex] = (void*)(intptr_t)fpos; + } + + int codesize; + hl_debug_infos *debug_info = NULL; + void *jit_code = hl_jit_code(ctx, m, &codesize, &debug_info, NULL); + + if (jit_code == NULL) { + fprintf(stderr, "Failed to finalize JIT code\n"); + hl_jit_free(ctx, false); + hl_module_free(m); + *out_result = TEST_FAIL; + return NULL; + } + + /* Fix up function pointers */ + for (int i = 0; i < c->nfunctions; i++) { + hl_function *f = c->functions + i; + m->functions_ptrs[f->findex] = (unsigned char*)jit_code + (intptr_t)m->functions_ptrs[f->findex]; + } + + m->jit_code = jit_code; + m->codesize = codesize; + + hl_jit_free(ctx, false); + + *out_result = TEST_PASS; + + /* Return pointer to entry function */ + return m->functions_ptrs[c->entrypoint]; +} + +/* Test runner infrastructure */ +typedef int (*test_func_t)(void); + +typedef struct { + const char *name; + test_func_t func; +} test_entry_t; + +static int run_tests(test_entry_t *tests, int count) { + int passed = 0, failed = 0, skipped = 0; + + printf("\n=== Running %d tests ===\n\n", count); + + for (int i = 0; i < count; i++) { + printf(" [%d/%d] %s ... ", i + 1, count, tests[i].name); + fflush(stdout); + + int result = tests[i].func(); + + switch (result) { + case TEST_PASS: + printf(GREEN "PASS" RESET "\n"); + passed++; + break; + case TEST_FAIL: + printf(RED "FAIL" RESET "\n"); + failed++; + break; + case TEST_SKIP: + printf(YELLOW "SKIP" RESET "\n"); + skipped++; + break; + } + } + + printf("\n=== Results: %d passed, %d failed, %d skipped ===\n\n", + passed, failed, skipped); + + return failed > 0 ? 1 : 0; +} + +/* Convenience macro to define a test */ +#define TEST(name) static int test_##name(void) +#define TEST_ENTRY(name) { #name, test_##name } + +/* Stub functions for exception handling */ +static uchar *test_resolve_symbol(void *addr, uchar *out, int *outSize) { + (void)addr; (void)out; (void)outSize; + return NULL; /* No symbol resolution in minimal tests */ +} + +static int test_capture_stack(void **stack, int size) { + (void)stack; (void)size; + return 0; /* No stack capture in minimal tests */ +} + +/* Initialize HL runtime - call once at start */ +static void test_init_runtime(void) { + static int initialized = 0; + if (!initialized) { + hl_global_init(); + static int ctx; + hl_register_thread(&ctx); + /* Set up exception handling - REQUIRED for hl_throw to work! 
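+ * hl_throw consults these callbacks while capturing the backtrace of a
+ * thrown value, so leaving them NULL would crash every exception test.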
*/ + hl_setup.resolve_symbol = test_resolve_symbol; + hl_setup.capture_stack = test_capture_stack; + initialized = 1; + } +} + +#endif /* TEST_HARNESS_H */ diff --git a/other/tests/minimal/test_i64_ops.c b/other/tests/minimal/test_i64_ops.c new file mode 100644 index 000000000..31c6695bb --- /dev/null +++ b/other/tests/minimal/test_i64_ops.c @@ -0,0 +1,545 @@ +/* + * Test 64-bit integer operations for HashLink AArch64 JIT + * + * Tests: i64 arithmetic with OAdd, OSub, OMul, OSDiv + * + * Note: OInt loads 32-bit values. For i64 registers, the value is sign-extended. + */ +#include "test_harness.h" + +/* + * Test: Return 64-bit constant (sign-extended from i32) + */ +TEST(return_i64_constant) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 42 }; + test_init_ints(c, 1, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I64], 0, NULL); + hl_type *regs[] = { &c->types[T_I64] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0:i64 = 42 (sign-extended) */ + OP1(ORet, 0), + }; + + test_alloc_function(c, 0, fn_type, 1, regs, 2, ops); + + int result; + long long (*fn)(void) = (long long(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + long long ret = fn(); + if (ret != 42LL) { + fprintf(stderr, " Expected 42, got %lld\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Add 64-bit integers + */ +TEST(add_i64) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 10, 32 }; + test_init_ints(c, 2, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I64], 0, NULL); + hl_type *regs[] = { &c->types[T_I64], &c->types[T_I64], &c->types[T_I64] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), + OP2(OInt, 1, 1), + OP3(OAdd, 2, 0, 1), + OP1(ORet, 2), + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 4, ops); + + int result; + long long (*fn)(void) = (long long(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + long long ret = fn(); + if (ret != 42LL) { + fprintf(stderr, " Expected 42, got %lld\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Subtract 64-bit integers + */ +TEST(sub_i64) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 100, 58 }; + test_init_ints(c, 2, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I64], 0, NULL); + hl_type *regs[] = { &c->types[T_I64], &c->types[T_I64], &c->types[T_I64] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), + OP2(OInt, 1, 1), + OP3(OSub, 2, 0, 1), + OP1(ORet, 2), + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 4, ops); + + int result; + long long (*fn)(void) = (long long(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + long long ret = fn(); + if (ret != 42LL) { + fprintf(stderr, " Expected 42, got %lld\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Multiply 64-bit integers + */ +TEST(mul_i64) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 6, 7 }; + test_init_ints(c, 2, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I64], 0, NULL); + hl_type *regs[] = { &c->types[T_I64], &c->types[T_I64], &c->types[T_I64] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), + OP2(OInt, 1, 1), + OP3(OMul, 2, 0, 1), + OP1(ORet, 2), + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 4, ops); + + int result; + long long 
(*fn)(void) = (long long(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + long long ret = fn(); + if (ret != 42LL) { + fprintf(stderr, " Expected 42, got %lld\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Divide 64-bit integers + */ +TEST(sdiv_i64) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 84, 2 }; + test_init_ints(c, 2, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I64], 0, NULL); + hl_type *regs[] = { &c->types[T_I64], &c->types[T_I64], &c->types[T_I64] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), + OP2(OInt, 1, 1), + OP3(OSDiv, 2, 0, 1), + OP1(ORet, 2), + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 4, ops); + + int result; + long long (*fn)(void) = (long long(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + long long ret = fn(); + if (ret != 42LL) { + fprintf(stderr, " Expected 42, got %lld\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Modulo 64-bit integers + */ +TEST(smod_i64) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 142, 100 }; + test_init_ints(c, 2, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I64], 0, NULL); + hl_type *regs[] = { &c->types[T_I64], &c->types[T_I64], &c->types[T_I64] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), + OP2(OInt, 1, 1), + OP3(OSMod, 2, 0, 1), + OP1(ORet, 2), + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 4, ops); + + int result; + long long (*fn)(void) = (long long(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + long long ret = fn(); + if (ret != 42LL) { + fprintf(stderr, " Expected 42, got %lld\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Negate 64-bit integer + */ +TEST(neg_i64) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { -42 }; + test_init_ints(c, 1, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I64], 0, NULL); + hl_type *regs[] = { &c->types[T_I64], &c->types[T_I64] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), + OP2(ONeg, 1, 0), + OP1(ORet, 1), + }; + + test_alloc_function(c, 0, fn_type, 2, regs, 3, ops); + + int result; + long long (*fn)(void) = (long long(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + long long ret = fn(); + if (ret != 42LL) { + fprintf(stderr, " Expected 42, got %lld\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Bitwise AND 64-bit + */ +TEST(and_i64) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 0xFF, 0x2A }; + test_init_ints(c, 2, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I64], 0, NULL); + hl_type *regs[] = { &c->types[T_I64], &c->types[T_I64], &c->types[T_I64] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), + OP2(OInt, 1, 1), + OP3(OAnd, 2, 0, 1), + OP1(ORet, 2), + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 4, ops); + + int result; + long long (*fn)(void) = (long long(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + long long ret = fn(); + if (ret != 42LL) { + fprintf(stderr, " Expected 42, got %lld\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Bitwise OR 64-bit + */ +TEST(or_i64) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + 
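+ /* 0x20 and 0x0A have disjoint bit patterns, so the OR below is exactly 0x2A = 42. */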
test_init_base_types(c); + + int ints[] = { 0x20, 0x0A }; + test_init_ints(c, 2, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I64], 0, NULL); + hl_type *regs[] = { &c->types[T_I64], &c->types[T_I64], &c->types[T_I64] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), + OP2(OInt, 1, 1), + OP3(OOr, 2, 0, 1), + OP1(ORet, 2), + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 4, ops); + + int result; + long long (*fn)(void) = (long long(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + long long ret = fn(); + if (ret != 42LL) { + fprintf(stderr, " Expected 42, got %lld\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Left shift 64-bit: 21 << 1 = 42 + */ +TEST(shl_i64) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 21, 1 }; + test_init_ints(c, 2, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I64], 0, NULL); + hl_type *regs[] = { &c->types[T_I64], &c->types[T_I64], &c->types[T_I64] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), + OP2(OInt, 1, 1), + OP3(OShl, 2, 0, 1), + OP1(ORet, 2), + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 4, ops); + + int result; + long long (*fn)(void) = (long long(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + long long ret = fn(); + if (ret != 42LL) { + fprintf(stderr, " Expected 42, got %lld\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Large shift - beyond 32 bits + * 1 << 40 = 0x10000000000 (1099511627776) + */ +TEST(shl_i64_large) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 1, 40 }; + test_init_ints(c, 2, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I64], 0, NULL); + hl_type *regs[] = { &c->types[T_I64], &c->types[T_I64], &c->types[T_I64] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), + OP2(OInt, 1, 1), + OP3(OShl, 2, 0, 1), + OP1(ORet, 2), + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 4, ops); + + int result; + long long (*fn)(void) = (long long(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + long long ret = fn(); + long long expected = 1LL << 40; + if (ret != expected) { + fprintf(stderr, " Expected %lld, got %lld\n", expected, ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Move i64 register + */ +TEST(mov_i64) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 42 }; + test_init_ints(c, 1, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I64], 0, NULL); + hl_type *regs[] = { &c->types[T_I64], &c->types[T_I64] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), + OP2(OMov, 1, 0), + OP1(ORet, 1), + }; + + test_alloc_function(c, 0, fn_type, 2, regs, 3, ops); + + int result; + long long (*fn)(void) = (long long(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + long long ret = fn(); + if (ret != 42LL) { + fprintf(stderr, " Expected 42, got %lld\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Increment i64 + */ +TEST(incr_i64) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 41 }; + test_init_ints(c, 1, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I64], 0, NULL); + hl_type *regs[] = { &c->types[T_I64] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), + OP1(OIncr, 0), + OP1(ORet, 0), + }; + + 
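+ /* OIncr mutates r0 in place, so the function needs no second register. */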
test_alloc_function(c, 0, fn_type, 1, regs, 3, ops); + + int result; + long long (*fn)(void) = (long long(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + long long ret = fn(); + if (ret != 42LL) { + fprintf(stderr, " Expected 42, got %lld\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Decrement i64 + */ +TEST(decr_i64) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 43 }; + test_init_ints(c, 1, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I64], 0, NULL); + hl_type *regs[] = { &c->types[T_I64] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), + OP1(ODecr, 0), + OP1(ORet, 0), + }; + + test_alloc_function(c, 0, fn_type, 1, regs, 3, ops); + + int result; + long long (*fn)(void) = (long long(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + long long ret = fn(); + if (ret != 42LL) { + fprintf(stderr, " Expected 42, got %lld\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* Test list */ +static test_entry_t tests[] = { + TEST_ENTRY(return_i64_constant), + TEST_ENTRY(add_i64), + TEST_ENTRY(sub_i64), + TEST_ENTRY(mul_i64), + TEST_ENTRY(sdiv_i64), + TEST_ENTRY(smod_i64), + TEST_ENTRY(neg_i64), + TEST_ENTRY(and_i64), + TEST_ENTRY(or_i64), + TEST_ENTRY(shl_i64), + TEST_ENTRY(shl_i64_large), + TEST_ENTRY(mov_i64), + TEST_ENTRY(incr_i64), + TEST_ENTRY(decr_i64), +}; + +int main(int argc, char **argv) { + printf("HashLink AArch64 JIT - 64-bit Integer Operations Tests\n"); + return run_tests(tests, sizeof(tests) / sizeof(tests[0])); +} diff --git a/other/tests/minimal/test_instance_closure.c b/other/tests/minimal/test_instance_closure.c new file mode 100644 index 000000000..d9dd59bca --- /dev/null +++ b/other/tests/minimal/test_instance_closure.c @@ -0,0 +1,390 @@ +/* + * Test instance and virtual closure operations for HashLink AArch64 JIT + * + * Tests: OInstanceClosure, OVirtualClosure, OCallClosure with captured values + * + * OInstanceClosure creates a closure that captures a value (typically 'this'). + * OVirtualClosure creates a closure from a virtual method lookup. + */ +#include "test_harness.h" + +/* + * Test: OInstanceClosure with captured i32 value + * + * fn0: (i32) -> i32 { return arg; } // The captured value becomes the arg + * fn1: () -> i32 { + * r0 = 42 + * r1 = instance_closure(fn0, r0) ; OInstanceClosure with captured value + * r2 = call_closure(r1) ; OCallClosure with 0 explicit args + * return r2 + * } + * + * When called, the closure passes the captured value (42) as the first argument. 
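+ *
+ * In terms of the runtime's vclosure struct, the behavior is roughly
+ * (a sketch of the semantics, not the exact generated code):
+ *
+ *   vclosure cl;
+ *   cl.fun = fn0;                    // target function
+ *   cl.hasValue = 1;                 // captured value becomes hidden arg 0
+ *   cl.value = (void*)(intptr_t)42;
+ *   ((int(*)(int))cl.fun)((int)(intptr_t)cl.value);  // yields 42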
+ */ +TEST(instance_closure_basic) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 42 }; + test_init_ints(c, 1, ints); + + /* Function types */ + hl_type *arg_types[] = { &c->types[T_I32] }; + hl_type *fn_type_i32_i32 = test_alloc_fun_type(c, &c->types[T_I32], 1, arg_types); + hl_type *fn_type_void_i32 = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + c->functions = (hl_function*)calloc(MAX_FUNCTIONS, sizeof(hl_function)); + c->nfunctions = 0; + + /* fn0: findex=0, returns its argument */ + { + hl_type *regs[] = { &c->types[T_I32] }; + hl_opcode ops[] = { + OP1(ORet, 0), /* return r0 (the captured value passed as arg) */ + }; + hl_function *f = &c->functions[c->nfunctions++]; + f->findex = 0; + f->type = fn_type_i32_i32; + f->nregs = 1; + f->nops = 1; + f->regs = (hl_type**)malloc(sizeof(hl_type*) * 1); + memcpy(f->regs, regs, sizeof(regs)); + f->ops = (hl_opcode*)malloc(sizeof(hl_opcode) * 1); + memcpy(f->ops, ops, sizeof(ops)); + } + + /* fn1: findex=1, creates instance closure and calls it */ + { + /* r0 = captured value, r1 = closure, r2 = result */ + hl_type *regs[] = { &c->types[T_I32], fn_type_i32_i32, &c->types[T_I32] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 42 (captured value) */ + OP3(OInstanceClosure, 1, 0, 0), /* r1 = closure(fn0, r0) */ + {OCallClosure, 2, 1, 0, NULL}, /* r2 = call_closure(r1) with 0 explicit args */ + OP1(ORet, 2), + }; + + hl_function *f = &c->functions[c->nfunctions++]; + f->findex = 1; + f->type = fn_type_void_i32; + f->nregs = 3; + f->nops = 4; + f->regs = (hl_type**)malloc(sizeof(hl_type*) * 3); + memcpy(f->regs, regs, sizeof(regs)); + f->ops = (hl_opcode*)malloc(sizeof(hl_opcode) * 4); + memcpy(f->ops, ops, sizeof(ops)); + } + + c->entrypoint = 1; + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: OInstanceClosure with captured value and additional arguments + * + * fn0: (i32, i32) -> i32 { return arg0 + arg1; } + * fn1: () -> i32 { + * r0 = 10 ; value to capture + * r1 = instance_closure(fn0, r0) ; closure captures 10 + * r2 = 32 + * r3 = call_closure(r1, r2) ; calls fn0(10, 32) = 42 + * return r3 + * } + */ +TEST(instance_closure_with_arg) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 10, 32 }; + test_init_ints(c, 2, ints); + + /* Function types */ + hl_type *two_args[] = { &c->types[T_I32], &c->types[T_I32] }; + hl_type *fn_type_i32_i32_i32 = test_alloc_fun_type(c, &c->types[T_I32], 2, two_args); + hl_type *fn_type_void_i32 = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + /* For the closure type: when called with 1 arg, passes captured + arg */ + hl_type *one_arg[] = { &c->types[T_I32] }; + hl_type *fn_type_i32_i32 = test_alloc_fun_type(c, &c->types[T_I32], 1, one_arg); + + c->functions = (hl_function*)calloc(MAX_FUNCTIONS, sizeof(hl_function)); + c->nfunctions = 0; + + /* fn0: findex=0, returns arg0 + arg1 */ + { + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + hl_opcode ops[] = { + OP3(OAdd, 2, 0, 1), /* r2 = r0 + r1 */ + OP1(ORet, 2), + }; + hl_function *f = &c->functions[c->nfunctions++]; + f->findex = 0; + f->type = fn_type_i32_i32_i32; + f->nregs = 3; + f->nops = 2; + f->regs = (hl_type**)malloc(sizeof(hl_type*) * 3); + memcpy(f->regs, regs, 
sizeof(regs)); + f->ops = (hl_opcode*)malloc(sizeof(hl_opcode) * 2); + memcpy(f->ops, ops, sizeof(ops)); + } + + /* fn1: findex=1, creates instance closure and calls with additional arg */ + { + /* r0 = captured value, r1 = closure, r2 = additional arg, r3 = result */ + hl_type *regs[] = { &c->types[T_I32], fn_type_i32_i32, &c->types[T_I32], &c->types[T_I32] }; + + static int extra[] = { 2 }; /* r2 is the additional argument */ + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 10 (captured value) */ + OP3(OInstanceClosure, 1, 0, 0), /* r1 = closure(fn0, r0) */ + OP2(OInt, 2, 1), /* r2 = 32 */ + {OCallClosure, 3, 1, 1, extra}, /* r3 = call_closure(r1, r2) -> fn0(10, 32) */ + OP1(ORet, 3), + }; + + hl_function *f = &c->functions[c->nfunctions++]; + f->findex = 1; + f->type = fn_type_void_i32; + f->nregs = 4; + f->nops = 5; + f->regs = (hl_type**)malloc(sizeof(hl_type*) * 4); + memcpy(f->regs, regs, sizeof(regs)); + f->ops = (hl_opcode*)malloc(sizeof(hl_opcode) * 5); + memcpy(f->ops, ops, sizeof(ops)); + } + + c->entrypoint = 1; + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: OInstanceClosure called repeatedly + * + * This tests that closures work correctly when called multiple times, + * similar to how they're used in event handlers. + * + * fn0: (i32, i32) -> i32 { return arg0 + arg1; } + * fn1: () -> i32 { + * r0 = 0 ; accumulator + * r1 = instance_closure(fn0, r0) ; closure captures the accumulator value + * // Call the closure twice with different values + * r2 = 10 + * r3 = call_closure(r1, r2) ; 0 + 10 = 10 + * r4 = 20 + * r5 = call_closure(r1, r4) ; 0 + 20 = 20 + * r6 = r3 + r5 ; 10 + 20 = 30 + * return r6 + * } + */ +TEST(instance_closure_multiple_calls) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 0, 10, 20 }; + test_init_ints(c, 3, ints); + + /* Function types */ + hl_type *two_args[] = { &c->types[T_I32], &c->types[T_I32] }; + hl_type *fn_type_i32_i32_i32 = test_alloc_fun_type(c, &c->types[T_I32], 2, two_args); + hl_type *fn_type_void_i32 = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *one_arg[] = { &c->types[T_I32] }; + hl_type *fn_type_i32_i32 = test_alloc_fun_type(c, &c->types[T_I32], 1, one_arg); + + c->functions = (hl_function*)calloc(MAX_FUNCTIONS, sizeof(hl_function)); + c->nfunctions = 0; + + /* fn0: findex=0, returns arg0 + arg1 */ + { + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + hl_opcode ops[] = { + OP3(OAdd, 2, 0, 1), + OP1(ORet, 2), + }; + hl_function *f = &c->functions[c->nfunctions++]; + f->findex = 0; + f->type = fn_type_i32_i32_i32; + f->nregs = 3; + f->nops = 2; + f->regs = (hl_type**)malloc(sizeof(hl_type*) * 3); + memcpy(f->regs, regs, sizeof(regs)); + f->ops = (hl_opcode*)malloc(sizeof(hl_opcode) * 2); + memcpy(f->ops, ops, sizeof(ops)); + } + + /* fn1: findex=1, creates closure and calls it multiple times */ + { + /* + * r0 = captured base value (0) + * r1 = closure + * r2 = first arg (10) + * r3 = first result + * r4 = second arg (20) + * r5 = second result + * r6 = final sum + */ + hl_type *regs[] = { + &c->types[T_I32], /* r0 */ + fn_type_i32_i32, /* r1 */ + &c->types[T_I32], /* r2 */ + &c->types[T_I32], /* r3 */ + &c->types[T_I32], /* r4 */ + &c->types[T_I32], /* r5 */ + &c->types[T_I32], /* r6 */ + }; + + static int
extra1[] = { 2 }; + static int extra2[] = { 4 }; + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 0 */ + OP3(OInstanceClosure, 1, 0, 0), /* r1 = closure(fn0, r0) */ + OP2(OInt, 2, 1), /* r2 = 10 */ + {OCallClosure, 3, 1, 1, extra1},/* r3 = closure(10) = 0 + 10 = 10 */ + OP2(OInt, 4, 2), /* r4 = 20 */ + {OCallClosure, 5, 1, 1, extra2},/* r5 = closure(20) = 0 + 20 = 20 */ + OP3(OAdd, 6, 3, 5), /* r6 = r3 + r5 = 30 */ + OP1(ORet, 6), + }; + + hl_function *f = &c->functions[c->nfunctions++]; + f->findex = 1; + f->type = fn_type_void_i32; + f->nregs = 7; + f->nops = 8; + f->regs = (hl_type**)malloc(sizeof(hl_type*) * 7); + memcpy(f->regs, regs, sizeof(regs)); + f->ops = (hl_opcode*)malloc(sizeof(hl_opcode) * 8); + memcpy(f->ops, ops, sizeof(ops)); + } + + c->entrypoint = 1; + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 30) { + fprintf(stderr, " Expected 30, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: OInstanceClosure with i64 captured value + * + * This tests that pointer-sized captured values work correctly. + */ +TEST(instance_closure_i64) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 42 }; + test_init_ints(c, 1, ints); + + /* Function types with i64 */ + hl_type *arg_types[] = { &c->types[T_I64] }; + hl_type *fn_type_i64_i64 = test_alloc_fun_type(c, &c->types[T_I64], 1, arg_types); + hl_type *fn_type_void_i64 = test_alloc_fun_type(c, &c->types[T_I64], 0, NULL); + + c->functions = (hl_function*)calloc(MAX_FUNCTIONS, sizeof(hl_function)); + c->nfunctions = 0; + + /* fn0: findex=0, returns its argument */ + { + hl_type *regs[] = { &c->types[T_I64] }; + hl_opcode ops[] = { + OP1(ORet, 0), + }; + hl_function *f = &c->functions[c->nfunctions++]; + f->findex = 0; + f->type = fn_type_i64_i64; + f->nregs = 1; + f->nops = 1; + f->regs = (hl_type**)malloc(sizeof(hl_type*) * 1); + memcpy(f->regs, regs, sizeof(regs)); + f->ops = (hl_opcode*)malloc(sizeof(hl_opcode) * 1); + memcpy(f->ops, ops, sizeof(ops)); + } + + /* fn1: findex=1, creates instance closure with i64 */ + { + hl_type *regs[] = { &c->types[T_I64], fn_type_i64_i64, &c->types[T_I64] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 42 (will be i64) */ + OP3(OInstanceClosure, 1, 0, 0), /* r1 = closure(fn0, r0) */ + {OCallClosure, 2, 1, 0, NULL}, /* r2 = call_closure(r1) */ + OP1(ORet, 2), + }; + + hl_function *f = &c->functions[c->nfunctions++]; + f->findex = 1; + f->type = fn_type_void_i64; + f->nregs = 3; + f->nops = 4; + f->regs = (hl_type**)malloc(sizeof(hl_type*) * 3); + memcpy(f->regs, regs, sizeof(regs)); + f->ops = (hl_opcode*)malloc(sizeof(hl_opcode) * 4); + memcpy(f->ops, ops, sizeof(ops)); + } + + c->entrypoint = 1; + + int result; + int64_t (*fn)(void) = (int64_t(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int64_t ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %ld\n", (long)ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* Test list */ +static test_entry_t tests[] = { + TEST_ENTRY(instance_closure_basic), + TEST_ENTRY(instance_closure_with_arg), + TEST_ENTRY(instance_closure_multiple_calls), + TEST_ENTRY(instance_closure_i64), +}; + +int main(int argc, char **argv) { + printf("HashLink AArch64 JIT - Instance Closure Tests\n"); + return run_tests(tests, sizeof(tests) / sizeof(tests[0])); +} diff --git a/other/tests/minimal/test_int_ops.c 
b/other/tests/minimal/test_int_ops.c new file mode 100644 index 000000000..51afbb186 --- /dev/null +++ b/other/tests/minimal/test_int_ops.c @@ -0,0 +1,622 @@ +/* + * Test integer operations for HashLink AArch64 JIT + * + * Tests: OInt, OMov, OAdd, OSub, OMul, ORet + */ +#include "test_harness.h" + +/* + * Test: Return constant integer 42 + * + * function test() -> i32: + * r0 = 42 + * ret r0 + */ +TEST(return_int_constant) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + /* Integer pool: [42] */ + int ints[] = { 42 }; + test_init_ints(c, 1, ints); + + /* Function type: () -> i32 */ + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + /* Registers: r0:i32 */ + hl_type *regs[] = { &c->types[T_I32] }; + + /* Opcodes: + * OInt r0, $0 ; r0 = ints[0] = 42 + * ORet r0 ; return r0 + */ + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = ints[0] */ + OP1(ORet, 0), /* return r0 */ + }; + + test_alloc_function(c, 0, fn_type, 1, regs, 2, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Add two constants: 10 + 32 = 42 + * + * function test() -> i32: + * r0 = 10 + * r1 = 32 + * r2 = r0 + r1 + * ret r2 + */ +TEST(add_int_constants) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + /* Integer pool: [10, 32] */ + int ints[] = { 10, 32 }; + test_init_ints(c, 2, ints); + + /* Function type: () -> i32 */ + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + /* Registers: r0:i32, r1:i32, r2:i32 */ + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + + /* Opcodes: + * OInt r0, $0 ; r0 = 10 + * OInt r1, $1 ; r1 = 32 + * OAdd r2, r0, r1 ; r2 = r0 + r1 + * ORet r2 ; return r2 + */ + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = ints[0] = 10 */ + OP2(OInt, 1, 1), /* r1 = ints[1] = 32 */ + OP3(OAdd, 2, 0, 1), /* r2 = r0 + r1 */ + OP1(ORet, 2), /* return r2 */ + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 4, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Subtract: 100 - 58 = 42 + */ +TEST(sub_int_constants) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 100, 58 }; + test_init_ints(c, 2, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 100 */ + OP2(OInt, 1, 1), /* r1 = 58 */ + OP3(OSub, 2, 0, 1), /* r2 = r0 - r1 */ + OP1(ORet, 2), /* return r2 */ + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 4, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Multiply: 6 * 7 = 42 + */ +TEST(mul_int_constants) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 6, 7 }; + test_init_ints(c, 2, ints); + + 
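+ /* Same skeleton as the other binop tests: load two pool constants, apply one operator, return the destination register. */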
hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 6 */ + OP2(OInt, 1, 1), /* r1 = 7 */ + OP3(OMul, 2, 0, 1), /* r2 = r0 * r1 */ + OP1(ORet, 2), /* return r2 */ + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 4, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Move register: r1 = r0 + */ +TEST(mov_register) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 42 }; + test_init_ints(c, 1, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 42 */ + OP2(OMov, 1, 0), /* r1 = r0 */ + OP1(ORet, 1), /* return r1 */ + }; + + test_alloc_function(c, 0, fn_type, 2, regs, 3, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Signed division: 84 / 2 = 42 + */ +TEST(sdiv_int_constants) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 84, 2 }; + test_init_ints(c, 2, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 84 */ + OP2(OInt, 1, 1), /* r1 = 2 */ + OP3(OSDiv, 2, 0, 1), /* r2 = r0 / r1 */ + OP1(ORet, 2), /* return r2 */ + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 4, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Signed modulo: 142 % 100 = 42 + */ +TEST(smod_int_constants) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 142, 100 }; + test_init_ints(c, 2, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 142 */ + OP2(OInt, 1, 1), /* r1 = 100 */ + OP3(OSMod, 2, 0, 1), /* r2 = r0 % r1 */ + OP1(ORet, 2), /* return r2 */ + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 4, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Bitwise AND: 0xFF & 0x2A = 42 + */ +TEST(and_int_constants) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 0xFF, 0x2A }; /* 255 & 42 = 42 */ + test_init_ints(c, 2, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + + 
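+ /* test_i64_ops.c pushes the same mask through OAnd on i64 registers. */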
hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 0xFF */ + OP2(OInt, 1, 1), /* r1 = 0x2A */ + OP3(OAnd, 2, 0, 1), /* r2 = r0 & r1 */ + OP1(ORet, 2), /* return r2 */ + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 4, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Bitwise OR: 0x20 | 0x0A = 42 + */ +TEST(or_int_constants) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 0x20, 0x0A }; /* 32 | 10 = 42 */ + test_init_ints(c, 2, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), + OP2(OInt, 1, 1), + OP3(OOr, 2, 0, 1), + OP1(ORet, 2), + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 4, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Bitwise XOR: 0x55 ^ 0x7F = 42 + */ +TEST(xor_int_constants) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 0x55, 0x7F }; /* 85 ^ 127 = 42 */ + test_init_ints(c, 2, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), + OP2(OInt, 1, 1), + OP3(OXor, 2, 0, 1), + OP1(ORet, 2), + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 4, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Left shift: 21 << 1 = 42 + */ +TEST(shl_int_constants) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 21, 1 }; + test_init_ints(c, 2, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), + OP2(OInt, 1, 1), + OP3(OShl, 2, 0, 1), + OP1(ORet, 2), + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 4, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Signed right shift: 168 >> 2 = 42 + */ +TEST(sshr_int_constants) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 168, 2 }; + test_init_ints(c, 2, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), + OP2(OInt, 1, 1), + OP3(OSShr, 2, 0, 1), + OP1(ORet, 2), + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 4, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + + if 
(result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Negate: -(-42) = 42 + */ +TEST(neg_int) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { -42 }; + test_init_ints(c, 1, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = -42 */ + OP2(ONeg, 1, 0), /* r1 = -r0 = 42 */ + OP1(ORet, 1), + }; + + test_alloc_function(c, 0, fn_type, 2, regs, 3, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Increment: 41 + 1 = 42 + */ +TEST(incr_int) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 41 }; + test_init_ints(c, 1, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { &c->types[T_I32] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 41 */ + OP1(OIncr, 0), /* r0++ */ + OP1(ORet, 0), + }; + + test_alloc_function(c, 0, fn_type, 1, regs, 3, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Decrement: 43 - 1 = 42 + */ +TEST(decr_int) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 43 }; + test_init_ints(c, 1, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { &c->types[T_I32] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 43 */ + OP1(ODecr, 0), /* r0-- */ + OP1(ORet, 0), + }; + + test_alloc_function(c, 0, fn_type, 1, regs, 3, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* Test list */ +static test_entry_t tests[] = { + TEST_ENTRY(return_int_constant), + TEST_ENTRY(add_int_constants), + TEST_ENTRY(sub_int_constants), + TEST_ENTRY(mul_int_constants), + TEST_ENTRY(mov_register), + TEST_ENTRY(sdiv_int_constants), + TEST_ENTRY(smod_int_constants), + TEST_ENTRY(and_int_constants), + TEST_ENTRY(or_int_constants), + TEST_ENTRY(xor_int_constants), + TEST_ENTRY(shl_int_constants), + TEST_ENTRY(sshr_int_constants), + TEST_ENTRY(neg_int), + TEST_ENTRY(incr_int), + TEST_ENTRY(decr_int), +}; + +int main(int argc, char **argv) { + printf("HashLink AArch64 JIT - Integer Operations Tests\n"); + return run_tests(tests, sizeof(tests) / sizeof(tests[0])); +} diff --git a/other/tests/minimal/test_jumps_unsigned.c b/other/tests/minimal/test_jumps_unsigned.c new file mode 100644 index 000000000..53abc1525 --- /dev/null +++ b/other/tests/minimal/test_jumps_unsigned.c @@ -0,0 +1,422 @@ +/* + * Test unsigned jump operations for HashLink AArch64 JIT + * + * Tests: OJULt, OJUGte, OJNotLt, OJNotGte, OJSGt + * + * These opcodes perform unsigned comparisons and conditional jumps. 
+ * OJNotLt and OJNotGte are for NaN-aware float comparisons. */
+#include "test_harness.h"
+
+/*
+ * Test: OJULt - unsigned less than
+ *
+ * Tests that -1 (0xFFFFFFFF) is NOT less than 1 when compared as unsigned.
+ * With signed comparison, -1 < 1 would be true.
+ */
+TEST(jult_basic) {
+    test_init_runtime();
+
+    hl_code *c = test_alloc_code();
+    test_init_base_types(c);
+
+    int ints[] = { -1, 1, 10, 20 }; /* -1 as unsigned is 0xFFFFFFFF */
+    test_init_ints(c, 4, ints);
+
+    hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL);
+
+    hl_type *regs[] = {
+        &c->types[T_I32], /* r0 = large unsigned (0xFFFFFFFF) */
+        &c->types[T_I32], /* r1 = small value (1) */
+        &c->types[T_I32], /* r2 = result */
+    };
+
+    /*
+     * if (0xFFFFFFFF <u 1) r2 = 10 else r2 = 20
+     * Unsigned, 0xFFFFFFFF is NOT less than 1, so we expect 20.
+     */
+    hl_opcode ops[] = {
+        OP2(OInt, 0, 0), /* r0 = -1 (0xFFFFFFFF), opcode 0 */
+        OP2(OInt, 1, 1), /* r1 = 1, opcode 1 */
+        OP3(OJULt, 0, 1, 3), /* if r0 <u r1 goto opcode 6, opcode 2 */
+        OP2(OInt, 2, 3), /* r2 = 20 (false branch), opcode 3 */
+        OP2(OJAlways, 2, 0), /* goto opcode 7, opcode 4 */
+        OP0(OLabel), /* true branch target, opcode 5 */
+        OP2(OInt, 2, 2), /* r2 = 10 (true branch), opcode 6 */
+        OP0(OLabel), /* end (merge point), opcode 7 */
+        OP1(ORet, 2), /* opcode 8 */
+    };
+
+    test_alloc_function(c, 0, fn_type, 3, regs, 9, ops);
+
+    int result;
+    int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result);
+    if (result != TEST_PASS) return result;
+
+    int ret = fn();
+    if (ret != 20) {
+        fprintf(stderr, " Expected 20 (false branch), got %d\n", ret);
+        return TEST_FAIL;
+    }
+
+    return TEST_PASS;
+}
+
+/*
+ * Test: OJULt with small positive values
+ *
+ * Tests that 1 <u 100 is true (unsigned)
+ */
+TEST(jult_small_values) {
+    test_init_runtime();
+
+    hl_code *c = test_alloc_code();
+    test_init_base_types(c);
+
+    int ints[] = { 1, 100, 10, 20 };
+    test_init_ints(c, 4, ints);
+
+    hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL);
+
+    hl_type *regs[] = {
+        &c->types[T_I32],
+        &c->types[T_I32],
+        &c->types[T_I32],
+    };
+
+    hl_opcode ops[] = {
+        OP2(OInt, 0, 0), /* r0 = 1, opcode 0 */
+        OP2(OInt, 1, 1), /* r1 = 100, opcode 1 */
+        OP3(OJULt, 0, 1, 3), /* if r0 <u r1 goto opcode 6, opcode 2 */
+        OP2(OInt, 2, 3), /* r2 = 20 (false branch), opcode 3 */
+        OP2(OJAlways, 2, 0), /* goto opcode 7, opcode 4 */
+        OP0(OLabel), /* true branch target, opcode 5 */
+        OP2(OInt, 2, 2), /* r2 = 10 (true branch), opcode 6 */
+        OP0(OLabel), /* end (merge point), opcode 7 */
+        OP1(ORet, 2), /* opcode 8 */
+    };
+
+    test_alloc_function(c, 0, fn_type, 3, regs, 9, ops);
+
+    int result;
+    int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result);
+    if (result != TEST_PASS) return result;
+
+    int ret = fn();
+    if (ret != 10) {
+        fprintf(stderr, " Expected 10 (true branch), got %d\n", ret);
+        return TEST_FAIL;
+    }
+
+    return TEST_PASS;
+}
+
+/*
+ * Test: OJUGte - unsigned greater than or equal
+ *
+ * Tests that -1 (0xFFFFFFFF) >=u 1 should be true (unsigned)
+ */
+TEST(jugte_basic) {
+    test_init_runtime();
+
+    hl_code *c = test_alloc_code();
+    test_init_base_types(c);
+
+    int ints[] = { -1, 1, 10, 20 };
+    test_init_ints(c, 4, ints);
+
+    hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL);
+
+    hl_type *regs[] = {
+        &c->types[T_I32],
+        &c->types[T_I32],
+        &c->types[T_I32],
+    };
+
+    hl_opcode ops[] = {
+        OP2(OInt, 0, 0), /* r0 = -1 (0xFFFFFFFF), opcode 0 */
+        OP2(OInt, 1, 1), /* r1 = 1, opcode 1 */
+        OP3(OJUGte, 0, 1, 3), /* if r0 >=u r1 goto opcode 6, opcode 2 */
+        OP2(OInt, 2, 3), /* r2 = 20 (false branch), opcode 3 */
+        OP2(OJAlways, 2, 0), /* goto opcode 7, opcode 4 */
+        OP0(OLabel), /* true branch target, opcode 5 */
+        OP2(OInt, 2, 2), /* r2 = 10 (true branch), opcode 6 */
+        OP0(OLabel), /* end (merge point), opcode 7 */
+        OP1(ORet, 2), /* opcode 8 */
+    };
+
+    test_alloc_function(c, 0, fn_type, 3, regs, 9, ops);
+
+    int result;
+    int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result);
+    if (result != TEST_PASS) return result;
+
+    int ret = fn();
+    if (ret != 10) {
+        fprintf(stderr, " Expected 10 (true branch), got %d\n", ret);
+        return TEST_FAIL;
+    }
+
+    return TEST_PASS;
+}
+
+/*
+ * Test: OJSGt - signed greater than
+ *
+ * Tests signed comparison: 1 > -1 should be true
+ */
+TEST(jsgt_basic) {
+    test_init_runtime();
+
+    hl_code *c = test_alloc_code();
+    test_init_base_types(c);
+
+    int ints[] = { 1, -1, 10, 20 };
+    test_init_ints(c, 4, ints);
+
+    hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL);
+
+    hl_type *regs[] = {
+        &c->types[T_I32],
+        &c->types[T_I32],
+        &c->types[T_I32],
+    };
+
+    hl_opcode ops[] = {
+        OP2(OInt, 0, 0), /* r0 = 1, opcode 0 */
+        OP2(OInt, 1, 1), /* r1 = -1, opcode 1 */
+        OP3(OJSGt, 0, 1, 3), /* if r0 > r1 (signed) goto opcode 6, opcode 2 */
+        OP2(OInt, 2, 3), /* r2 = 20 (false branch), opcode 3 */
+        OP2(OJAlways, 2, 0), /* goto opcode 7, opcode 4 */
+        OP0(OLabel), /* true branch target, opcode 5 */
+        OP2(OInt, 2, 2), /* r2 = 10 (true branch), opcode 6 */
+        OP0(OLabel), /* end (merge point), opcode 7 */
+        OP1(ORet, 2), /* opcode 8 */
+    };
+
+    test_alloc_function(c, 0, fn_type, 3, regs, 9, ops);
+
+    int result;
+    int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result);
+    if (result != TEST_PASS) return result;
+
+    int ret = fn();
+    if (ret != 10) {
+        fprintf(stderr, " Expected 10 (true branch), got %d\n", ret);
+        return TEST_FAIL;
+    }
+
+    return TEST_PASS;
+}
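/*
 * [Illustration, editor-added; not part of the patch] The expectations in
 * jult_basic / jult_small_values / jugte_basic / jsgt_basic come down to
 * signed vs unsigned compares, which a JIT typically lowers to the same CMP
 * with different branch conditions (e.g. B.LO vs B.LT on AArch64). The same
 * semantics in plain C, for reference:
 */
#include <assert.h>
#include <stdint.h>

static void compare_semantics(void) {
    int32_t a = -1, b = 1;
    assert(a < b);                        /* signed: -1 < 1 is true (jsgt side) */
    assert(!((uint32_t)a < (uint32_t)b)); /* unsigned: 0xFFFFFFFF < 1 is false (jult_basic) */
    assert((uint32_t)1 < (uint32_t)100);  /* unsigned: small values behave normally (jult_small_values) */
    assert((uint32_t)a >= (uint32_t)b);   /* unsigned: 0xFFFFFFFF >= 1 is true (jugte_basic) */
}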
+
+/*
+ * Test: OJNotLt - "not less than" for NaN-aware float comparison
+ *
+ * For floats, NaN comparisons need special handling.
+ * OJNotLt: jumps if !(a < b), which includes NaN cases.
+ */
+TEST(jnotlt_float) {
+    test_init_runtime();
+
+    hl_code *c = test_alloc_code();
+    test_init_base_types(c);
+
+    double floats[] = { 2.0, 1.0 }; /* 2.0 is not less than 1.0 */
+    test_init_floats(c, 2, floats);
+
+    int ints[] = { 10, 20 };
+    test_init_ints(c, 2, ints);
+
+    hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL);
+
+    hl_type *regs[] = {
+        &c->types[T_F64],
+        &c->types[T_F64],
+        &c->types[T_I32],
+    };
+
+    hl_opcode ops[] = {
+        OP2(OFloat, 0, 0), /* r0 = 2.0, opcode 0 */
+        OP2(OFloat, 1, 1), /* r1 = 1.0, opcode 1 */
+        OP3(OJNotLt, 0, 1, 3), /* if !(r0 < r1) goto opcode 6, opcode 2 */
+        OP2(OInt, 2, 1), /* r2 = 20 (false: r0 < r1), opcode 3 */
+        OP2(OJAlways, 2, 0), /* goto opcode 7, opcode 4 */
+        OP0(OLabel), /* true branch target, opcode 5 */
+        OP2(OInt, 2, 0), /* r2 = 10 (true: r0 >= r1 or NaN), opcode 6 */
+        OP0(OLabel), /* end (merge point), opcode 7 */
+        OP1(ORet, 2), /* opcode 8 */
+    };
+
+    test_alloc_function(c, 0, fn_type, 3, regs, 9, ops);
+
+    int result;
+    int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result);
+    if (result != TEST_PASS) return result;
+
+    int ret = fn();
+    /* 2.0 is NOT less than 1.0, so we should take the true branch */
+    if (ret != 10) {
+        fprintf(stderr, " Expected 10 (not-less-than branch), got %d\n", ret);
+        return TEST_FAIL;
+    }
+
+    return TEST_PASS;
+}
+
+/*
+ * Test: OJNotGte - "not greater than or equal" for NaN-aware comparison
+ */
+TEST(jnotgte_float) {
+    test_init_runtime();
+
+    hl_code *c = test_alloc_code();
+    test_init_base_types(c);
+
+    double floats[] = { 1.0, 2.0 }; /* 1.0 is not >= 2.0 */
+    test_init_floats(c, 2, floats);
+
+    int ints[] = { 10, 20 };
+    test_init_ints(c, 2, ints);
+
+    hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL);
+
+    hl_type *regs[] = {
+        &c->types[T_F64],
+        &c->types[T_F64],
+        &c->types[T_I32],
+    };
+
+    hl_opcode ops[] = {
+        OP2(OFloat, 0, 0), /* r0 = 1.0, opcode 0 */
+        OP2(OFloat, 1, 1), /* r1 = 2.0, opcode 1 */
+        OP3(OJNotGte, 0, 1, 3), /* if !(r0 >= r1) goto opcode 6, opcode 2 */
+        OP2(OInt, 2, 1), /* r2 = 20 (false: r0 >= r1), opcode 3 */
+        OP2(OJAlways, 2, 0), /* goto opcode 7, opcode 4 */
+        OP0(OLabel), /* true branch target, opcode 5 */
+        OP2(OInt, 2, 0), /* r2 = 10 (true: r0 < r1 or NaN), opcode 6 */
+        OP0(OLabel), /* end (merge point), opcode 7 */
+        OP1(ORet, 2), /* opcode 8 */
+    };
+
+    test_alloc_function(c, 0, fn_type, 3, regs, 9, ops);
+
+    int result;
+    int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result);
+    if (result != TEST_PASS) return result;
+
+    int ret = fn();
+    /* 1.0 is NOT >= 2.0, so we should take the true branch */
+    if (ret != 10) {
+        fprintf(stderr, " Expected 10 (not-gte branch), got %d\n", ret);
+        return TEST_FAIL;
+    }
+
+    return TEST_PASS;
+}
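/*
 * [Illustration, editor-added; not part of the patch] Why OJNotLt/OJNotGte
 * exist as separate opcodes: with IEEE-754 NaN operands, !(a < b) is not
 * equivalent to (a >= b), so "jump if not-less-than" cannot be encoded as a
 * plain >= branch. A minimal C demonstration:
 */
#include <assert.h>
#include <math.h>

static void nan_branch_semantics(void) {
    double x = NAN, one = 1.0;
    assert(!(x < one));  /* every ordered comparison with NaN is false... */
    assert(!(x >= one)); /* ...including >=, */
    /* so "branch if !(a < b)" must be taken for NaN while "branch if a >= b"
       must not be -- two different conditions for the JIT to emit. */
}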
+
+/*
+ * Test: Compare signed vs unsigned jump behavior
+ *
+ * -1 vs 1:
+ * Signed: -1 < 1 (true)
+ * Unsigned: 0xFFFFFFFF > 1 (true)
+ */
+TEST(signed_vs_unsigned) {
+    test_init_runtime();
+
+    hl_code *c = test_alloc_code();
+    test_init_base_types(c);
+
+    int ints[] = { -1, 1, 0, 10 };
+    test_init_ints(c, 4, ints);
+
+    hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL);
+
+    hl_type *regs[] = {
+        &c->types[T_I32], /* r0 = -1 */
+        &c->types[T_I32], /* r1 = 1 */
+        &c->types[T_I32], /* r2 = signed result */
+        &c->types[T_I32], /* r3 = unsigned result */
+        &c->types[T_I32], /* r4 = combined */
+    };
+
+    /*
+     * Test signed: -1 < 1 (true) -> r2 = 1
+     * Test unsigned: -1 <u 1 (false) -> r3 = 0
+     * Return r2 * 10 + r3 = 10
+     *
+     * Structure for each test:
+     * if (condition) goto set_value
+     * goto after_test
+     * OLabel (set_value target)
+     * set value = 1
+     * OLabel (after_test / merge point)
+     */
+    hl_opcode ops[] = {
+        OP2(OInt, 0, 0), /* r0 = -1, opcode 0 */
+        OP2(OInt, 1, 1), /* r1 = 1, opcode 1 */
+        OP2(OInt, 2, 2), /* r2 = 0 (default), opcode 2 */
+        OP2(OInt, 3, 2), /* r3 = 0 (default), opcode 3 */
+        /* Signed test: if -1 < 1 (true), set r2 = 1 */
+        OP3(OJSLt, 0, 1, 2), /* if r0 < r1 goto opcode 7 (set_r2), opcode 4 */
+        OP2(OJAlways, 2, 0), /* goto opcode 8 (after_signed), opcode 5 */
+        OP0(OLabel), /* set_r2 target, opcode 6 */
+        OP2(OInt, 2, 1), /* r2 = 1 (signed true), opcode 7 */
+        OP0(OLabel), /* after_signed, opcode 8 */
+        /* Unsigned test: if -1 <u 1 (false), set r3 = 1 */
+        OP3(OJULt, 0, 1, 2), /* if r0 <u r1 goto opcode 12 (set_r3), opcode 9 */
+        OP2(OJAlways, 2, 0), /* goto opcode 13 (after_unsigned), opcode 10 */
+        OP0(OLabel), /* set_r3 target, opcode 11 */
+        OP2(OInt, 3, 1), /* r3 = 1 (unsigned true), opcode 12 */
+        OP0(OLabel), /* after_unsigned, opcode 13 */
+        /* Combine: r4 = r2 * 10 + r3 */
+        OP2(OInt, 4, 3), /* r4 = 10, opcode 14 */
+        OP3(OMul, 4, 2, 4), /* r4 = r2 * 10, opcode 15 */
+        OP3(OAdd, 4, 4, 3), /* r4 = r4 + r3, opcode 16 */
+        OP1(ORet, 4), /* opcode 17 */
+    };
+
+    test_alloc_function(c, 0, fn_type, 5, regs, 18, ops);
+
+    int result;
+    int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result);
+    if (result != TEST_PASS) return result;
+
+    int ret = fn();
+    /*
+     * Signed: -1 < 1 (true), so r2 = 1
+     * Unsigned: -1 is NOT <u 1 (0xFFFFFFFF > 1), so r3 = 0
+     * Result: 1 * 10 + 0 = 10
+     */
+    if (ret != 10) {
+        fprintf(stderr, " Expected 10, got %d\n", ret);
+        return TEST_FAIL;
+    }
+
+    return TEST_PASS;
+}
+
+/* Test list */
+static test_entry_t tests[] = {
+    TEST_ENTRY(jult_basic),
+    TEST_ENTRY(jult_small_values),
+    TEST_ENTRY(jugte_basic),
+    TEST_ENTRY(jsgt_basic),
+    TEST_ENTRY(jnotlt_float),
+    TEST_ENTRY(jnotgte_float),
+    TEST_ENTRY(signed_vs_unsigned),
+};
+
+int main(int argc, char **argv) {
+    printf("HashLink AArch64 JIT - Unsigned Jump Tests\n");
+    return run_tests(tests, sizeof(tests) / sizeof(tests[0]));
+}
diff --git a/other/tests/minimal/test_mdbg.c b/other/tests/minimal/test_mdbg.c
new file mode 100644
index 000000000..e7d9ca37b
--- /dev/null
+++ b/other/tests/minimal/test_mdbg.c
@@ -0,0 +1,562 @@
+/*
+ * Test ARM64 debugger (mdbg) code quality and bug detection
+ *
+ * These tests verify that known bugs in mdbg.c have been fixed.
+ * Tests will FAIL if bugs are present, PASS when fixed.
+ *
+ * Compile: cc -o test_mdbg test_mdbg.c -framework CoreFoundation -arch arm64
+ * Run: ./test_mdbg
+ */
+
+#ifdef __aarch64__
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <mach/mach.h>
+
+/* Test result codes - matching test_harness.h */
+#define TEST_PASS 0
+#define TEST_FAIL 1
+#define TEST_SKIP 2
+
+/* Colors for output - matching test_harness.h */
+#define GREEN "\033[32m"
+#define RED "\033[31m"
+#define YELLOW "\033[33m"
+#define RESET "\033[0m"
+
+/* Test infrastructure */
+typedef int (*test_func_t)(void);
+
+typedef struct {
+    const char *name;
+    test_func_t func;
+} test_entry_t;
+
+#define TEST(name) static int test_##name(void)
+#define TEST_ENTRY(name) { #name, test_##name }
+
+static int run_tests(test_entry_t *tests, int count) {
+    int passed = 0, failed = 0, skipped = 0;
+
+    printf("\n=== Running %d mdbg tests ===\n\n", count);
+
+    for (int i = 0; i < count; i++) {
+        printf(" [%d/%d] %s ... ", i + 1, count, tests[i].name);
+        fflush(stdout);
+
+        int result = tests[i].func();
+
+        switch (result) {
+        case TEST_PASS:
+            printf(GREEN "PASS" RESET "\n");
+            passed++;
+            break;
+        case TEST_FAIL:
+            printf(RED "FAIL" RESET "\n");
+            failed++;
+            break;
+        case TEST_SKIP:
+            printf(YELLOW "SKIP" RESET "\n");
+            skipped++;
+            break;
+        }
+    }
+
+    printf("\n=== Results: %d passed, %d failed, %d skipped ===\n\n",
+        passed, failed, skipped);
+
+    return failed > 0 ? 
1 : 0; +} + +/* Helper: Read file contents */ +static char* read_file(const char *path) { + FILE *f = fopen(path, "r"); + if (!f) return NULL; + + fseek(f, 0, SEEK_END); + long size = ftell(f); + fseek(f, 0, SEEK_SET); + + char *content = malloc(size + 1); + if (!content) { + fclose(f); + return NULL; + } + + fread(content, 1, size, f); + content[size] = '\0'; + fclose(f); + + return content; +} + +/* Helper: Check if pattern exists in content */ +static bool contains(const char *content, const char *pattern) { + return strstr(content, pattern) != NULL; +} + +/* Helper: Count occurrences of pattern */ +static int count_occurrences(const char *content, const char *pattern) { + int count = 0; + const char *p = content; + size_t len = strlen(pattern); + + while ((p = strstr(p, pattern)) != NULL) { + count++; + p += len; + } + return count; +} + +/* Path to mdbg.c - adjust if needed */ +#define MDBG_PATH "include/mdbg/mdbg.c" + +/* ============================================================ + * Bug #1: Missing semaphore_signal in EXC_BAD_ACCESS handler + * + * The EXC_BAD_ACCESS handler must call semaphore_signal() + * before returning, otherwise session_wait() will timeout. + * ============================================================ */ +TEST(bug1_exc_bad_access_signals_semaphore) { + char *content = read_file(MDBG_PATH); + if (!content) { + fprintf(stderr, " Cannot read %s\n", MDBG_PATH); + return TEST_SKIP; + } + + /* + * Look for the pattern in EXC_BAD_ACCESS handler: + * else if(exception == EXC_BAD_ACCESS) { + * ... + * semaphore_signal(sess->wait_sem); <-- MUST EXIST + * return KERN_SUCCESS; + * } + * + * We check that between "exception == EXC_BAD_ACCESS" and next "return KERN_SUCCESS" + * there is a semaphore_signal call. + */ + + char *bad_access = strstr(content, "exception == EXC_BAD_ACCESS"); + if (!bad_access) { + fprintf(stderr, " EXC_BAD_ACCESS handler not found\n"); + free(content); + return TEST_FAIL; + } + + /* Find the return statement after EXC_BAD_ACCESS */ + char *return_stmt = strstr(bad_access, "return KERN_SUCCESS"); + if (!return_stmt) { + fprintf(stderr, " return statement not found in handler\n"); + free(content); + return TEST_FAIL; + } + + /* Check if semaphore_signal exists between EXC_BAD_ACCESS and return */ + size_t range = return_stmt - bad_access; + char *handler_code = malloc(range + 1); + strncpy(handler_code, bad_access, range); + handler_code[range] = '\0'; + + bool has_signal = contains(handler_code, "semaphore_signal"); + free(handler_code); + free(content); + + if (!has_signal) { + fprintf(stderr, " MISSING: semaphore_signal() in EXC_BAD_ACCESS handler\n"); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* ============================================================ + * Bug #1b: Missing semaphore_signal in EXC_BAD_INSTRUCTION handler + * ============================================================ */ +TEST(bug1b_exc_bad_instruction_signals_semaphore) { + char *content = read_file(MDBG_PATH); + if (!content) { + fprintf(stderr, " Cannot read %s\n", MDBG_PATH); + return TEST_SKIP; + } + + char *bad_instr = strstr(content, "exception == EXC_BAD_INSTRUCTION"); + if (!bad_instr) { + fprintf(stderr, " EXC_BAD_INSTRUCTION handler not found\n"); + free(content); + return TEST_FAIL; + } + + char *return_stmt = strstr(bad_instr, "return KERN_SUCCESS"); + if (!return_stmt) { + fprintf(stderr, " return statement not found in handler\n"); + free(content); + return TEST_FAIL; + } + + size_t range = return_stmt - bad_instr; + char *handler_code = 
malloc(range + 1); + strncpy(handler_code, bad_instr, range); + handler_code[range] = '\0'; + + bool has_signal = contains(handler_code, "semaphore_signal"); + free(handler_code); + free(content); + + if (!has_signal) { + fprintf(stderr, " MISSING: semaphore_signal() in EXC_BAD_INSTRUCTION handler\n"); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* ============================================================ + * Bug #2: Memory leak in read_register + * + * get_thread_state() allocates memory that must be freed + * after extracting the register value. + * ============================================================ */ +TEST(bug2_read_register_frees_memory) { + char *content = read_file(MDBG_PATH); + if (!content) { + fprintf(stderr, " Cannot read %s\n", MDBG_PATH); + return TEST_SKIP; + } + + /* Find read_register function */ + char *func_start = strstr(content, "read_register(mach_port_t task"); + if (!func_start) { + fprintf(stderr, " read_register function not found\n"); + free(content); + return TEST_FAIL; + } + + /* Find end of function (next function or end marker) */ + char *func_end = strstr(func_start, "\nstatic kern_return_t write_register"); + if (!func_end) { + func_end = func_start + 500; /* Approximate */ + } + + size_t range = func_end - func_start; + char *func_code = malloc(range + 1); + strncpy(func_code, func_start, range); + func_code[range] = '\0'; + + /* Check for free() call after get_thread_state or get_debug_state */ + bool has_free = contains(func_code, "free(regs)") || + contains(func_code, "free(state)"); + + free(func_code); + free(content); + + if (!has_free) { + fprintf(stderr, " MISSING: free() call in read_register - memory leak!\n"); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* ============================================================ + * Bug #2b: Memory leak in write_register + * ============================================================ */ +TEST(bug2b_write_register_frees_memory) { + char *content = read_file(MDBG_PATH); + if (!content) { + fprintf(stderr, " Cannot read %s\n", MDBG_PATH); + return TEST_SKIP; + } + + char *func_start = strstr(content, "write_register(mach_port_t task"); + if (!func_start) { + fprintf(stderr, " write_register function not found\n"); + free(content); + return TEST_FAIL; + } + + char *func_end = strstr(func_start, "\n#pragma mark Memory"); + if (!func_end) { + func_end = func_start + 800; + } + + size_t range = func_end - func_start; + char *func_code = malloc(range + 1); + strncpy(func_code, func_start, range); + func_code[range] = '\0'; + + bool has_free = contains(func_code, "free(regs)") || + contains(func_code, "free(state)"); + + free(func_code); + free(content); + + if (!has_free) { + fprintf(stderr, " MISSING: free() call in write_register - memory leak!\n"); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* ============================================================ + * Bug #3: Incomplete debug register names + * + * get_register_name should handle REG_DR4-REG_DR7 since + * get_debug_reg handles them. 
+ * ============================================================ */ +TEST(bug3_complete_debug_register_names) { + char *content = read_file(MDBG_PATH); + if (!content) { + fprintf(stderr, " Cannot read %s\n", MDBG_PATH); + return TEST_SKIP; + } + + /* Find get_register_name function */ + char *func_start = strstr(content, "get_register_name(int reg)"); + if (!func_start) { + fprintf(stderr, " get_register_name function not found\n"); + free(content); + return TEST_FAIL; + } + + char *func_end = strstr(func_start, "#pragma mark"); + if (!func_end) { + func_end = func_start + 1000; + } + + size_t range = func_end - func_start; + char *func_code = malloc(range + 1); + strncpy(func_code, func_start, range); + func_code[range] = '\0'; + + /* Check for REG_DR4, DR5, DR6, DR7 cases */ + bool has_dr4 = contains(func_code, "REG_DR4"); + bool has_dr5 = contains(func_code, "REG_DR5"); + bool has_dr6 = contains(func_code, "REG_DR6"); + bool has_dr7 = contains(func_code, "REG_DR7"); + + free(func_code); + free(content); + + if (!has_dr4 || !has_dr5 || !has_dr6 || !has_dr7) { + fprintf(stderr, " MISSING: REG_DR4-DR7 cases in get_register_name\n"); + fprintf(stderr, " DR4:%s DR5:%s DR6:%s DR7:%s\n", + has_dr4 ? "ok" : "MISSING", + has_dr5 ? "ok" : "MISSING", + has_dr6 ? "ok" : "MISSING", + has_dr7 ? "ok" : "MISSING"); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* ============================================================ + * Verification: ARM64 thread state structure size + * ============================================================ */ +TEST(verify_arm64_thread_state_size) { + size_t expected = 272; + size_t actual = sizeof(arm_thread_state64_t); + + if (actual != expected) { + fprintf(stderr, " Expected %zu bytes, got %zu\n", expected, actual); + return TEST_FAIL; + } + return TEST_PASS; +} + +/* ============================================================ + * Verification: ARM64 debug state structure size + * ============================================================ */ +TEST(verify_arm64_debug_state_size) { + size_t expected = 520; + size_t actual = sizeof(arm_debug_state64_t); + + if (actual != expected) { + fprintf(stderr, " Expected %zu bytes, got %zu\n", expected, actual); + return TEST_FAIL; + } + return TEST_PASS; +} + +/* ============================================================ + * Verification: CPSR is 32-bit requiring special handling + * ============================================================ */ +TEST(verify_cpsr_is_32bit) { + arm_thread_state64_t state; + if (sizeof(state.__cpsr) != 4) { + fprintf(stderr, " __cpsr should be 4 bytes, got %zu\n", + sizeof(state.__cpsr)); + return TEST_FAIL; + } + return TEST_PASS; +} + +/* ============================================================ + * Bug #4: ARM64 single-step requires MDSCR_EL1.SS, not CPSR TF + * + * On ARM64, single-stepping is enabled via MDSCR_EL1.SS (bit 0) + * which is accessed via debug registers (REG_DR6 -> __mdscr_el1). + * + * On x86, single-stepping uses EFLAGS.TF (bit 8). + * + * The Haxe debugger's singleStep() must use DR6 on ARM64, + * not EFlags which maps to CPSR (which has no trap flag). 
+ *
+ * Expected behavior:
+ * - ARM64: Set/clear bit 0 of MDSCR_EL1 (via DR6)
+ * - x86: Set/clear bit 8 of EFLAGS
+ * ============================================================ */
+TEST(bug4_arm64_single_step_uses_mdscr_not_cpsr) {
+    char *content = read_file("hld/Debugger.hx");
+    if (!content) {
+        /* Try alternate paths - from hashlink repo or test directory */
+        content = read_file("../../../hashlink-debugger/hld/Debugger.hx");
+    }
+    if (!content) {
+        /* Try absolute path for development */
+        content = read_file("/Users/jameskim/Develop/hashlink-debugger/hld/Debugger.hx");
+    }
+    if (!content) {
+        fprintf(stderr, " Cannot read Debugger.hx (expected in hashlink-debugger)\n");
+        fprintf(stderr, " This test verifies ARM64 single-step implementation\n");
+        return TEST_SKIP;
+    }
+
+    /* Find singleStep function */
+    char *func_start = strstr(content, "function singleStep");
+    if (!func_start) {
+        fprintf(stderr, " singleStep function not found\n");
+        free(content);
+        return TEST_FAIL;
+    }
+
+    /* Find the next function (to limit search scope) */
+    char *func_end = strstr(func_start + 20, "\n\tfunction ");
+    if (!func_end) {
+        func_end = func_start + 500;
+    }
+
+    size_t range = func_end - func_start;
+    char *func_code = malloc(range + 1);
+    strncpy(func_code, func_start, range);
+    func_code[range] = '\0';
+
+    /*
+     * The singleStep function should:
+     * 1. Check isArm64 to determine which mechanism to use
+     * 2. For ARM64: Use DR6 (MDSCR_EL1) bit 0
+     * 3. For x86: Use EFlags bit 8 (0x100)
+     *
+     * Current buggy code only handles x86:
+     *   var r = getReg(tid, EFlags).toInt();
+     *   if( set ) r |= 256 else r &= ~256; // bit 8 = trap flag
+     *
+     * Fixed code should check isArm64 and use DR6 bit 0 for ARM64.
+     */
+
+    bool mentions_arm64 = contains(func_code, "isArm64") ||
+        contains(func_code, "Arm64") ||
+        contains(func_code, "arm64");
+    bool mentions_dr6 = contains(func_code, "Dr6") ||
+        contains(func_code, "DR6") ||
+        contains(func_code, "MDSCR");
+
+    free(func_code);
+    free(content);
+
+    if (!mentions_arm64) {
+        fprintf(stderr, " BUG: singleStep() does not check for ARM64!\n");
+        fprintf(stderr, " ARM64 requires MDSCR_EL1.SS (bit 0) for single-step,\n");
+        fprintf(stderr, " not CPSR/EFLAGS, which has no trap flag on ARM64.\n");
+        return TEST_FAIL;
+    }
+
+    if (!mentions_dr6) {
+        fprintf(stderr, " WARNING: singleStep() mentions ARM64 but may not use DR6\n");
+        fprintf(stderr, " ARM64 single-step requires DR6 (MDSCR_EL1) bit 0\n");
+        /* Don't fail yet - might be handled differently */
+    }
+
+    return TEST_PASS;
+}
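/*
 * [Illustration, editor-added; not part of the patch] What a fixed
 * singleStep() amounts to, as a C sketch. getReg/setReg and the numeric
 * register ids for REG_EFLAGS/REG_DR6 are assumptions of this sketch, not
 * APIs confirmed by the patch; the point is only the architecture split and
 * the two bit positions the tests above describe.
 */
#include <stdint.h>

#define X86_EFLAGS_TF (1u << 8) /* x86: trap flag, EFLAGS bit 8 */
#define MDSCR_EL1_SS  (1u << 0) /* ARM64: software-step enable, MDSCR_EL1 bit 0 */

static void single_step_sketch(int tid, int is_arm64, int enable,
                               uint64_t (*getReg)(int tid, int reg),
                               void (*setReg)(int tid, int reg, uint64_t v)) {
    /* ARM64 routes through the debug-state register (DR6 -> __mdscr_el1);
       x86 toggles the trap flag in EFLAGS. */
    int reg = is_arm64 ? 6 /* REG_DR6 (assumed id) */ : 9 /* REG_EFLAGS (assumed id) */;
    uint64_t mask = is_arm64 ? MDSCR_EL1_SS : X86_EFLAGS_TF;
    uint64_t v = getReg(tid, reg);
    setReg(tid, reg, enable ? (v | mask) : (v & ~mask));
}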
+
+/* ============================================================
+ * Verification: MDSCR_EL1 bit 0 is SS (Software Step) enable
+ * ============================================================ */
+TEST(verify_mdscr_ss_bit) {
+    /*
+     * ARM64 MDSCR_EL1 register layout:
+     * Bit 0: SS - Software Step enable
+     * When set, the processor generates a Software Step exception
+     * after executing the next instruction.
+     *
+     * Reference: ARM Architecture Reference Manual ARMv8-A
+     */
+    int ss_bit_position = 0; /* Bit 0 */
+    int ss_mask = 1 << ss_bit_position; /* 0x1 */
+
+    if (ss_mask != 1) {
+        fprintf(stderr, " SS bit mask should be 0x1 (bit 0)\n");
+        return TEST_FAIL;
+    }
+
+    /* x86 EFLAGS trap flag is bit 8 (0x100) - different from ARM64! */
+    int x86_tf_bit = 8;
+    int x86_tf_mask = 1 << x86_tf_bit; /* 0x100 = 256 */
+
+    if (x86_tf_mask == ss_mask) {
+        fprintf(stderr, " x86 TF and ARM64 SS masks must differ (TF is bit 8, SS is bit 0)!\n");
+        return TEST_FAIL;
+    }
+
+    return TEST_PASS;
+}
+
+/* ============================================================
+ * Main
+ * ============================================================ */
+int main(int argc, char *argv[]) {
+    (void)argc;
+    (void)argv;
+
+    printf("mdbg ARM64 Bug Detection Tests\n");
+    printf("================================\n");
+    printf("Tests will FAIL if bugs are present, PASS when fixed.\n");
+
+    test_entry_t tests[] = {
+        /* Bug detection tests - should FAIL until fixed */
+        TEST_ENTRY(bug1_exc_bad_access_signals_semaphore),
+        TEST_ENTRY(bug1b_exc_bad_instruction_signals_semaphore),
+        TEST_ENTRY(bug2_read_register_frees_memory),
+        TEST_ENTRY(bug2b_write_register_frees_memory),
+        TEST_ENTRY(bug3_complete_debug_register_names),
+        TEST_ENTRY(bug4_arm64_single_step_uses_mdscr_not_cpsr),
+
+        /* Verification tests - should PASS */
+        TEST_ENTRY(verify_arm64_thread_state_size),
+        TEST_ENTRY(verify_arm64_debug_state_size),
+        TEST_ENTRY(verify_cpsr_is_32bit),
+        TEST_ENTRY(verify_mdscr_ss_bit),
+    };
+
+    int count = sizeof(tests) / sizeof(tests[0]);
+    return run_tests(tests, count);
+}
+
+#else /* !__aarch64__ */
+
+#include <stdio.h>
+
+int main(int argc, char *argv[]) {
+    (void)argc;
+    (void)argv;
+    printf("mdbg tests are only applicable to ARM64 architecture.\n");
+    return 0;
+}
+
+#endif /* __aarch64__ */
diff --git a/other/tests/minimal/test_memory_ops.c b/other/tests/minimal/test_memory_ops.c
new file mode 100644
index 000000000..3975513f8
--- /dev/null
+++ b/other/tests/minimal/test_memory_ops.c
@@ -0,0 +1,448 @@
+/*
+ * Test memory operations for HashLink AArch64 JIT
+ *
+ * Tests: OGetI8, OGetI16, OGetMem, OSetI8, OSetI16, OSetMem
+ *
+ * These opcodes access memory at (base + offset) where offset is a register value.
+ * OGetI8/OGetI16/OGetMem: dst = *(type*)(base + offset)
+ * OSetI8/OSetI16/OSetMem: *(type*)(base + offset) = value
+ */
+#include "test_harness.h"
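/*
 * [Illustration, editor-added; not part of the patch] The plain-C meaning of
 * the opcodes exercised below, with base as a byte pointer and offset taken
 * from a register. (Whether the byte/short loads sign- or zero-extend is not
 * observable in these tests, since every stored value fits in 7/15 bits.)
 */
#include <stdint.h>

static int32_t geti8(const uint8_t *base, int32_t offset) {
    return base[offset];                 /* OGetI8: load one byte, widen to i32 */
}
static void setmem_i32(uint8_t *base, int32_t offset, int32_t value) {
    *(int32_t *)(base + offset) = value; /* OSetMem with an i32 source register */
}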
+
+/* Native function to allocate test buffer */
+static void *alloc_test_buffer(int size) {
+    void *buf = malloc(size);
+    if (!buf) return NULL; /* avoid memset on a failed allocation */
+    memset(buf, 0, size);
+    return buf;
+}
+
+/* Native function to free test buffer */
+static void free_test_buffer(void *buf) {
+    free(buf);
+}
+
+/*
+ * Test: OSetI8 and OGetI8 - write and read byte values
+ *
+ * alloc buffer
+ * set_i8(buffer, 0, 0x42)
+ * set_i8(buffer, 1, 0x37)
+ * r0 = get_i8(buffer, 0) ; should be 0x42 = 66
+ * r1 = get_i8(buffer, 1) ; should be 0x37 = 55
+ * r2 = r0 + r1 ; 66 + 55 = 121
+ * return r2
+ */
+TEST(mem_i8_basic) {
+    test_init_runtime();
+
+    hl_code *c = test_alloc_code();
+    test_init_base_types(c);
+
+    int ints[] = { 64, 0, 1, 0x42, 0x37 }; /* size, offset0, offset1, val0, val1 */
+    test_init_ints(c, 5, ints);
+
+    /* Native: alloc_test_buffer(size) -> bytes */
+    hl_type *alloc_args[] = { &c->types[T_I32] };
+    hl_type *alloc_fn_type = test_alloc_fun_type(c, &c->types[T_BYTES], 1, alloc_args);
+    test_add_native(c, 1, "test", "alloc_buffer", alloc_fn_type, (void*)alloc_test_buffer);
+
+    /* Function type: () -> i32 */
+    hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL);
+
+    /*
+     * Registers:
+     * r0: size (64)
+     * r1: buffer (bytes)
+     * r2: offset0 (0)
+     * r3: offset1 (1)
+     * r4: val0 (0x42)
+     * r5: val1 (0x37)
+     * r6: read val0
+     * r7: read val1
+     * r8: result
+     */
+    hl_type *regs[] = {
+        &c->types[T_I32], /* r0 = size */
+        &c->types[T_BYTES], /* r1 = buffer */
+        &c->types[T_I32], /* r2 = offset0 */
+        &c->types[T_I32], /* r3 = offset1 */
+        &c->types[T_I32], /* r4 = val0 */
+        &c->types[T_I32], /* r5 = val1 */
+        &c->types[T_I32], /* r6 = read val0 */
+        &c->types[T_I32], /* r7 = read val1 */
+        &c->types[T_I32], /* r8 = result */
+    };
+
+    hl_opcode ops[] = {
+        OP2(OInt, 0, 0), /* r0 = 64 (size) */
+        OP3(OCall1, 1, 1, 0), /* r1 = alloc_buffer(r0) */
+        OP2(OInt, 2, 1), /* r2 = 0 (offset) */
+        OP2(OInt, 3, 2), /* r3 = 1 (offset) */
+        OP2(OInt, 4, 3), /* r4 = 0x42 */
+        OP2(OInt, 5, 4), /* r5 = 0x37 */
+        OP3(OSetI8, 1, 2, 4), /* *(i8*)(r1 + r2) = r4 */
+        OP3(OSetI8, 1, 3, 5), /* *(i8*)(r1 + r3) = r5 */
+        OP3(OGetI8, 6, 1, 2), /* r6 = *(i8*)(r1 + r2) */
+        OP3(OGetI8, 7, 1, 3), /* r7 = *(i8*)(r1 + r3) */
+        OP3(OAdd, 8, 6, 7), /* r8 = r6 + r7 */
+        OP1(ORet, 8),
+    };
+
+    test_alloc_function(c, 0, fn_type, 9, regs, 12, ops);
+
+    int result;
+    int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result);
+    if (result != TEST_PASS) return result;
+
+    int ret = fn();
+    int expected = 0x42 + 0x37; /* 66 + 55 = 121 */
+    if (ret != expected) {
+        fprintf(stderr, " Expected %d, got %d\n", expected, ret);
+        return TEST_FAIL;
+    }
+
+    return TEST_PASS;
+}
+
+/*
+ * Test: OSetI16 and OGetI16 - write and read 16-bit values
+ */
+TEST(mem_i16_basic) {
+    test_init_runtime();
+
+    hl_code *c = test_alloc_code();
+    test_init_base_types(c);
+
+    int ints[] = { 64, 0, 2, 0x1234, 0x5678 };
+    test_init_ints(c, 5, ints);
+
+    hl_type *alloc_args[] = { &c->types[T_I32] };
+    hl_type *alloc_fn_type = test_alloc_fun_type(c, &c->types[T_BYTES], 1, alloc_args);
+    test_add_native(c, 1, "test", "alloc_buffer", alloc_fn_type, (void*)alloc_test_buffer);
+
+    hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL);
+
+    hl_type *regs[] = {
+        &c->types[T_I32], /* r0 = size */
+        &c->types[T_BYTES], /* r1 = buffer */
+        &c->types[T_I32], /* r2 = offset0 */
+        &c->types[T_I32], /* r3 = offset1 */
+
&c->types[T_I32], /* r4 = val0 */ + &c->types[T_I32], /* r5 = val1 */ + &c->types[T_I32], /* r6 = read val0 */ + &c->types[T_I32], /* r7 = read val1 */ + &c->types[T_I32], /* r8 = result */ + }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 64 (size) */ + OP3(OCall1, 1, 1, 0), /* r1 = alloc_buffer(r0) */ + OP2(OInt, 2, 1), /* r2 = 0 (offset) */ + OP2(OInt, 3, 2), /* r3 = 2 (offset for second i16) */ + OP2(OInt, 4, 3), /* r4 = 0x1234 */ + OP2(OInt, 5, 4), /* r5 = 0x5678 */ + OP3(OSetI16, 1, 2, 4), /* *(i16*)(r1 + r2) = r4 */ + OP3(OSetI16, 1, 3, 5), /* *(i16*)(r1 + r3) = r5 */ + OP3(OGetI16, 6, 1, 2), /* r6 = *(i16*)(r1 + r2) */ + OP3(OGetI16, 7, 1, 3), /* r7 = *(i16*)(r1 + r3) */ + OP3(OAdd, 8, 6, 7), /* r8 = r6 + r7 */ + OP1(ORet, 8), + }; + + test_alloc_function(c, 0, fn_type, 9, regs, 12, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + int expected = 0x1234 + 0x5678; /* 4660 + 22136 = 26796 */ + if (ret != expected) { + fprintf(stderr, " Expected %d, got %d\n", expected, ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: OSetMem and OGetMem - write and read 32-bit values (i32) + */ +TEST(mem_i32_basic) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 64, 0, 4, 100, 200 }; + test_init_ints(c, 5, ints); + + hl_type *alloc_args[] = { &c->types[T_I32] }; + hl_type *alloc_fn_type = test_alloc_fun_type(c, &c->types[T_BYTES], 1, alloc_args); + test_add_native(c, 1, "test", "alloc_buffer", alloc_fn_type, (void*)alloc_test_buffer); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + hl_type *regs[] = { + &c->types[T_I32], /* r0 = size */ + &c->types[T_BYTES], /* r1 = buffer */ + &c->types[T_I32], /* r2 = offset0 */ + &c->types[T_I32], /* r3 = offset1 */ + &c->types[T_I32], /* r4 = val0 */ + &c->types[T_I32], /* r5 = val1 */ + &c->types[T_I32], /* r6 = read val0 */ + &c->types[T_I32], /* r7 = read val1 */ + &c->types[T_I32], /* r8 = result */ + }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 64 (size) */ + OP3(OCall1, 1, 1, 0), /* r1 = alloc_buffer(r0) */ + OP2(OInt, 2, 1), /* r2 = 0 (offset) */ + OP2(OInt, 3, 2), /* r3 = 4 (offset for second i32) */ + OP2(OInt, 4, 3), /* r4 = 100 */ + OP2(OInt, 5, 4), /* r5 = 200 */ + OP3(OSetMem, 1, 2, 4), /* *(i32*)(r1 + r2) = r4 */ + OP3(OSetMem, 1, 3, 5), /* *(i32*)(r1 + r3) = r5 */ + OP3(OGetMem, 6, 1, 2), /* r6 = *(i32*)(r1 + r2) */ + OP3(OGetMem, 7, 1, 3), /* r7 = *(i32*)(r1 + r3) */ + OP3(OAdd, 8, 6, 7), /* r8 = r6 + r7 */ + OP1(ORet, 8), + }; + + test_alloc_function(c, 0, fn_type, 9, regs, 12, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + int expected = 100 + 200; + if (ret != expected) { + fprintf(stderr, " Expected %d, got %d\n", expected, ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: OSetMem and OGetMem with i64 values + */ +TEST(mem_i64_basic) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 64, 0, 8, 1000, 2000 }; + test_init_ints(c, 5, ints); + + hl_type *alloc_args[] = { &c->types[T_I32] }; + hl_type *alloc_fn_type = test_alloc_fun_type(c, &c->types[T_BYTES], 1, alloc_args); + test_add_native(c, 1, "test", "alloc_buffer", alloc_fn_type, (void*)alloc_test_buffer); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I64], 0, NULL); + + hl_type 
*regs[] = { + &c->types[T_I32], /* r0 = size */ + &c->types[T_BYTES], /* r1 = buffer */ + &c->types[T_I32], /* r2 = offset0 */ + &c->types[T_I32], /* r3 = offset1 */ + &c->types[T_I64], /* r4 = val0 */ + &c->types[T_I64], /* r5 = val1 */ + &c->types[T_I64], /* r6 = read val0 */ + &c->types[T_I64], /* r7 = read val1 */ + &c->types[T_I64], /* r8 = result */ + }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 64 (size) */ + OP3(OCall1, 1, 1, 0), /* r1 = alloc_buffer(r0) */ + OP2(OInt, 2, 1), /* r2 = 0 (offset) */ + OP2(OInt, 3, 2), /* r3 = 8 (offset for second i64) */ + OP2(OInt, 4, 3), /* r4 = 1000 (as i64) */ + OP2(OInt, 5, 4), /* r5 = 2000 (as i64) */ + OP3(OSetMem, 1, 2, 4), /* *(i64*)(r1 + r2) = r4 */ + OP3(OSetMem, 1, 3, 5), /* *(i64*)(r1 + r3) = r5 */ + OP3(OGetMem, 6, 1, 2), /* r6 = *(i64*)(r1 + r2) */ + OP3(OGetMem, 7, 1, 3), /* r7 = *(i64*)(r1 + r3) */ + OP3(OAdd, 8, 6, 7), /* r8 = r6 + r7 */ + OP1(ORet, 8), + }; + + test_alloc_function(c, 0, fn_type, 9, regs, 12, ops); + + int result; + int64_t (*fn)(void) = (int64_t(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int64_t ret = fn(); + int64_t expected = 1000 + 2000; + if (ret != expected) { + fprintf(stderr, " Expected %ld, got %ld\n", (long)expected, (long)ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: OSetMem and OGetMem with f64 values + */ +TEST(mem_f64_basic) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 64, 0, 8 }; + test_init_ints(c, 3, ints); + + double floats[] = { 1.5, 2.5 }; + test_init_floats(c, 2, floats); + + hl_type *alloc_args[] = { &c->types[T_I32] }; + hl_type *alloc_fn_type = test_alloc_fun_type(c, &c->types[T_BYTES], 1, alloc_args); + test_add_native(c, 1, "test", "alloc_buffer", alloc_fn_type, (void*)alloc_test_buffer); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_F64], 0, NULL); + + hl_type *regs[] = { + &c->types[T_I32], /* r0 = size */ + &c->types[T_BYTES], /* r1 = buffer */ + &c->types[T_I32], /* r2 = offset0 */ + &c->types[T_I32], /* r3 = offset1 */ + &c->types[T_F64], /* r4 = val0 */ + &c->types[T_F64], /* r5 = val1 */ + &c->types[T_F64], /* r6 = read val0 */ + &c->types[T_F64], /* r7 = read val1 */ + &c->types[T_F64], /* r8 = result */ + }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 64 (size) */ + OP3(OCall1, 1, 1, 0), /* r1 = alloc_buffer(r0) */ + OP2(OInt, 2, 1), /* r2 = 0 (offset) */ + OP2(OInt, 3, 2), /* r3 = 8 (offset for second f64) */ + OP2(OFloat, 4, 0), /* r4 = 1.5 */ + OP2(OFloat, 5, 1), /* r5 = 2.5 */ + OP3(OSetMem, 1, 2, 4), /* *(f64*)(r1 + r2) = r4 */ + OP3(OSetMem, 1, 3, 5), /* *(f64*)(r1 + r3) = r5 */ + OP3(OGetMem, 6, 1, 2), /* r6 = *(f64*)(r1 + r2) */ + OP3(OGetMem, 7, 1, 3), /* r7 = *(f64*)(r1 + r3) */ + OP3(OAdd, 8, 6, 7), /* r8 = r6 + r7 */ + OP1(ORet, 8), + }; + + test_alloc_function(c, 0, fn_type, 9, regs, 12, ops); + + int result; + double (*fn)(void) = (double(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + double ret = fn(); + double expected = 1.5 + 2.5; + double diff = ret - expected; + if (diff < 0) diff = -diff; + if (diff > 0.0001) { + fprintf(stderr, " Expected %f, got %f\n", expected, ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Non-zero base offset + * + * Tests accessing memory at non-aligned offsets + */ +TEST(mem_nonzero_offset) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 64, 10, 11, 12, 13, 1, 2, 3, 
4 };
+    test_init_ints(c, 9, ints);
+
+    hl_type *alloc_args[] = { &c->types[T_I32] };
+    hl_type *alloc_fn_type = test_alloc_fun_type(c, &c->types[T_BYTES], 1, alloc_args);
+    test_add_native(c, 1, "test", "alloc_buffer", alloc_fn_type, (void*)alloc_test_buffer);
+
+    hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL);
+
+    hl_type *regs[] = {
+        &c->types[T_I32], /* r0 = size */
+        &c->types[T_BYTES], /* r1 = buffer */
+        &c->types[T_I32], /* r2-r5 = offsets */
+        &c->types[T_I32],
+        &c->types[T_I32],
+        &c->types[T_I32],
+        &c->types[T_I32], /* r6-r9 = values */
+        &c->types[T_I32],
+        &c->types[T_I32],
+        &c->types[T_I32],
+        &c->types[T_I32], /* r10 = sum */
+        &c->types[T_I32], /* r11-r14 = read values */
+        &c->types[T_I32],
+        &c->types[T_I32],
+        &c->types[T_I32],
+    };
+
+    hl_opcode ops[] = {
+        OP2(OInt, 0, 0), /* r0 = 64 (size) */
+        OP3(OCall1, 1, 1, 0), /* r1 = alloc_buffer(r0) */
+        OP2(OInt, 2, 1), /* r2 = 10 */
+        OP2(OInt, 3, 2), /* r3 = 11 */
+        OP2(OInt, 4, 3), /* r4 = 12 */
+        OP2(OInt, 5, 4), /* r5 = 13 */
+        OP2(OInt, 6, 5), /* r6 = 1 */
+        OP2(OInt, 7, 6), /* r7 = 2 */
+        OP2(OInt, 8, 7), /* r8 = 3 */
+        OP2(OInt, 9, 8), /* r9 = 4 */
+        OP3(OSetI8, 1, 2, 6), /* buf[10] = 1 */
+        OP3(OSetI8, 1, 3, 7), /* buf[11] = 2 */
+        OP3(OSetI8, 1, 4, 8), /* buf[12] = 3 */
+        OP3(OSetI8, 1, 5, 9), /* buf[13] = 4 */
+        OP3(OGetI8, 11, 1, 2), /* r11 = buf[10] */
+        OP3(OGetI8, 12, 1, 3), /* r12 = buf[11] */
+        OP3(OGetI8, 13, 1, 4), /* r13 = buf[12] */
+        OP3(OGetI8, 14, 1, 5), /* r14 = buf[13] */
+        OP3(OAdd, 10, 11, 12), /* r10 = r11 + r12 */
+        OP3(OAdd, 10, 10, 13), /* r10 = r10 + r13 */
+        OP3(OAdd, 10, 10, 14), /* r10 = r10 + r14 */
+        OP1(ORet, 10),
+    };
+
+    test_alloc_function(c, 0, fn_type, 15, regs, 22, ops);
+
+    int result;
+    int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result);
+    if (result != TEST_PASS) return result;
+
+    int ret = fn();
+    int expected = 1 + 2 + 3 + 4; /* 10 */
+    if (ret != expected) {
+        fprintf(stderr, " Expected %d, got %d\n", expected, ret);
+        return TEST_FAIL;
+    }
+
+    return TEST_PASS;
+}
+
+/* Test list */
+static test_entry_t tests[] = {
+    TEST_ENTRY(mem_i8_basic),
+    TEST_ENTRY(mem_i16_basic),
+    TEST_ENTRY(mem_i32_basic),
+    TEST_ENTRY(mem_i64_basic),
+    TEST_ENTRY(mem_f64_basic),
+    TEST_ENTRY(mem_nonzero_offset),
+};
+
+int main(int argc, char **argv) {
+    printf("HashLink AArch64 JIT - Memory Operation Tests\n");
+    return run_tests(tests, sizeof(tests) / sizeof(tests[0]));
+}
diff --git a/other/tests/minimal/test_methods.c b/other/tests/minimal/test_methods.c
new file mode 100644
index 000000000..b0bb5d144
--- /dev/null
+++ b/other/tests/minimal/test_methods.c
@@ -0,0 +1,330 @@
+/*
+ * Test method call operations for HashLink AArch64 JIT
+ *
+ * Tests: OCall4 (an object-type helper for OCallMethod/OCallThis-style
+ * setups is included below, but only OCall4 is exercised in this file)
+ *
+ * OCallMethod: call a method on an object via vtable
+ * OCallThis: call a method with implicit 'this' (R0)
+ * OCall4: call a function with 4 arguments
+ */
+#include "test_harness.h"
+
+/* Helper to create an object type with a method */
+static hl_type *create_obj_type_with_method(hl_code *c, const char *name, int method_findex) {
+    if (c->ntypes >= MAX_TYPES) {
+        fprintf(stderr, "Too many types\n");
+        return NULL;
+    }
+
+    int idx = c->ntypes++;
+    hl_type *t = &c->types[idx];
+    memset(t, 0, sizeof(hl_type));
+
+    t->kind = HOBJ;
+    t->obj = (hl_type_obj*)calloc(1, sizeof(hl_type_obj));
+    t->obj->name = (uchar*)name;
+    t->obj->nfields = 0;
+    t->obj->nproto = 1;
+    t->obj->nbindings = 0;
+
+    t->obj->proto = (hl_obj_proto*)calloc(1, sizeof(hl_obj_proto));
+    t->obj->proto[0].name = (uchar*)"testMethod";
+    t->obj->proto[0].findex = method_findex;
+    t->obj->proto[0].pindex = 0;
+
+    return t;
+}
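/*
 * [Illustration, editor-added; not part of the patch] OCall4 is the first
 * call opcode whose arguments no longer fit in the fixed p1/p2/p3 fields of
 * hl_opcode, which is why the initializers below pass a fifth member: the
 * extra pointer carries argument registers 1..3. Decoding such an opcode
 * looks like this (field names as used by the initializers in these tests):
 */
static void decode_ocall4(const hl_opcode *o, int *dst, int *findex, int args[4]) {
    *dst = o->p1;          /* destination register */
    *findex = o->p2;       /* callee function index */
    args[0] = o->p3;       /* argument register 0 */
    args[1] = o->extra[0]; /* argument registers 1..3 live in extra */
    args[2] = o->extra[1];
    args[3] = o->extra[2];
}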
(uchar*)"testMethod"; + t->obj->proto[0].findex = method_findex; + t->obj->proto[0].pindex = 0; + + return t; +} + +/* + * Test: OCall4 - call function with 4 arguments + * + * fn0: (i32, i32, i32, i32) -> i32 { return a + b + c + d; } + * fn1: () -> i32 { return fn0(10, 20, 5, 7); } // 10+20+5+7 = 42 + */ +TEST(call4_basic) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 10, 20, 5, 7 }; + test_init_ints(c, 4, ints); + + /* fn0 type: (i32, i32, i32, i32) -> i32 */ + hl_type *fn0_args[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + hl_type *fn0_type = test_alloc_fun_type(c, &c->types[T_I32], 4, fn0_args); + + /* fn1 type: () -> i32 */ + hl_type *fn1_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + c->functions = (hl_function*)calloc(MAX_FUNCTIONS, sizeof(hl_function)); + c->nfunctions = 0; + + /* fn0: sum of 4 args */ + { + hl_type *regs[] = { + &c->types[T_I32], /* r0 = a */ + &c->types[T_I32], /* r1 = b */ + &c->types[T_I32], /* r2 = c */ + &c->types[T_I32], /* r3 = d */ + &c->types[T_I32], /* r4 = result */ + }; + hl_opcode ops[] = { + OP3(OAdd, 4, 0, 1), /* r4 = a + b */ + OP3(OAdd, 4, 4, 2), /* r4 = r4 + c */ + OP3(OAdd, 4, 4, 3), /* r4 = r4 + d */ + OP1(ORet, 4), + }; + hl_function *f = &c->functions[c->nfunctions++]; + f->findex = 0; + f->type = fn0_type; + f->nregs = 5; + f->nops = 4; + f->regs = (hl_type**)malloc(sizeof(hl_type*) * 5); + memcpy(f->regs, regs, sizeof(regs)); + f->ops = (hl_opcode*)malloc(sizeof(hl_opcode) * 4); + memcpy(f->ops, ops, sizeof(ops)); + } + + /* fn1: calls fn0 with 4 args */ + { + hl_type *regs[] = { + &c->types[T_I32], /* r0 = arg 0 */ + &c->types[T_I32], /* r1 = arg 1 */ + &c->types[T_I32], /* r2 = arg 2 */ + &c->types[T_I32], /* r3 = arg 3 */ + &c->types[T_I32], /* r4 = result */ + }; + + /* OCall4: dst=p1, findex=p2, arg0=p3, extra=[arg1, arg2, arg3] */ + static int extra[] = { 1, 2, 3 }; /* registers for args 1, 2, 3 */ + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 10 */ + OP2(OInt, 1, 1), /* r1 = 20 */ + OP2(OInt, 2, 2), /* r2 = 5 */ + OP2(OInt, 3, 3), /* r3 = 7 */ + { OCall4, 4, 0, 0, extra }, /* r4 = fn0(r0, r1, r2, r3) */ + OP1(ORet, 4), + }; + + hl_function *f = &c->functions[c->nfunctions++]; + f->findex = 1; + f->type = fn1_type; + f->nregs = 5; + f->nops = 6; + f->regs = (hl_type**)malloc(sizeof(hl_type*) * 5); + memcpy(f->regs, regs, sizeof(regs)); + f->ops = (hl_opcode*)malloc(sizeof(hl_opcode) * 6); + memcpy(f->ops, ops, sizeof(ops)); + } + + c->entrypoint = 1; + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: OCall4 with mixed types (some floats) + */ +TEST(call4_mixed_types) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 10, 32 }; + test_init_ints(c, 2, ints); + + /* fn0: (i32, i32, i32, i32) -> i32 */ + hl_type *fn0_args[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + hl_type *fn0_type = test_alloc_fun_type(c, &c->types[T_I32], 4, fn0_args); + + hl_type *fn1_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + c->functions = (hl_function*)calloc(MAX_FUNCTIONS, sizeof(hl_function)); + c->nfunctions = 0; + + /* fn0: return just first + second arg */ + { + hl_type *regs[] = { + &c->types[T_I32], + 
&c->types[T_I32], + &c->types[T_I32], + &c->types[T_I32], + &c->types[T_I32], + }; + hl_opcode ops[] = { + OP3(OAdd, 4, 0, 1), + OP1(ORet, 4), + }; + hl_function *f = &c->functions[c->nfunctions++]; + f->findex = 0; + f->type = fn0_type; + f->nregs = 5; + f->nops = 2; + f->regs = (hl_type**)malloc(sizeof(hl_type*) * 5); + memcpy(f->regs, regs, sizeof(regs)); + f->ops = (hl_opcode*)malloc(sizeof(hl_opcode) * 2); + memcpy(f->ops, ops, sizeof(ops)); + } + + /* fn1: call fn0(10, 32, 0, 0) = 42 */ + { + hl_type *regs[] = { + &c->types[T_I32], + &c->types[T_I32], + &c->types[T_I32], + &c->types[T_I32], + &c->types[T_I32], + }; + + static int extra[] = { 1, 2, 3 }; /* registers for args 1, 2, 3 */ + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 10 */ + OP2(OInt, 1, 1), /* r1 = 32 */ + OP1(ONull, 2), /* r2 = 0 (null as int) */ + OP1(ONull, 3), /* r3 = 0 */ + { OCall4, 4, 0, 0, extra }, /* r4 = fn0(10, 32, 0, 0) */ + OP1(ORet, 4), + }; + + hl_function *f = &c->functions[c->nfunctions++]; + f->findex = 1; + f->type = fn1_type; + f->nregs = 5; + f->nops = 6; + f->regs = (hl_type**)malloc(sizeof(hl_type*) * 5); + memcpy(f->regs, regs, sizeof(regs)); + f->ops = (hl_opcode*)malloc(sizeof(hl_opcode) * 6); + memcpy(f->ops, ops, sizeof(ops)); + } + + c->entrypoint = 1; + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Multiple OCall4 in sequence + * + * This tests that register allocation works correctly across multiple calls. + */ +TEST(call4_multiple) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 1, 2, 3, 4, 10 }; + test_init_ints(c, 5, ints); + + hl_type *fn0_args[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + hl_type *fn0_type = test_alloc_fun_type(c, &c->types[T_I32], 4, fn0_args); + hl_type *fn1_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + c->functions = (hl_function*)calloc(MAX_FUNCTIONS, sizeof(hl_function)); + c->nfunctions = 0; + + /* fn0: sum of 4 args */ + { + hl_type *regs[] = { + &c->types[T_I32], &c->types[T_I32], &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] + }; + hl_opcode ops[] = { + OP3(OAdd, 4, 0, 1), + OP3(OAdd, 4, 4, 2), + OP3(OAdd, 4, 4, 3), + OP1(ORet, 4), + }; + hl_function *f = &c->functions[c->nfunctions++]; + f->findex = 0; + f->type = fn0_type; + f->nregs = 5; + f->nops = 4; + f->regs = (hl_type**)malloc(sizeof(hl_type*) * 5); + memcpy(f->regs, regs, sizeof(regs)); + f->ops = (hl_opcode*)malloc(sizeof(hl_opcode) * 4); + memcpy(f->ops, ops, sizeof(ops)); + } + + /* fn1: call fn0 twice and sum results */ + { + hl_type *regs[] = { + &c->types[T_I32], &c->types[T_I32], &c->types[T_I32], &c->types[T_I32], + &c->types[T_I32], &c->types[T_I32], &c->types[T_I32], + }; + + static int extra1[] = { 1, 2, 3 }; /* registers for args 1, 2, 3 */ + static int extra2[] = { 1, 2, 3 }; /* registers for args 1, 2, 3 */ + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 1 */ + OP2(OInt, 1, 1), /* r1 = 2 */ + OP2(OInt, 2, 2), /* r2 = 3 */ + OP2(OInt, 3, 3), /* r3 = 4 */ + { OCall4, 4, 0, 0, extra1 }, /* r4 = fn0(1,2,3,4) = 10 */ + OP2(OInt, 0, 4), /* r0 = 10 */ + OP2(OInt, 1, 4), /* r1 = 10 */ + OP2(OInt, 2, 4), /* r2 = 10 */ + OP2(OInt, 3, 1), /* r3 = 2 */ + { OCall4, 5, 0, 0, extra2 }, /* r5 = fn0(10,10,10,2) = 32 */ + OP3(OAdd, 6, 4, 5), /* r6 = 10 + 32 = 
42 */ + OP1(ORet, 6), + }; + + hl_function *f = &c->functions[c->nfunctions++]; + f->findex = 1; + f->type = fn1_type; + f->nregs = 7; + f->nops = 12; + f->regs = (hl_type**)malloc(sizeof(hl_type*) * 7); + memcpy(f->regs, regs, sizeof(regs)); + f->ops = (hl_opcode*)malloc(sizeof(hl_opcode) * 12); + memcpy(f->ops, ops, sizeof(ops)); + } + + c->entrypoint = 1; + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* Test list */ +static test_entry_t tests[] = { + TEST_ENTRY(call4_basic), + TEST_ENTRY(call4_mixed_types), + TEST_ENTRY(call4_multiple), +}; + +int main(int argc, char **argv) { + printf("HashLink AArch64 JIT - Method Call Tests\n"); + return run_tests(tests, sizeof(tests) / sizeof(tests[0])); +} diff --git a/other/tests/minimal/test_native_field.c b/other/tests/minimal/test_native_field.c new file mode 100644 index 000000000..f64de7a4f --- /dev/null +++ b/other/tests/minimal/test_native_field.c @@ -0,0 +1,490 @@ +/* + * Test native call result stored in object field + * + * This mimics the pattern in hello.hl that crashes: + * 1. Call native function that returns a value + * 2. Store result in object field + * 3. Return object + * 4. Read field from returned object + * 5. Use the value + */ +#include "test_harness.h" + +/* Native function that returns an integer */ +static int native_get_value(void) { + return 42; +} + +/* Native function that returns a pointer */ +static void *native_get_ptr(void) { + static int data = 123; + return &data; +} + +/* Helper to create an object type with fields */ +static hl_type *create_obj_type(hl_code *c, const char *name, int nfields, hl_type **field_types) { + if (c->ntypes >= MAX_TYPES) { + fprintf(stderr, "Too many types\n"); + return NULL; + } + + int idx = c->ntypes++; + hl_type *t = &c->types[idx]; + memset(t, 0, sizeof(hl_type)); + + t->kind = HOBJ; + t->obj = (hl_type_obj*)calloc(1, sizeof(hl_type_obj)); + t->obj->name = (uchar*)name; + t->obj->nfields = nfields; + t->obj->nproto = 0; + t->obj->nbindings = 0; + + if (nfields > 0) { + t->obj->fields = (hl_obj_field*)calloc(nfields, sizeof(hl_obj_field)); + for (int i = 0; i < nfields; i++) { + t->obj->fields[i].name = (uchar*)"field"; + t->obj->fields[i].t = field_types[i]; + t->obj->fields[i].hashed_name = i; + } + } + + return t; +} + +/* + * Test: Call native, store in field, return object, read field + * + * This is a two-function test to match hello.hl's pattern: + * + * F0 (inner): + * r0 = new Obj + * r1 = call native_get_value() + * set_field r0.field[0] = r1 + * return r0 + * + * F1 (outer, entrypoint): + * r0 = call F0() + * r1 = get_field r0.field[0] + * return r1 + * + * Expected: 42 + */ +TEST(native_to_field_to_return) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + /* Create object type with one i32 field */ + hl_type *field_types[] = { &c->types[T_I32] }; + hl_type *obj_type = create_obj_type(c, "TestObj", 1, field_types); + if (!obj_type) return TEST_FAIL; + + /* Native function type: () -> i32 */ + hl_type *native_fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + /* Add native function at findex 2 */ + test_add_native(c, 2, "test", "native_get_value", native_fn_type, native_get_value); + + /* F0 (inner function): () -> obj + * r0 = new Obj + * r1 = call native (findex 2) + * set_field r0.field[0] = r1 + * 
return r0
+ */
+	hl_type *inner_fn_type = test_alloc_fun_type(c, obj_type, 0, NULL);
+	hl_type *inner_regs[] = { obj_type, &c->types[T_I32] };
+	hl_opcode inner_ops[] = {
+		OP1(ONew, 0),            /* r0 = new Obj */
+		OP2(OCall0, 1, 2),       /* r1 = call native F2 */
+		OP3(OSetField, 0, 0, 1), /* r0.field[0] = r1 */
+		OP1(ORet, 0),            /* return r0 */
+	};
+	test_alloc_function(c, 0, inner_fn_type, 2, inner_regs, 4, inner_ops);
+
+	/* F1 (outer function, entrypoint): () -> i32
+	 *   r0 = call F0()
+	 *   r1 = get_field r0.field[0]
+	 *   return r1
+	 */
+	hl_type *outer_fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL);
+	hl_type *outer_regs[] = { obj_type, &c->types[T_I32] };
+	hl_opcode outer_ops[] = {
+		OP2(OCall0, 0, 0),    /* r0 = call F0 */
+		OP3(OField, 1, 0, 0), /* r1 = r0.field[0] */
+		OP1(ORet, 1),         /* return r1 */
+	};
+	test_alloc_function(c, 1, outer_fn_type, 2, outer_regs, 3, outer_ops);
+
+	/* Set entrypoint to F1 */
+	c->entrypoint = 1;
+
+	int result;
+	int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result);
+	if (result != TEST_PASS) return result;
+
+	int ret = fn();
+	if (ret != 42) {
+		fprintf(stderr, "  Expected 42, got %d\n", ret);
+		return TEST_FAIL;
+	}
+
+	return TEST_PASS;
+}
+
+/*
+ * Test: Same pattern but with a pointer type (like array)
+ *
+ * F0 (inner):
+ *   r0 = new Obj
+ *   r1 = call native_get_ptr()
+ *   set_field r0.field[0] = r1
+ *   return r0
+ *
+ * F1 (outer):
+ *   r0 = call F0()
+ *   r1 = get_field r0.field[0]
+ *   return r1
+ */
+TEST(native_ptr_to_field_to_return) {
+	test_init_runtime();
+
+	hl_code *c = test_alloc_code();
+	test_init_base_types(c);
+
+	/* Create object type with one bytes (pointer) field */
+	hl_type *field_types[] = { &c->types[T_BYTES] };
+	hl_type *obj_type = create_obj_type(c, "TestObjPtr", 1, field_types);
+	if (!obj_type) return TEST_FAIL;
+
+	/* Native function type: () -> bytes */
+	hl_type *native_fn_type = test_alloc_fun_type(c, &c->types[T_BYTES], 0, NULL);
+
+	/* Add native function at findex 2 */
+	test_add_native(c, 2, "test", "native_get_ptr", native_fn_type, native_get_ptr);
+
+	/* F0 (inner function): () -> obj */
+	hl_type *inner_fn_type = test_alloc_fun_type(c, obj_type, 0, NULL);
+	hl_type *inner_regs[] = { obj_type, &c->types[T_BYTES] };
+	hl_opcode inner_ops[] = {
+		OP1(ONew, 0),            /* r0 = new Obj */
+		OP2(OCall0, 1, 2),       /* r1 = call native F2 */
+		OP3(OSetField, 0, 0, 1), /* r0.field[0] = r1 */
+		OP1(ORet, 0),            /* return r0 */
+	};
+	test_alloc_function(c, 0, inner_fn_type, 2, inner_regs, 4, inner_ops);
+
+	/* F1 (outer function): () -> bytes */
+	hl_type *outer_fn_type = test_alloc_fun_type(c, &c->types[T_BYTES], 0, NULL);
+	hl_type *outer_regs[] = { obj_type, &c->types[T_BYTES] };
+	hl_opcode outer_ops[] = {
+		OP2(OCall0, 0, 0),    /* r0 = call F0 */
+		OP3(OField, 1, 0, 0), /* r1 = r0.field[0] */
+		OP1(ORet, 1),         /* return r1 */
+	};
+	test_alloc_function(c, 1, outer_fn_type, 2, outer_regs, 3, outer_ops);
+
+	c->entrypoint = 1;
+
+	int result;
+	void *(*fn)(void) = (void*(*)(void))test_jit_compile(c, &result);
+	if (result != TEST_PASS) return result;
+
+	void *ret = fn();
+	if (ret == NULL) {
+		fprintf(stderr, "  Got NULL pointer\n");
+		return TEST_FAIL;
+	}
+	/* The native returns the address of its own static variable, which is not
+	 * visible from this test, so compare the pointed-to value instead of the
+	 * pointer itself. */
+	int got = *(int*)ret;
+	if (got != 123) {
+		fprintf(stderr, "  Expected ptr to 123, got ptr to %d\n", got);
+		return TEST_FAIL;
+	}
+
+	return TEST_PASS;
+}
+
+/*
+ * Test: Multiple fields set from native calls
+ *
+ * This more closely matches
F295 which sets multiple fields + */ +TEST(native_multiple_fields) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 100 }; + test_init_ints(c, 1, ints); + + /* Create object type with 3 fields */ + hl_type *field_types[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_BYTES] }; + hl_type *obj_type = create_obj_type(c, "TestObj3", 3, field_types); + if (!obj_type) return TEST_FAIL; + + /* Native function types */ + hl_type *native_int_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *native_ptr_type = test_alloc_fun_type(c, &c->types[T_BYTES], 0, NULL); + + /* Add native functions at findex 2 and 3 */ + test_add_native(c, 2, "test", "native_get_value", native_int_type, native_get_value); + test_add_native(c, 3, "test", "native_get_ptr", native_ptr_type, native_get_ptr); + + /* F0 (inner): () -> obj + * r0 = new Obj + * r1 = 100 + * set_field r0.field[0] = r1 + * r2 = call native_get_value() + * set_field r0.field[1] = r2 + * r3 = call native_get_ptr() + * set_field r0.field[2] = r3 + * return r0 + */ + hl_type *inner_fn_type = test_alloc_fun_type(c, obj_type, 0, NULL); + hl_type *inner_regs[] = { obj_type, &c->types[T_I32], &c->types[T_I32], &c->types[T_BYTES] }; + hl_opcode inner_ops[] = { + OP1(ONew, 0), /* r0 = new Obj */ + OP2(OInt, 1, 0), /* r1 = 100 */ + OP3(OSetField, 0, 0, 1), /* r0.field[0] = r1 */ + OP2(OCall0, 2, 2), /* r2 = call native F2 (returns 42) */ + OP3(OSetField, 0, 1, 2), /* r0.field[1] = r2 */ + OP2(OCall0, 3, 3), /* r3 = call native F3 (returns ptr) */ + OP3(OSetField, 0, 2, 3), /* r0.field[2] = r3 */ + OP1(ORet, 0), /* return r0 */ + }; + test_alloc_function(c, 0, inner_fn_type, 4, inner_regs, 8, inner_ops); + + /* F1 (outer): () -> i32 + * r0 = call F0() + * r1 = get_field r0.field[0] ; should be 100 + * r2 = get_field r0.field[1] ; should be 42 + * r3 = r1 + r2 ; should be 142 + * r4 = get_field r0.field[2] ; should be ptr + * null_check r4 ; ptr should not be null + * return r3 + */ + hl_type *outer_fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *outer_regs[] = { obj_type, &c->types[T_I32], &c->types[T_I32], &c->types[T_I32], &c->types[T_BYTES] }; + hl_opcode outer_ops[] = { + OP2(OCall0, 0, 0), /* r0 = call F0 */ + OP3(OField, 1, 0, 0), /* r1 = r0.field[0] */ + OP3(OField, 2, 0, 1), /* r2 = r0.field[1] */ + OP3(OAdd, 3, 1, 2), /* r3 = r1 + r2 */ + OP3(OField, 4, 0, 2), /* r4 = r0.field[2] */ + OP1(ONullCheck, 4), /* null_check r4 */ + OP1(ORet, 3), /* return r3 */ + }; + test_alloc_function(c, 1, outer_fn_type, 5, outer_regs, 7, outer_ops); + + c->entrypoint = 1; + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 142) { + fprintf(stderr, " Expected 142 (100+42), got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: OCall2 with arguments passed to inner function + * + * This matches hello.hl's pattern more closely: + * - Entrypoint uses OCall2 to call F295 with 2 type args + * - F295 uses OCall1 to call native with one of those args + * + * F0 (inner): (i32 a, i32 b) -> obj + * r2 = new Obj + * r3 = call native_get_value() ; returns 42 + * r4 = a + b + r3 + * set_field r2.field[0] = r4 + * return r2 + * + * F1 (outer): () -> i32 + * r0 = 10 + * r1 = 20 + * r2 = call F0(r0, r1) ; OCall2 + * r3 = get_field r2.field[0] ; should be 10+20+42=72 + * return r3 + */ +TEST(ocall2_with_native_in_callee) { + test_init_runtime(); + + hl_code 
*c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 10, 20 }; + test_init_ints(c, 2, ints); + + /* Create object type with one i32 field */ + hl_type *field_types[] = { &c->types[T_I32] }; + hl_type *obj_type = create_obj_type(c, "TestObj", 1, field_types); + if (!obj_type) return TEST_FAIL; + + /* Native function type: () -> i32 */ + hl_type *native_fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + /* Add native function at findex 2 */ + test_add_native(c, 2, "test", "native_get_value", native_fn_type, native_get_value); + + /* F0 (inner): (i32, i32) -> obj + * r0 = arg a (i32) + * r1 = arg b (i32) + * r2 = new Obj + * r3 = call native F2 (returns 42) + * r4 = a + b + * r5 = r4 + r3 + * set_field r2.field[0] = r5 + * return r2 + */ + hl_type *inner_arg_types[] = { &c->types[T_I32], &c->types[T_I32] }; + hl_type *inner_fn_type = test_alloc_fun_type(c, obj_type, 2, inner_arg_types); + hl_type *inner_regs[] = { + &c->types[T_I32], &c->types[T_I32], /* r0, r1 = args */ + obj_type, &c->types[T_I32], /* r2 = obj, r3 = native result */ + &c->types[T_I32], &c->types[T_I32] /* r4, r5 = temps */ + }; + hl_opcode inner_ops[] = { + OP1(ONew, 2), /* r2 = new Obj */ + OP2(OCall0, 3, 2), /* r3 = call native F2 (returns 42) */ + OP3(OAdd, 4, 0, 1), /* r4 = r0 + r1 */ + OP3(OAdd, 5, 4, 3), /* r5 = r4 + r3 */ + OP3(OSetField, 2, 0, 5), /* r2.field[0] = r5 */ + OP1(ORet, 2), /* return r2 */ + }; + test_alloc_function(c, 0, inner_fn_type, 6, inner_regs, 6, inner_ops); + + /* F1 (outer): () -> i32 + * r0 = 10 + * r1 = 20 + * r2 = call F0(r0, r1) ; OCall2 + * r3 = get_field r2.field[0] + * return r3 + */ + hl_type *outer_fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *outer_regs[] = { + &c->types[T_I32], &c->types[T_I32], /* r0, r1 = args to pass */ + obj_type, &c->types[T_I32] /* r2 = result obj, r3 = field value */ + }; + hl_opcode outer_ops[] = { + OP2(OInt, 0, 0), /* r0 = 10 */ + OP2(OInt, 1, 1), /* r1 = 20 */ + OP4_CALL2(OCall2, 2, 0, 0, 1), /* r2 = call F0(r0, r1) */ + OP3(OField, 3, 2, 0), /* r3 = r2.field[0] */ + OP1(ORet, 3), /* return r3 */ + }; + test_alloc_function(c, 1, outer_fn_type, 4, outer_regs, 5, outer_ops); + + c->entrypoint = 1; + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 72) { /* 10 + 20 + 42 = 72 */ + fprintf(stderr, " Expected 72 (10+20+42), got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: OCall1 passing argument to native + * + * F0 (inner): (i32 x) -> i32 + * r1 = call native_add_ten(r0) + * return r1 + * + * F1 (outer): () -> i32 + * r0 = 32 + * r1 = call F0(r0) ; OCall1 + * return r1 ; should be 42 + */ +static int native_add_ten(int x) { + return x + 10; +} + +TEST(ocall1_arg_to_native) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 32 }; + test_init_ints(c, 1, ints); + + /* Native function type: (i32) -> i32 */ + hl_type *native_arg_types[] = { &c->types[T_I32] }; + hl_type *native_fn_type = test_alloc_fun_type(c, &c->types[T_I32], 1, native_arg_types); + + /* Add native function at findex 2 */ + test_add_native(c, 2, "test", "native_add_ten", native_fn_type, native_add_ten); + + /* F0 (inner): (i32) -> i32 + * r0 = arg x + * r1 = call native F2(r0) + * return r1 + */ + hl_type *inner_arg_types[] = { &c->types[T_I32] }; + hl_type *inner_fn_type = test_alloc_fun_type(c, &c->types[T_I32], 1, inner_arg_types); + hl_type 
*inner_regs[] = { &c->types[T_I32], &c->types[T_I32] }; + hl_opcode inner_ops[] = { + OP3(OCall1, 1, 2, 0), /* r1 = call F2(r0) */ + OP1(ORet, 1), /* return r1 */ + }; + test_alloc_function(c, 0, inner_fn_type, 2, inner_regs, 2, inner_ops); + + /* F1 (outer): () -> i32 + * r0 = 32 + * r1 = call F0(r0) ; OCall1 + * return r1 + */ + hl_type *outer_fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *outer_regs[] = { &c->types[T_I32], &c->types[T_I32] }; + hl_opcode outer_ops[] = { + OP2(OInt, 0, 0), /* r0 = 32 */ + OP3(OCall1, 1, 0, 0), /* r1 = call F0(r0) */ + OP1(ORet, 1), /* return r1 */ + }; + test_alloc_function(c, 1, outer_fn_type, 2, outer_regs, 3, outer_ops); + + c->entrypoint = 1; + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42 (32+10), got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* Test list */ +static test_entry_t tests[] = { + TEST_ENTRY(native_to_field_to_return), + TEST_ENTRY(native_ptr_to_field_to_return), + TEST_ENTRY(native_multiple_fields), + TEST_ENTRY(ocall2_with_native_in_callee), + TEST_ENTRY(ocall1_arg_to_native), +}; + +int main(int argc, char **argv) { + printf("HashLink AArch64 JIT - Native->Field Pattern Tests\n"); + return run_tests(tests, sizeof(tests) / sizeof(tests[0])); +} diff --git a/other/tests/minimal/test_natives.c b/other/tests/minimal/test_natives.c new file mode 100644 index 000000000..65868a339 --- /dev/null +++ b/other/tests/minimal/test_natives.c @@ -0,0 +1,234 @@ +/* + * Test native function calls for HashLink AArch64 JIT + * + * Tests calling C functions from JIT code + */ +#include "test_harness.h" + +/* Simple native functions for testing */ +static int native_return_42(void) { + return 42; +} + +static int native_add(int a, int b) { + return a + b; +} + +static int native_add3(int a, int b, int c) { + return a + b + c; +} + +static int g_side_effect = 0; + +static void native_set_global(int val) { + g_side_effect = val; +} + +static int native_get_global(void) { + return g_side_effect; +} + +/* + * Test: Call native function with no args + * + * op0: call0 r0, native_return_42 + * op1: ret r0 + */ +TEST(native_call0) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + /* Native at findex 1, our function at findex 0 */ + hl_type *native_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + test_add_native(c, 1, "test", "return_42", native_type, native_return_42); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { &c->types[T_I32] }; + + hl_opcode ops[] = { + OP2(OCall0, 0, 1), /* op0: r0 = call native findex=1 */ + OP1(ORet, 0), /* op1: return r0 */ + }; + + test_alloc_function(c, 0, fn_type, 1, regs, 2, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Call native function with 2 args + * + * op0: int r0, 0 ; r0 = 10 + * op1: int r1, 1 ; r1 = 32 + * op2: call2 r2, native_add, r0, r1 + * op3: ret r2 ; return 42 + */ +TEST(native_call2) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 10, 32 }; + test_init_ints(c, 2, ints); + + /* Native at findex 1 */ + hl_type *arg_types[] = { 
&c->types[T_I32], &c->types[T_I32] }; + hl_type *native_type = test_alloc_fun_type(c, &c->types[T_I32], 2, arg_types); + test_add_native(c, 1, "test", "add", native_type, native_add); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* op0: r0 = 10 */ + OP2(OInt, 1, 1), /* op1: r1 = 32 */ + OP4_CALL2(OCall2, 2, 1, 0, 1), /* op2: r2 = call native(r0, r1) */ + OP1(ORet, 2), /* op3: return r2 */ + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 4, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Call native function with 3 args (uses OCall3) + * + * op0: int r0, 0 ; r0 = 10 + * op1: int r1, 1 ; r1 = 20 + * op2: int r2, 2 ; r2 = 12 + * op3: call3 r3, native_add3, r0, r1, r2 + * op4: ret r3 ; return 42 + */ +TEST(native_call3) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 10, 20, 12 }; + test_init_ints(c, 3, ints); + + /* Native at findex 1 */ + hl_type *arg_types[] = { &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + hl_type *native_type = test_alloc_fun_type(c, &c->types[T_I32], 3, arg_types); + test_add_native(c, 1, "test", "add3", native_type, native_add3); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { + &c->types[T_I32], &c->types[T_I32], + &c->types[T_I32], &c->types[T_I32] + }; + + /* OCall3: p1=dst, p2=findex, p3=arg0, extra[0]=arg1, extra[1]=arg2 */ + int extra[] = { 1, 2 }; + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* op0: r0 = 10 */ + OP2(OInt, 1, 1), /* op1: r1 = 20 */ + OP2(OInt, 2, 2), /* op2: r2 = 12 */ + {OCall3, 3, 1, 0, extra}, /* op3: r3 = call native(r0, r1, r2) */ + OP1(ORet, 3), /* op4: return r3 */ + }; + + test_alloc_function(c, 0, fn_type, 4, regs, 5, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Call void native function (side effect) + * + * op0: int r0, 0 ; r0 = 99 + * op1: call1 r1, native_set_global, r0 + * op2: call0 r2, native_get_global + * op3: ret r2 ; return 99 + */ +TEST(native_void_call) { + test_init_runtime(); + + g_side_effect = 0; /* Reset */ + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 99 }; + test_init_ints(c, 1, ints); + + /* Two natives */ + hl_type *set_args[] = { &c->types[T_I32] }; + hl_type *set_type = test_alloc_fun_type(c, &c->types[T_VOID], 1, set_args); + test_add_native(c, 1, "test", "set_global", set_type, native_set_global); + + hl_type *get_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + test_add_native(c, 2, "test", "get_global", get_type, native_get_global); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { &c->types[T_I32], &c->types[T_VOID], &c->types[T_I32] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* op0: r0 = 99 */ + OP3(OCall1, 1, 1, 0), /* op1: call set_global(r0) */ + OP2(OCall0, 2, 2), /* op2: r2 = call get_global() */ + OP1(ORet, 2), /* op3: return r2 */ + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 
4, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 99) { + fprintf(stderr, " Expected 99, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* Test list */ +static test_entry_t tests[] = { + TEST_ENTRY(native_call0), + TEST_ENTRY(native_call2), + TEST_ENTRY(native_call3), + TEST_ENTRY(native_void_call), +}; + +int main(int argc, char **argv) { + printf("HashLink AArch64 JIT - Native Function Call Tests\n"); + return run_tests(tests, sizeof(tests) / sizeof(tests[0])); +} diff --git a/other/tests/minimal/test_objects.c b/other/tests/minimal/test_objects.c new file mode 100644 index 000000000..0254e3666 --- /dev/null +++ b/other/tests/minimal/test_objects.c @@ -0,0 +1,410 @@ +/* + * Test object operations for HashLink AArch64 JIT + * + * Tests: ONew, OField, OSetField, ONullCheck, OGetThis, OSetThis + * + * These are key opcodes used in hello.hl + */ +#include "test_harness.h" + +/* We need to create object types for these tests */ + +/* Helper to create an HDYNOBJ type (dynamic object) */ +static hl_type *create_dynobj_type(hl_code *c) { + if (c->ntypes >= MAX_TYPES) { + fprintf(stderr, "Too many types\n"); + return NULL; + } + + int idx = c->ntypes++; + hl_type *t = &c->types[idx]; + memset(t, 0, sizeof(hl_type)); + + t->kind = HDYNOBJ; + /* HDYNOBJ has no obj pointer - it's dynamically allocated */ + return t; +} + +/* Helper to create an HVIRTUAL type */ +static hl_type *create_virtual_type(hl_code *c, int nfields, hl_type **field_types) { + if (c->ntypes >= MAX_TYPES) { + fprintf(stderr, "Too many types\n"); + return NULL; + } + + int idx = c->ntypes++; + hl_type *t = &c->types[idx]; + memset(t, 0, sizeof(hl_type)); + + t->kind = HVIRTUAL; + t->virt = (hl_type_virtual*)calloc(1, sizeof(hl_type_virtual)); + t->virt->nfields = nfields; + + if (nfields > 0) { + t->virt->fields = (hl_obj_field*)calloc(nfields, sizeof(hl_obj_field)); + for (int i = 0; i < nfields; i++) { + t->virt->fields[i].name = (uchar*)"field"; + t->virt->fields[i].t = field_types[i]; + t->virt->fields[i].hashed_name = i; + } + } + + return t; +} + +/* Helper to create an object type with fields */ +static hl_type *create_obj_type(hl_code *c, const char *name, int nfields, hl_type **field_types) { + if (c->ntypes >= MAX_TYPES) { + fprintf(stderr, "Too many types\n"); + return NULL; + } + + int idx = c->ntypes++; + hl_type *t = &c->types[idx]; + memset(t, 0, sizeof(hl_type)); + + t->kind = HOBJ; + t->obj = (hl_type_obj*)calloc(1, sizeof(hl_type_obj)); + t->obj->name = (uchar*)name; + t->obj->nfields = nfields; + t->obj->nproto = 0; + t->obj->nbindings = 0; + + if (nfields > 0) { + t->obj->fields = (hl_obj_field*)calloc(nfields, sizeof(hl_obj_field)); + for (int i = 0; i < nfields; i++) { + t->obj->fields[i].name = (uchar*)"field"; + t->obj->fields[i].t = field_types[i]; + t->obj->fields[i].hashed_name = i; /* Simple hash for testing */ + } + } + + /* Don't call hl_get_obj_rt here - it needs a module allocator. + * The JIT will call it when needed, after the module is set up. 
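+ *
+ * For illustration only (an assumption about the runtime, not something
+ * this helper depends on beyond the note above): field accessors are
+ * expected to initialize the layout lazily, roughly
+ *
+ *     hl_runtime_obj *rt = t->obj->rt ? t->obj->rt : hl_get_obj_rt(t);
+ *
+ * so leaving t->obj->rt NULL here is safe until the module allocator
+ * exists.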
*/ + + return t; +} + +/* + * Test: ONullCheck on non-null value (should not throw) + * + * r0 = 42 + * null_check r0 ; should pass (non-zero) + * return r0 + */ +TEST(null_check_nonnull) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 42 }; + test_init_ints(c, 1, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { &c->types[T_I32] }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 42 */ + OP1(ONullCheck, 0), /* null_check r0 - should pass */ + OP1(ORet, 0), + }; + + test_alloc_function(c, 0, fn_type, 1, regs, 3, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Create object with ONew and access field with OField/OSetField + * + * Object type: { i32 value } + * + * r0 = new Obj + * r1 = 42 + * set_field r0.field[0] = r1 + * r2 = get_field r0.field[0] + * return r2 + */ +TEST(object_field_access) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 42 }; + test_init_ints(c, 1, ints); + + /* Create object type with one i32 field */ + hl_type *field_types[] = { &c->types[T_I32] }; + hl_type *obj_type = create_obj_type(c, "TestObj", 1, field_types); + if (!obj_type) return TEST_FAIL; + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { obj_type, &c->types[T_I32], &c->types[T_I32] }; + + hl_opcode ops[] = { + OP1(ONew, 0), /* r0 = new Obj */ + OP2(OInt, 1, 0), /* r1 = 42 */ + OP3(OSetField, 0, 0, 1), /* r0.field[0] = r1 */ + OP3(OField, 2, 0, 0), /* r2 = r0.field[0] */ + OP1(ORet, 2), + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 5, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Object with multiple fields + * + * Object type: { i32 a, i32 b } + * + * r0 = new Obj + * r1 = 10 + * r2 = 32 + * set_field r0.field[0] = r1 ; a = 10 + * set_field r0.field[1] = r2 ; b = 32 + * r3 = get_field r0.field[0] ; r3 = 10 + * r4 = get_field r0.field[1] ; r4 = 32 + * r5 = r3 + r4 ; r5 = 42 + * return r5 + */ +TEST(object_multiple_fields) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 10, 32 }; + test_init_ints(c, 2, ints); + + /* Create object type with two i32 fields */ + hl_type *field_types[] = { &c->types[T_I32], &c->types[T_I32] }; + hl_type *obj_type = create_obj_type(c, "TestObj2", 2, field_types); + if (!obj_type) return TEST_FAIL; + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { + obj_type, + &c->types[T_I32], + &c->types[T_I32], + &c->types[T_I32], + &c->types[T_I32], + &c->types[T_I32] + }; + + hl_opcode ops[] = { + OP1(ONew, 0), /* r0 = new Obj */ + OP2(OInt, 1, 0), /* r1 = 10 */ + OP2(OInt, 2, 1), /* r2 = 32 */ + OP3(OSetField, 0, 0, 1), /* r0.field[0] = r1 */ + OP3(OSetField, 0, 1, 2), /* r0.field[1] = r2 */ + OP3(OField, 3, 0, 0), /* r3 = r0.field[0] */ + OP3(OField, 4, 0, 1), /* r4 = r0.field[1] */ + OP3(OAdd, 5, 3, 4), /* r5 = r3 + r4 */ + OP1(ORet, 5), + }; + + test_alloc_function(c, 0, fn_type, 6, regs, 9, ops); + + 
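/* How OField/OSetField are expected to locate a field (a hedged sketch
+	 * from the public runtime structures, not the backend source):
+	 * hl_get_obj_rt(obj_type)->fields_indexes[i] holds the byte offset of
+	 * field i, and the access width comes from the field's hl_type, so the
+	 * two i32 fields here would land right after the type-pointer header
+	 * on a 64-bit target. */
+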
int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Object with pointer field + * + * Object type: { bytes ptr } + */ +TEST(object_pointer_field) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + /* Create object type with one pointer field */ + hl_type *field_types[] = { &c->types[T_BYTES] }; + hl_type *obj_type = create_obj_type(c, "TestObjPtr", 1, field_types); + if (!obj_type) return TEST_FAIL; + + /* Setup a string to store */ + c->nstrings = 1; + c->strings = (char**)malloc(sizeof(char*)); + c->strings[0] = "test"; + c->strings_lens = (int*)malloc(sizeof(int)); + c->strings_lens[0] = 4; + c->ustrings = (uchar**)calloc(1, sizeof(uchar*)); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_BYTES], 0, NULL); + hl_type *regs[] = { obj_type, &c->types[T_BYTES], &c->types[T_BYTES] }; + + hl_opcode ops[] = { + OP1(ONew, 0), /* r0 = new Obj */ + OP2(OString, 1, 0), /* r1 = "test" */ + OP3(OSetField, 0, 0, 1), /* r0.field[0] = r1 */ + OP3(OField, 2, 0, 0), /* r2 = r0.field[0] */ + OP1(ORet, 2), + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 5, ops); + + int result; + uchar* (*fn)(void) = (uchar*(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + uchar *ret = fn(); + if (ret == NULL) { + fprintf(stderr, " Got NULL pointer\n"); + return TEST_FAIL; + } + + /* Check first char is 't' (UTF-16) */ + if (ret[0] != 't') { + fprintf(stderr, " Expected 't', got 0x%04x\n", ret[0]); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: ONew with HDYNOBJ type + * + * This tests that dynamic objects (HDYNOBJ) are allocated correctly. + * The JIT must call hl_alloc_dynobj() (no args) instead of hl_alloc_obj(type). + * + * r0 = new DynObj ; allocate dynamic object + * r1 = 42 + * return r1 ; just verify allocation doesn't crash + */ +TEST(new_dynobj) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 42 }; + test_init_ints(c, 1, ints); + + /* Create HDYNOBJ type */ + hl_type *dynobj_type = create_dynobj_type(c); + if (!dynobj_type) return TEST_FAIL; + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { dynobj_type, &c->types[T_I32] }; + + hl_opcode ops[] = { + OP1(ONew, 0), /* r0 = new DynObj - must call hl_alloc_dynobj() */ + OP2(OInt, 1, 0), /* r1 = 42 */ + OP1(ORet, 1), + }; + + test_alloc_function(c, 0, fn_type, 2, regs, 3, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: ONew with HVIRTUAL type + * + * This tests that virtual objects (HVIRTUAL) are allocated correctly. + * The JIT must call hl_alloc_virtual(type) instead of hl_alloc_obj(type). 
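+ *
+ * A plausible ONew dispatch, inferred from this test and new_dynobj above
+ * (a sketch, not the backend source; rtype stands for the destination
+ * register's type):
+ *
+ *     switch( rtype->kind ) {
+ *     case HOBJ:     dst = hl_alloc_obj(rtype); break;
+ *     case HDYNOBJ:  dst = hl_alloc_dynobj(); break;   -- no type argument
+ *     case HVIRTUAL: dst = hl_alloc_virtual(rtype); break;
+ *     }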
+ * + * r0 = new Virtual ; allocate virtual object + * r1 = 42 + * return r1 ; just verify allocation doesn't crash + */ +TEST(new_virtual) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 42 }; + test_init_ints(c, 1, ints); + + /* Create HVIRTUAL type with one i32 field */ + hl_type *field_types[] = { &c->types[T_I32] }; + hl_type *virt_type = create_virtual_type(c, 1, field_types); + if (!virt_type) return TEST_FAIL; + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + hl_type *regs[] = { virt_type, &c->types[T_I32] }; + + hl_opcode ops[] = { + OP1(ONew, 0), /* r0 = new Virtual - must call hl_alloc_virtual() */ + OP2(OInt, 1, 0), /* r1 = 42 */ + OP1(ORet, 1), + }; + + test_alloc_function(c, 0, fn_type, 2, regs, 3, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* Test list */ +static test_entry_t tests[] = { + TEST_ENTRY(null_check_nonnull), + TEST_ENTRY(object_field_access), + TEST_ENTRY(object_multiple_fields), + TEST_ENTRY(object_pointer_field), + TEST_ENTRY(new_dynobj), + /* new_virtual requires complex type setup (virt->indexes) that our minimal + * test harness doesn't support. HVIRTUAL allocation is tested via hello.hl. */ +}; + +int main(int argc, char **argv) { + printf("HashLink AArch64 JIT - Object Operations Tests\n"); + return run_tests(tests, sizeof(tests) / sizeof(tests[0])); +} diff --git a/other/tests/minimal/test_ref_ops.c b/other/tests/minimal/test_ref_ops.c new file mode 100644 index 000000000..c1e3ad5bb --- /dev/null +++ b/other/tests/minimal/test_ref_ops.c @@ -0,0 +1,474 @@ +/* + * Test reference operations for HashLink AArch64 JIT + * + * Tests: ORef, OUnref, OSetref, ORefData, ORefOffset + * + * ORef: creates a reference (pointer) to a stack variable + * OUnref: dereferences a reference + * OSetref: assigns through a reference + * ORefData: gets pointer to array/bytes data + * ORefOffset: offsets a reference by index * element_size + */ +#include "test_harness.h" + +/* Helper to create a reference type */ +static hl_type *create_ref_type(hl_code *c, hl_type *elem_type) { + if (c->ntypes >= MAX_TYPES) { + fprintf(stderr, "Too many types\n"); + return NULL; + } + + int idx = c->ntypes++; + hl_type *t = &c->types[idx]; + memset(t, 0, sizeof(hl_type)); + + t->kind = HREF; + t->tparam = elem_type; + + return t; +} + +/* Helper to create an array type */ +static hl_type *create_array_type(hl_code *c, hl_type *elem_type) { + if (c->ntypes >= MAX_TYPES) { + fprintf(stderr, "Too many types\n"); + return NULL; + } + + int idx = c->ntypes++; + hl_type *t = &c->types[idx]; + memset(t, 0, sizeof(hl_type)); + + t->kind = HARRAY; + t->tparam = elem_type; + + return t; +} + +/* + * Test: ORef and OUnref basic - create reference and dereference + * + * r0 = 42 + * r1 = ref(r0) ; r1 = &r0 + * r2 = unref(r1) ; r2 = *r1 = 42 + * return r2 + */ +TEST(ref_unref_basic) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 42 }; + test_init_ints(c, 1, ints); + + hl_type *ref_i32 = create_ref_type(c, &c->types[T_I32]); + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + hl_type *regs[] = { + &c->types[T_I32], /* r0 = value */ + ref_i32, /* r1 = reference */ + &c->types[T_I32], /* r2 = dereferenced value */ + }; + + 
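/* Expected lowering, as a hedged sketch (the exact AArch64 codegen may
+	 * differ): ORef r1, r0 keeps r0 in its stack slot and sets r1 to that
+	 * slot's address; OUnref r2, r1 performs r2 = *(int*)r1. The invariant
+	 * is that r0 must stay in memory while the reference is live, or a
+	 * later OSetref through r1 (see setref_basic) would not be observable
+	 * when r0 is read back. */
+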
hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 42 */ + OP2(ORef, 1, 0), /* r1 = &r0 */ + OP2(OUnref, 2, 1), /* r2 = *r1 */ + OP1(ORet, 2), + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 4, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: OSetref - modify value through reference + * + * r0 = 10 + * r1 = ref(r0) ; r1 = &r0 + * r2 = 42 + * setref(r1, r2) ; *r1 = 42, so r0 = 42 + * return r0 ; should be 42 + */ +TEST(setref_basic) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 10, 42 }; + test_init_ints(c, 2, ints); + + hl_type *ref_i32 = create_ref_type(c, &c->types[T_I32]); + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + hl_type *regs[] = { + &c->types[T_I32], /* r0 = value */ + ref_i32, /* r1 = reference */ + &c->types[T_I32], /* r2 = new value */ + }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 10 */ + OP2(ORef, 1, 0), /* r1 = &r0 */ + OP2(OInt, 2, 1), /* r2 = 42 */ + OP2(OSetref, 1, 2), /* *r1 = r2 */ + OP1(ORet, 0), /* return r0 (should be 42 now) */ + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 5, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 42) { + fprintf(stderr, " Expected 42, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: ORef/OUnref with i64 + */ +TEST(ref_unref_i64) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 12345 }; + test_init_ints(c, 1, ints); + + hl_type *ref_i64 = create_ref_type(c, &c->types[T_I64]); + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I64], 0, NULL); + + hl_type *regs[] = { + &c->types[T_I64], /* r0 = value */ + ref_i64, /* r1 = reference */ + &c->types[T_I64], /* r2 = dereferenced value */ + }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 12345 */ + OP2(ORef, 1, 0), /* r1 = &r0 */ + OP2(OUnref, 2, 1), /* r2 = *r1 */ + OP1(ORet, 2), + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 4, ops); + + int result; + int64_t (*fn)(void) = (int64_t(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int64_t ret = fn(); + if (ret != 12345) { + fprintf(stderr, " Expected 12345, got %ld\n", (long)ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: ORef/OUnref with f64 + */ +TEST(ref_unref_f64) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + double floats[] = { 3.14159 }; + test_init_floats(c, 1, floats); + + hl_type *ref_f64 = create_ref_type(c, &c->types[T_F64]); + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_F64], 0, NULL); + + hl_type *regs[] = { + &c->types[T_F64], /* r0 = value */ + ref_f64, /* r1 = reference */ + &c->types[T_F64], /* r2 = dereferenced value */ + }; + + hl_opcode ops[] = { + OP2(OFloat, 0, 0), /* r0 = 3.14159 */ + OP2(ORef, 1, 0), /* r1 = &r0 */ + OP2(OUnref, 2, 1), /* r2 = *r1 */ + OP1(ORet, 2), + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 4, ops); + + int result; + double (*fn)(void) = (double(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + double ret = fn(); + double expected = 3.14159; + double diff = ret - expected; + if (diff < 0) diff = 
-diff; + if (diff > 0.00001) { + fprintf(stderr, " Expected %f, got %f\n", expected, ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: ORefData with array - get pointer to array data + * + * array = alloc_array(i32, 3) + * array[0] = 10 + * array[1] = 20 + * array[2] = 12 + * ptr = ref_data(array) ; get pointer to element data + * val = *ptr ; read first element via pointer + * return val ; should be 10 + */ +TEST(ref_data_array) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 3, 0, 1, 2, 10, 20, 12 }; + test_init_ints(c, 7, ints); + + hl_type *array_i32 = create_array_type(c, &c->types[T_I32]); + hl_type *ref_i32 = create_ref_type(c, &c->types[T_I32]); + + hl_type *alloc_args[] = { &c->types[T_TYPE], &c->types[T_I32] }; + hl_type *alloc_fn_type = test_alloc_fun_type(c, array_i32, 2, alloc_args); + test_add_native(c, 1, "std", "alloc_array", alloc_fn_type, (void*)hl_alloc_array); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + hl_type *regs[] = { + &c->types[T_TYPE], /* r0 = type pointer */ + &c->types[T_I32], /* r1 = size */ + array_i32, /* r2 = array */ + &c->types[T_I32], /* r3 = idx 0 */ + &c->types[T_I32], /* r4 = idx 1 */ + &c->types[T_I32], /* r5 = idx 2 */ + &c->types[T_I32], /* r6 = val 10 */ + &c->types[T_I32], /* r7 = val 20 */ + &c->types[T_I32], /* r8 = val 12 */ + ref_i32, /* r9 = ptr to data */ + &c->types[T_I32], /* r10 = read value */ + }; + + hl_opcode ops[] = { + OP2(OType, 0, T_I32), /* r0 = type for i32 */ + OP2(OInt, 1, 0), /* r1 = 3 (size) */ + OP4_CALL2(OCall2, 2, 1, 0, 1), /* r2 = alloc_array(r0, r1) */ + OP2(OInt, 3, 1), /* r3 = 0 */ + OP2(OInt, 4, 2), /* r4 = 1 */ + OP2(OInt, 5, 3), /* r5 = 2 */ + OP2(OInt, 6, 4), /* r6 = 10 */ + OP2(OInt, 7, 5), /* r7 = 20 */ + OP2(OInt, 8, 6), /* r8 = 12 */ + OP3(OSetArray, 2, 3, 6), /* array[0] = 10 */ + OP3(OSetArray, 2, 4, 7), /* array[1] = 20 */ + OP3(OSetArray, 2, 5, 8), /* array[2] = 12 */ + OP2(ORefData, 9, 2), /* r9 = ptr to array data */ + OP2(OUnref, 10, 9), /* r10 = *r9 = first element */ + OP1(ORet, 10), + }; + + test_alloc_function(c, 0, fn_type, 11, regs, 15, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 10) { + fprintf(stderr, " Expected 10, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: ORefOffset - offset a pointer to access array elements + * + * array = alloc_array(i32, 3) + * array[0] = 10 + * array[1] = 20 + * array[2] = 12 + * ptr = ref_data(array) ; get pointer to element data + * ptr2 = ref_offset(ptr, 2) ; ptr to array[2] + * val = *ptr2 ; read third element + * return val ; should be 12 + */ +TEST(ref_offset_basic) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 3, 0, 1, 2, 10, 20, 12 }; + test_init_ints(c, 7, ints); + + hl_type *array_i32 = create_array_type(c, &c->types[T_I32]); + hl_type *ref_i32 = create_ref_type(c, &c->types[T_I32]); + + hl_type *alloc_args[] = { &c->types[T_TYPE], &c->types[T_I32] }; + hl_type *alloc_fn_type = test_alloc_fun_type(c, array_i32, 2, alloc_args); + test_add_native(c, 1, "std", "alloc_array", alloc_fn_type, (void*)hl_alloc_array); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + hl_type *regs[] = { + &c->types[T_TYPE], /* r0 = type pointer */ + &c->types[T_I32], /* r1 = size */ + array_i32, /* r2 = array */ + &c->types[T_I32], 
/* r3-r5 = indices */ + &c->types[T_I32], + &c->types[T_I32], + &c->types[T_I32], /* r6-r8 = values */ + &c->types[T_I32], + &c->types[T_I32], + ref_i32, /* r9 = ptr to data */ + ref_i32, /* r10 = offset ptr */ + &c->types[T_I32], /* r11 = read value */ + }; + + hl_opcode ops[] = { + OP2(OType, 0, T_I32), /* r0 = type for i32 */ + OP2(OInt, 1, 0), /* r1 = 3 (size) */ + OP4_CALL2(OCall2, 2, 1, 0, 1), /* r2 = alloc_array(r0, r1) */ + OP2(OInt, 3, 1), /* r3 = 0 */ + OP2(OInt, 4, 2), /* r4 = 1 */ + OP2(OInt, 5, 3), /* r5 = 2 */ + OP2(OInt, 6, 4), /* r6 = 10 */ + OP2(OInt, 7, 5), /* r7 = 20 */ + OP2(OInt, 8, 6), /* r8 = 12 */ + OP3(OSetArray, 2, 3, 6), /* array[0] = 10 */ + OP3(OSetArray, 2, 4, 7), /* array[1] = 20 */ + OP3(OSetArray, 2, 5, 8), /* array[2] = 12 */ + OP2(ORefData, 9, 2), /* r9 = ptr to array data */ + OP3(ORefOffset, 10, 9, 5), /* r10 = r9 + 2 * sizeof(i32) */ + OP2(OUnref, 11, 10), /* r11 = *r10 = array[2] */ + OP1(ORet, 11), + }; + + test_alloc_function(c, 0, fn_type, 12, regs, 16, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 12) { + fprintf(stderr, " Expected 12, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: ORefOffset with i64 elements - larger element size + */ +TEST(ref_offset_i64) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 3, 0, 1, 2, 100, 200, 300 }; + test_init_ints(c, 7, ints); + + hl_type *array_i64 = create_array_type(c, &c->types[T_I64]); + hl_type *ref_i64 = create_ref_type(c, &c->types[T_I64]); + + hl_type *alloc_args[] = { &c->types[T_TYPE], &c->types[T_I32] }; + hl_type *alloc_fn_type = test_alloc_fun_type(c, array_i64, 2, alloc_args); + test_add_native(c, 1, "std", "alloc_array", alloc_fn_type, (void*)hl_alloc_array); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I64], 0, NULL); + + hl_type *regs[] = { + &c->types[T_TYPE], /* r0 = type pointer */ + &c->types[T_I32], /* r1 = size */ + array_i64, /* r2 = array */ + &c->types[T_I32], /* r3-r5 = indices */ + &c->types[T_I32], + &c->types[T_I32], + &c->types[T_I64], /* r6-r8 = values */ + &c->types[T_I64], + &c->types[T_I64], + ref_i64, /* r9 = ptr to data */ + ref_i64, /* r10 = offset ptr */ + &c->types[T_I64], /* r11 = read value */ + }; + + hl_opcode ops[] = { + OP2(OType, 0, T_I64), /* r0 = type for i64 */ + OP2(OInt, 1, 0), /* r1 = 3 (size) */ + OP4_CALL2(OCall2, 2, 1, 0, 1), /* r2 = alloc_array(r0, r1) */ + OP2(OInt, 3, 1), /* r3 = 0 */ + OP2(OInt, 4, 2), /* r4 = 1 */ + OP2(OInt, 5, 3), /* r5 = 2 */ + OP2(OInt, 6, 4), /* r6 = 100 */ + OP2(OInt, 7, 5), /* r7 = 200 */ + OP2(OInt, 8, 6), /* r8 = 300 */ + OP3(OSetArray, 2, 3, 6), /* array[0] = 100 */ + OP3(OSetArray, 2, 4, 7), /* array[1] = 200 */ + OP3(OSetArray, 2, 5, 8), /* array[2] = 300 */ + OP2(ORefData, 9, 2), /* r9 = ptr to array data */ + OP3(ORefOffset, 10, 9, 4), /* r10 = r9 + 1 * sizeof(i64) */ + OP2(OUnref, 11, 10), /* r11 = *r10 = array[1] */ + OP1(ORet, 11), + }; + + test_alloc_function(c, 0, fn_type, 12, regs, 16, ops); + + int result; + int64_t (*fn)(void) = (int64_t(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int64_t ret = fn(); + if (ret != 200) { + fprintf(stderr, " Expected 200, got %ld\n", (long)ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* Test list */ +static test_entry_t tests[] = { + TEST_ENTRY(ref_unref_basic), + TEST_ENTRY(setref_basic), + 
TEST_ENTRY(ref_unref_i64), + TEST_ENTRY(ref_unref_f64), + TEST_ENTRY(ref_data_array), + TEST_ENTRY(ref_offset_basic), + TEST_ENTRY(ref_offset_i64), +}; + +int main(int argc, char **argv) { + printf("HashLink AArch64 JIT - Reference Operation Tests\n"); + return run_tests(tests, sizeof(tests) / sizeof(tests[0])); +} diff --git a/other/tests/minimal/test_strings.c b/other/tests/minimal/test_strings.c new file mode 100644 index 000000000..ea718f5cf --- /dev/null +++ b/other/tests/minimal/test_strings.c @@ -0,0 +1,205 @@ +/* + * Test string operations for HashLink AArch64 JIT + * + * Tests: OString, OBytes, string handling + */ +#include "test_harness.h" + +/* + * Test: Load a string constant and return its pointer + * + * op0: string r0, 0 ; r0 = "hello" + * op1: ret r0 ; return pointer + */ +TEST(load_string) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + /* Setup string pool */ + c->nstrings = 1; + c->strings = (char**)malloc(sizeof(char*)); + c->strings[0] = "hello"; + c->strings_lens = (int*)malloc(sizeof(int)); + c->strings_lens[0] = 5; + c->ustrings = (uchar**)calloc(1, sizeof(uchar*)); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_BYTES], 0, NULL); + hl_type *regs[] = { &c->types[T_BYTES] }; + + hl_opcode ops[] = { + OP2(OString, 0, 0), /* op0: r0 = string[0] = "hello" */ + OP1(ORet, 0), /* op1: return r0 */ + }; + + test_alloc_function(c, 0, fn_type, 1, regs, 2, ops); + + int result; + uchar* (*fn)(void) = (uchar*(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + uchar *ret = fn(); + if (ret == NULL) { + fprintf(stderr, " Got NULL pointer\n"); + return TEST_FAIL; + } + + /* Check the string content - uchar is UTF-16, so each element is a 16-bit char */ + /* Note: uchar is 16-bit, so ret[0]='h', ret[1]='e', etc. 
for ASCII */ + if (ret[0] != 'h' || ret[1] != 'e' || ret[2] != 'l' || ret[3] != 'l' || ret[4] != 'o') { + fprintf(stderr, " String content mismatch: got 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x\n", + ret[0], ret[1], ret[2], ret[3], ret[4]); + fprintf(stderr, " As chars: '%c' '%c' '%c' '%c' '%c'\n", + (char)(ret[0] & 0xFF), (char)(ret[1] & 0xFF), + (char)(ret[2] & 0xFF), (char)(ret[3] & 0xFF), (char)(ret[4] & 0xFF)); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Load two different strings + */ +TEST(load_two_strings) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + /* Setup string pool */ + c->nstrings = 2; + c->strings = (char**)malloc(sizeof(char*) * 2); + c->strings[0] = "first"; + c->strings[1] = "second"; + c->strings_lens = (int*)malloc(sizeof(int) * 2); + c->strings_lens[0] = 5; + c->strings_lens[1] = 6; + c->ustrings = (uchar**)calloc(2, sizeof(uchar*)); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_BYTES], 0, NULL); + hl_type *regs[] = { &c->types[T_BYTES], &c->types[T_BYTES] }; + + hl_opcode ops[] = { + OP2(OString, 0, 0), /* op0: r0 = "first" */ + OP2(OString, 1, 1), /* op1: r1 = "second" */ + OP1(ORet, 1), /* op2: return r1 ("second") */ + }; + + test_alloc_function(c, 0, fn_type, 2, regs, 3, ops); + + int result; + uchar* (*fn)(void) = (uchar*(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + uchar *ret = fn(); + if (ret == NULL) { + fprintf(stderr, " Got NULL pointer\n"); + return TEST_FAIL; + } + + /* Should be "second" */ + if (ret[0] != 's' || ret[1] != 'e' || ret[2] != 'c') { + fprintf(stderr, " Expected 'second', got 0x%04x 0x%04x 0x%04x...\n", + ret[0], ret[1], ret[2]); + fprintf(stderr, " As chars: '%c' '%c' '%c'...\n", + (char)(ret[0] & 0xFF), (char)(ret[1] & 0xFF), (char)(ret[2] & 0xFF)); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Store string in dynobj and retrieve it + * This mimics what trace() does - store strings in dynamic object fields + * + * r0 = new DynObj + * r1 = "hello" + * dynset r0, fieldHash, r1 ; store string + * r2 = dynget r0, fieldHash ; retrieve string + * return r2 + */ +TEST(dynobj_string_roundtrip) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + /* Setup string pool - index 0 = field name "msg", index 1 = value "hello" */ + c->nstrings = 2; + c->strings = (char**)malloc(sizeof(char*) * 2); + c->strings[0] = "msg"; /* field name */ + c->strings[1] = "hello"; /* field value */ + c->strings_lens = (int*)malloc(sizeof(int) * 2); + c->strings_lens[0] = 3; + c->strings_lens[1] = 5; + c->ustrings = (uchar**)calloc(2, sizeof(uchar*)); + + /* Create HDYNOBJ type */ + if (c->ntypes >= MAX_TYPES) return TEST_FAIL; + int dynobj_idx = c->ntypes++; + c->types[dynobj_idx].kind = HDYNOBJ; + + /* Create HDYN type for the result */ + if (c->ntypes >= MAX_TYPES) return TEST_FAIL; + int dyn_idx = c->ntypes++; + c->types[dyn_idx].kind = HDYN; + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[dyn_idx], 0, NULL); + hl_type *regs[] = { + &c->types[dynobj_idx], /* r0: dynobj */ + &c->types[T_BYTES], /* r1: string "hello" */ + &c->types[dyn_idx], /* r2: retrieved value */ + }; + + hl_opcode ops[] = { + OP1(ONew, 0), /* r0 = new DynObj */ + OP2(OString, 1, 1), /* r1 = "hello" (string index 1) */ + OP3(ODynSet, 0, 0, 1), /* dynset r0, field[0]="msg", r1 */ + OP3(ODynGet, 2, 0, 0), /* r2 = dynget r0, field[0]="msg" */ + OP1(ORet, 2), /* return r2 */ + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 
5, ops); + + int result; + vdynamic* (*fn)(void) = (vdynamic*(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + vdynamic *ret = fn(); + if (ret == NULL) { + fprintf(stderr, " Got NULL vdynamic\n"); + return TEST_FAIL; + } + + /* The returned value should be a string wrapped in vdynamic */ + /* For HBYTES, v.ptr points to the UTF-16 string */ + uchar *str = (uchar*)ret->v.ptr; + if (str == NULL) { + fprintf(stderr, " Got NULL string pointer in vdynamic\n"); + return TEST_FAIL; + } + + if (str[0] != 'h' || str[1] != 'e' || str[2] != 'l' || str[3] != 'l' || str[4] != 'o') { + fprintf(stderr, " String mismatch: got '%c%c%c%c%c'\n", + (char)(str[0] & 0xFF), (char)(str[1] & 0xFF), + (char)(str[2] & 0xFF), (char)(str[3] & 0xFF), (char)(str[4] & 0xFF)); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* Test list */ +static test_entry_t tests[] = { + TEST_ENTRY(load_string), + TEST_ENTRY(load_two_strings), + TEST_ENTRY(dynobj_string_roundtrip), +}; + +int main(int argc, char **argv) { + printf("HashLink AArch64 JIT - String Operations Tests\n"); + return run_tests(tests, sizeof(tests) / sizeof(tests[0])); +} diff --git a/other/tests/minimal/test_switch.c b/other/tests/minimal/test_switch.c new file mode 100644 index 000000000..317b28828 --- /dev/null +++ b/other/tests/minimal/test_switch.c @@ -0,0 +1,367 @@ +/* + * Test switch operations for HashLink AArch64 JIT + * + * Tests: OSwitch + * + * OSwitch: switch(value) { case 0: ..., case 1: ..., ... } + * Parameters: + * p1 = register containing value to switch on + * p2 = number of cases + * extra[i] = jump offset for case i (relative to opcode after switch) + */ +#include "test_harness.h" + +/* + * Test: OSwitch with 3 cases + * + * switch(value) { + * case 0: return 10; + * case 1: return 20; + * case 2: return 30; + * default: return 0; + * } + */ +TEST(switch_basic) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 10, 20, 30, 0 }; /* return values for each case */ + test_init_ints(c, 4, ints); + + /* Function type: (i32) -> i32 */ + hl_type *arg_types[] = { &c->types[T_I32] }; + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 1, arg_types); + + hl_type *regs[] = { + &c->types[T_I32], /* r0 = input value */ + &c->types[T_I32], /* r1 = return value */ + }; + + /* + * Opcodes layout (default case must be immediately after switch): + * 0: OSwitch r0, 3, [2, 4, 6] ; switch on r0 with 3 cases + * 1: OInt r1, $3 ; default: r1 = 0 (fall through lands here) + * 2: ORet r1 + * 3: OInt r1, $0 ; case 0: r1 = 10 (offset 2 -> opcode 3) + * 4: ORet r1 + * 5: OInt r1, $1 ; case 1: r1 = 20 (offset 4 -> opcode 5) + * 6: ORet r1 + * 7: OInt r1, $2 ; case 2: r1 = 30 (offset 6 -> opcode 7) + * 8: ORet r1 + * + * Jump offsets from opcode 1 (after switch): + * case 0: offset 2 -> opcode 3 + * case 1: offset 4 -> opcode 5 + * case 2: offset 6 -> opcode 7 + */ + static int switch_offsets[] = { 2, 4, 6 }; + hl_opcode ops[] = { + { OSwitch, 0, 3, 0, switch_offsets }, /* switch r0, 3 cases */ + OP2(OInt, 1, 3), /* default: r1 = 0 */ + OP1(ORet, 1), + OP2(OInt, 1, 0), /* case 0: r1 = 10 */ + OP1(ORet, 1), + OP2(OInt, 1, 1), /* case 1: r1 = 20 */ + OP1(ORet, 1), + OP2(OInt, 1, 2), /* case 2: r1 = 30 */ + OP1(ORet, 1), + }; + + test_alloc_function(c, 0, fn_type, 2, regs, 9, ops); + + int result; + int (*fn)(int) = (int(*)(int))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + /* Test all cases */ + int test_cases[][2] = { + { 0, 10 }, + { 1, 
20 }, + { 2, 30 }, + { 3, 0 }, /* default */ + { 100, 0 }, /* default */ + { -1, 0 }, /* default */ + }; + + for (int i = 0; i < 6; i++) { + int input = test_cases[i][0]; + int expected = test_cases[i][1]; + int got = fn(input); + if (got != expected) { + fprintf(stderr, " switch(%d): expected %d, got %d\n", input, expected, got); + return TEST_FAIL; + } + } + + return TEST_PASS; +} + +/* + * Test: OSwitch with single case + */ +TEST(switch_single_case) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 42, 0 }; + test_init_ints(c, 2, ints); + + hl_type *arg_types[] = { &c->types[T_I32] }; + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 1, arg_types); + + hl_type *regs[] = { + &c->types[T_I32], + &c->types[T_I32], + }; + + /* Default immediately after switch, case 0 at offset 2 */ + static int switch_offsets[] = { 2 }; + hl_opcode ops[] = { + { OSwitch, 0, 1, 0, switch_offsets }, /* switch r0, 1 case */ + OP2(OInt, 1, 1), /* default: r1 = 0 (fall through) */ + OP1(ORet, 1), + OP2(OInt, 1, 0), /* case 0: r1 = 42 (offset 2) */ + OP1(ORet, 1), + }; + + test_alloc_function(c, 0, fn_type, 2, regs, 5, ops); + + int result; + int (*fn)(int) = (int(*)(int))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + if (fn(0) != 42) { + fprintf(stderr, " switch(0): expected 42, got %d\n", fn(0)); + return TEST_FAIL; + } + + if (fn(1) != 0) { + fprintf(stderr, " switch(1): expected 0 (default), got %d\n", fn(1)); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: OSwitch where all cases jump to same target + */ +TEST(switch_same_target) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 42, 0 }; + test_init_ints(c, 2, ints); + + hl_type *arg_types[] = { &c->types[T_I32] }; + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 1, arg_types); + + hl_type *regs[] = { + &c->types[T_I32], + &c->types[T_I32], + }; + + /* Default at opcode 1, all 3 cases jump to offset 2 (opcode 3) */ + static int switch_offsets[] = { 2, 2, 2 }; + hl_opcode ops[] = { + { OSwitch, 0, 3, 0, switch_offsets }, /* switch r0, 3 cases all going to same place */ + OP2(OInt, 1, 1), /* default: r1 = 0 (fall through) */ + OP1(ORet, 1), + OP2(OInt, 1, 0), /* case 0,1,2: r1 = 42 (offset 2) */ + OP1(ORet, 1), + }; + + test_alloc_function(c, 0, fn_type, 2, regs, 5, ops); + + int result; + int (*fn)(int) = (int(*)(int))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + /* Cases 0, 1, 2 should all return 42 */ + for (int i = 0; i < 3; i++) { + if (fn(i) != 42) { + fprintf(stderr, " switch(%d): expected 42, got %d\n", i, fn(i)); + return TEST_FAIL; + } + } + + /* Anything else is default (0) */ + if (fn(5) != 0) { + fprintf(stderr, " switch(5): expected 0, got %d\n", fn(5)); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: OSwitch with fallthrough pattern (consecutive cases) + * + * switch(value) { + * case 0: + * case 1: + * return 10; // cases 0 and 1 both return 10 + * case 2: + * return 20; + * default: + * return 0; + * } + */ +TEST(switch_fallthrough) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 10, 20, 0 }; + test_init_ints(c, 3, ints); + + hl_type *arg_types[] = { &c->types[T_I32] }; + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 1, arg_types); + + hl_type *regs[] = { + &c->types[T_I32], + &c->types[T_I32], + }; + + /* Default at opcode 1, case 0,1 at 
offset 2, case 2 at offset 4 */ + static int switch_offsets[] = { 2, 2, 4 }; + hl_opcode ops[] = { + { OSwitch, 0, 3, 0, switch_offsets }, + OP2(OInt, 1, 2), /* default: r1 = 0 (fall through) */ + OP1(ORet, 1), + OP2(OInt, 1, 0), /* case 0,1: r1 = 10 (offset 2) */ + OP1(ORet, 1), + OP2(OInt, 1, 1), /* case 2: r1 = 20 (offset 4) */ + OP1(ORet, 1), + }; + + test_alloc_function(c, 0, fn_type, 2, regs, 7, ops); + + int result; + int (*fn)(int) = (int(*)(int))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + if (fn(0) != 10 || fn(1) != 10) { + fprintf(stderr, " Cases 0,1 should return 10\n"); + return TEST_FAIL; + } + + if (fn(2) != 20) { + fprintf(stderr, " Case 2 should return 20, got %d\n", fn(2)); + return TEST_FAIL; + } + + if (fn(3) != 0) { + fprintf(stderr, " Default should return 0, got %d\n", fn(3)); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: OSwitch with computation after switch + * + * This tests that control flow properly resumes after switch. + * OLabel opcodes are required at jump targets to discard register bindings. + */ +TEST(switch_with_computation) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 10, 20, 100 }; + test_init_ints(c, 3, ints); + + hl_type *arg_types[] = { &c->types[T_I32] }; + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 1, arg_types); + + hl_type *regs[] = { + &c->types[T_I32], /* r0 = input */ + &c->types[T_I32], /* r1 = multiplier from switch */ + &c->types[T_I32], /* r2 = base value */ + &c->types[T_I32], /* r3 = result */ + }; + + /* + * Default after switch, cases jump past default to their handlers. + * Then all paths converge to common continuation. + * OLabel is required at each jump target. + * + * 0: OSwitch r0, 2, [2, 5] ; case 0->op3, case 1->op6 + * 1: OInt r1, 100 ; default: r1 = 100 + * 2: OJAlways 5 ; default jumps to op 8 (continuation) + * 3: OLabel ; case 0 target + * 4: OInt r1, 10 ; case 0: r1 = 10 + * 5: OJAlways 2 ; case 0 jumps to op 8 + * 6: OLabel ; case 1 target + * 7: OInt r1, 20 ; case 1: r1 = 20, falls through + * 8: OLabel ; continuation (merge point) + * 9: OInt r2, 100 ; r2 = 100 + * 10: OAdd r3, r1, r2 + * 11: ORet r3 + */ + static int switch_offsets[] = { 2, 5 }; + hl_opcode ops[] = { + { OSwitch, 0, 2, 0, switch_offsets }, + OP2(OInt, 1, 2), /* default: r1 = 100 */ + OP2(OJAlways, 5, 0), /* default jumps to continuation (op 8) */ + OP0(OLabel), /* case 0 target */ + OP2(OInt, 1, 0), /* case 0: r1 = 10 */ + OP2(OJAlways, 2, 0), /* case 0 jumps to continuation (op 8) */ + OP0(OLabel), /* case 1 target */ + OP2(OInt, 1, 1), /* case 1: r1 = 20, falls through */ + OP0(OLabel), /* continuation (merge point) */ + OP2(OInt, 2, 2), /* r2 = 100 */ + OP3(OAdd, 3, 1, 2), /* r3 = r1 + r2 */ + OP1(ORet, 3), + }; + + test_alloc_function(c, 0, fn_type, 4, regs, 12, ops); + + int result; + int (*fn)(int) = (int(*)(int))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + /* case 0: 10 + 100 = 110 */ + if (fn(0) != 110) { + fprintf(stderr, " switch(0): expected 110, got %d\n", fn(0)); + return TEST_FAIL; + } + + /* case 1: 20 + 100 = 120 */ + if (fn(1) != 120) { + fprintf(stderr, " switch(1): expected 120, got %d\n", fn(1)); + return TEST_FAIL; + } + + /* default: 100 + 100 = 200 */ + if (fn(5) != 200) { + fprintf(stderr, " switch(5) default: expected 200, got %d\n", fn(5)); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* Test list */ +static test_entry_t tests[] = { + TEST_ENTRY(switch_basic), 
+	TEST_ENTRY(switch_single_case),
+	TEST_ENTRY(switch_same_target),
+	TEST_ENTRY(switch_fallthrough),
+	TEST_ENTRY(switch_with_computation),
+};
+
+int main(int argc, char **argv) {
+	printf("HashLink AArch64 JIT - Switch Tests\n");
+	return run_tests(tests, sizeof(tests) / sizeof(tests[0]));
+}
diff --git a/other/tests/minimal/test_type_ops.c b/other/tests/minimal/test_type_ops.c
new file mode 100644
index 000000000..6a26b8245
--- /dev/null
+++ b/other/tests/minimal/test_type_ops.c
@@ -0,0 +1,290 @@
+/*
+ * Test type operations for HashLink AArch64 JIT
+ *
+ * Tests: OType, OUnsafeCast, OToSFloat, OToUFloat
+ * (OGetType, OGetTID, and OSafeCast are not yet covered by this file.)
+ *
+ * OType: load a type pointer from the types array
+ * OGetType: get the runtime type of an object
+ * OGetTID: get the type ID (first 4 bytes) of an object
+ * OSafeCast: safe dynamic cast with runtime check
+ * OUnsafeCast: unchecked type cast
+ * OToSFloat: convert signed int to float
+ * OToUFloat: convert unsigned int to float
+ */
+#include "test_harness.h"
+
+/* Helper to create an object type (not used by the current tests) */
+static hl_type *create_obj_type(hl_code *c, const char *name) {
+	if (c->ntypes >= MAX_TYPES) {
+		fprintf(stderr, "Too many types\n");
+		return NULL;
+	}
+
+	int idx = c->ntypes++;
+	hl_type *t = &c->types[idx];
+	memset(t, 0, sizeof(hl_type));
+
+	t->kind = HOBJ;
+	t->obj = (hl_type_obj*)calloc(1, sizeof(hl_type_obj));
+	t->obj->name = (uchar*)name;
+	t->obj->nfields = 0;
+	t->obj->nproto = 0;
+	t->obj->nbindings = 0;
+
+	return t;
+}
+
+/*
+ * Test: OType - load type pointer
+ *
+ * The type pointer should be non-null and have the correct kind.
+ * We use a native to verify the type kind.
+ */
+static int verify_type_kind(hl_type *t, int expected_kind) {
+	if (t == NULL) return 0;
+	return (t->kind == expected_kind) ? 1 : 0;
+}
+
+TEST(type_load) {
+	test_init_runtime();
+
+	hl_code *c = test_alloc_code();
+	test_init_base_types(c);
+
+	int ints[] = { HI32 }; /* expected kind */
+	test_init_ints(c, 1, ints);
+
+	/* Native: verify_type_kind(type, kind) -> i32 */
+	hl_type *verify_args[] = { &c->types[T_TYPE], &c->types[T_I32] };
+	hl_type *verify_fn_type = test_alloc_fun_type(c, &c->types[T_I32], 2, verify_args);
+	test_add_native(c, 1, "test", "verify_type_kind", verify_fn_type, (void*)verify_type_kind);
+
+	hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL);
+
+	hl_type *regs[] = {
+		&c->types[T_TYPE], /* r0 = type pointer */
+		&c->types[T_I32], /* r1 = expected kind */
+		&c->types[T_I32], /* r2 = result */
+	};
+
+	hl_opcode ops[] = {
+		OP2(OType, 0, T_I32), /* r0 = &types[T_I32] */
+		OP2(OInt, 1, 0), /* r1 = HI32 */
+		OP4_CALL2(OCall2, 2, 1, 0, 1), /* r2 = verify_type_kind(r0, r1) */
+		OP1(ORet, 2),
+	};
+
+	test_alloc_function(c, 0, fn_type, 3, regs, 4, ops);
+
+	int result;
+	int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result);
+	if (result != TEST_PASS) return result;
+
+	int ret = fn();
+	if (ret != 1) {
+		fprintf(stderr, " Type verification failed\n");
+		return TEST_FAIL;
+	}
+
+	return TEST_PASS;
+}
+
+/*
+ * Test: OToUFloat - convert unsigned int to float
+ *
+ * This is important for correctly converting large unsigned values.
+ * 0xFFFFFFFF as unsigned is 4294967295, not -1.
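+ *
+ * As a plain-C sketch of the semantics the JIT must reproduce (an
+ * illustration, not the emitted code):
+ *
+ *   double to_ufloat(int x) { return (double)(unsigned int)x; }
+ *   double to_sfloat(int x) { return (double)x; }
+ *
+ * so to_ufloat(-1) yields 4294967295.0 while to_sfloat(-1) yields -1.0.
+ * On AArch64 this is the difference between UCVTF and SCVTF.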
+ */ +TEST(to_ufloat_basic) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 1000000 }; /* 1 million */ + test_init_ints(c, 1, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_F64], 0, NULL); + + hl_type *regs[] = { + &c->types[T_I32], /* r0 = unsigned value */ + &c->types[T_F64], /* r1 = float result */ + }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 1000000 */ + OP2(OToUFloat, 1, 0), /* r1 = (float)r0 unsigned */ + OP1(ORet, 1), + }; + + test_alloc_function(c, 0, fn_type, 2, regs, 3, ops); + + int result; + double (*fn)(void) = (double(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + double ret = fn(); + double expected = 1000000.0; + double diff = ret - expected; + if (diff < 0) diff = -diff; + if (diff > 0.1) { + fprintf(stderr, " Expected %f, got %f\n", expected, ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: OToUFloat with large unsigned value + * + * 0x80000000 (2147483648) - would be negative if signed + */ +TEST(to_ufloat_large) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { (int)0x80000000 }; /* 2^31 as unsigned */ + test_init_ints(c, 1, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_F64], 0, NULL); + + hl_type *regs[] = { + &c->types[T_I32], + &c->types[T_F64], + }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), + OP2(OToUFloat, 1, 0), + OP1(ORet, 1), + }; + + test_alloc_function(c, 0, fn_type, 2, regs, 3, ops); + + int result; + double (*fn)(void) = (double(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + double ret = fn(); + double expected = 2147483648.0; /* 2^31 */ + double diff = ret - expected; + if (diff < 0) diff = -diff; + if (diff > 1.0) { + fprintf(stderr, " Expected %f, got %f\n", expected, ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: OUnsafeCast - reinterpret type without checks + * + * Cast an i64 to bytes (pointer type) and back. 
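+ *
+ * In C terms the round trip is a pure reinterpretation (sketch only,
+ * using HashLink's int_val integer-pointer type):
+ *
+ *   vbyte *p = (vbyte*)(int_val)x;  // no conversion, bits unchanged
+ *   int64 y = (int64)(int_val)p;    // y == x
+ *
+ * Both i64 and bytes occupy a full 64-bit register on AArch64, so every
+ * bit must survive the trip.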
+ */ +TEST(unsafe_cast_basic) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 12345 }; + test_init_ints(c, 1, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I64], 0, NULL); + + hl_type *regs[] = { + &c->types[T_I64], /* r0 = original value */ + &c->types[T_BYTES], /* r1 = cast to bytes (pointer) */ + &c->types[T_I64], /* r2 = cast back to i64 */ + }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 12345 */ + OP2(OUnsafeCast, 1, 0), /* r1 = (bytes)r0 */ + OP2(OUnsafeCast, 2, 1), /* r2 = (i64)r1 */ + OP1(ORet, 2), + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 4, ops); + + int result; + int64_t (*fn)(void) = (int64_t(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int64_t ret = fn(); + if (ret != 12345) { + fprintf(stderr, " Expected 12345, got %ld\n", (long)ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: OToSFloat vs OToUFloat comparison + * + * -1 converted: + * ToSFloat: -1.0 + * ToUFloat: 4294967295.0 + */ +TEST(tofloat_signed_vs_unsigned) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { -1 }; + test_init_ints(c, 1, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_F64], 0, NULL); + + hl_type *regs[] = { + &c->types[T_I32], /* r0 = -1 */ + &c->types[T_F64], /* r1 = signed float */ + &c->types[T_F64], /* r2 = unsigned float */ + &c->types[T_F64], /* r3 = difference */ + }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = -1 */ + OP2(OToSFloat, 1, 0), /* r1 = (float)r0 signed = -1.0 */ + OP2(OToUFloat, 2, 0), /* r2 = (float)r0 unsigned = 4294967295.0 */ + OP3(OSub, 3, 2, 1), /* r3 = r2 - r1 */ + OP1(ORet, 3), + }; + + test_alloc_function(c, 0, fn_type, 4, regs, 5, ops); + + int result; + double (*fn)(void) = (double(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + double ret = fn(); + /* unsigned(-1) - signed(-1) = 4294967295.0 - (-1.0) = 4294967296.0 */ + double expected = 4294967296.0; + double diff = ret - expected; + if (diff < 0) diff = -diff; + if (diff > 1.0) { + fprintf(stderr, " Expected %f, got %f\n", expected, ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* Test list */ +static test_entry_t tests[] = { + TEST_ENTRY(type_load), + TEST_ENTRY(to_ufloat_basic), + TEST_ENTRY(to_ufloat_large), + TEST_ENTRY(unsafe_cast_basic), + TEST_ENTRY(tofloat_signed_vs_unsigned), +}; + +int main(int argc, char **argv) { + printf("HashLink AArch64 JIT - Type Operation Tests\n"); + return run_tests(tests, sizeof(tests) / sizeof(tests[0])); +} diff --git a/other/tests/minimal/test_unsigned_ops.c b/other/tests/minimal/test_unsigned_ops.c new file mode 100644 index 000000000..ef44e4d6a --- /dev/null +++ b/other/tests/minimal/test_unsigned_ops.c @@ -0,0 +1,359 @@ +/* + * Test unsigned operations for HashLink AArch64 JIT + * + * Tests: OUDiv, OUMod, OUShr + * + * These opcodes perform unsigned arithmetic: + * OUDiv: unsigned division + * OUMod: unsigned modulo + * OUShr: unsigned (logical) right shift + */ +#include "test_harness.h" + +/* + * Test: OUDiv - unsigned division + * + * 100 / 3 = 33 (unsigned) + */ +TEST(udiv_basic) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 100, 3 }; + test_init_ints(c, 2, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + hl_type *regs[] = { + &c->types[T_I32], /* r0 = dividend */ + &c->types[T_I32], /* r1 = 
divisor */ + &c->types[T_I32], /* r2 = result */ + }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 100 */ + OP2(OInt, 1, 1), /* r1 = 3 */ + OP3(OUDiv, 2, 0, 1), /* r2 = r0 / r1 (unsigned) */ + OP1(ORet, 2), + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 4, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 33) { + fprintf(stderr, " Expected 33, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: OUDiv with large unsigned values + * + * When treating -1 as unsigned int32, it's 0xFFFFFFFF = 4294967295 + * 4294967295 / 2 = 2147483647 (unsigned division) + * + * With signed division, -1 / 2 = 0 + */ +TEST(udiv_large_unsigned) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { -1, 2 }; /* -1 as unsigned is 0xFFFFFFFF */ + test_init_ints(c, 2, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + hl_type *regs[] = { + &c->types[T_I32], + &c->types[T_I32], + &c->types[T_I32], + }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = -1 (0xFFFFFFFF as unsigned) */ + OP2(OInt, 1, 1), /* r1 = 2 */ + OP3(OUDiv, 2, 0, 1), /* r2 = 0xFFFFFFFF / 2 = 0x7FFFFFFF */ + OP1(ORet, 2), + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 4, ops); + + int result; + unsigned int (*fn)(void) = (unsigned int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + unsigned int ret = fn(); + unsigned int expected = 0x7FFFFFFF; /* 2147483647 */ + if (ret != expected) { + fprintf(stderr, " Expected %u, got %u\n", expected, ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: OUMod - unsigned modulo + * + * 100 % 3 = 1 + */ +TEST(umod_basic) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 100, 3 }; + test_init_ints(c, 2, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + hl_type *regs[] = { + &c->types[T_I32], + &c->types[T_I32], + &c->types[T_I32], + }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 100 */ + OP2(OInt, 1, 1), /* r1 = 3 */ + OP3(OUMod, 2, 0, 1), /* r2 = r0 % r1 (unsigned) */ + OP1(ORet, 2), + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 4, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 1) { + fprintf(stderr, " Expected 1, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: OUMod with large unsigned values + * + * 0xFFFFFFFF % 7 = 4294967295 % 7 = 3 + */ +TEST(umod_large_unsigned) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { -1, 7 }; /* -1 as unsigned is 0xFFFFFFFF */ + test_init_ints(c, 2, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + hl_type *regs[] = { + &c->types[T_I32], + &c->types[T_I32], + &c->types[T_I32], + }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), + OP2(OInt, 1, 1), + OP3(OUMod, 2, 0, 1), + OP1(ORet, 2), + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 4, ops); + + int result; + unsigned int (*fn)(void) = (unsigned int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + unsigned int ret = fn(); + unsigned int expected = 0xFFFFFFFF % 7; /* 4294967295 % 7 = 3 */ + if (ret != expected) { + fprintf(stderr, " Expected %u, got %u\n", 
expected, ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: OUShr - unsigned (logical) right shift + * + * 0xFF000000 >> 8 (logical) = 0x00FF0000 + * + * Signed shift would sign-extend: 0xFFFF0000 + */ +TEST(ushr_basic) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { (int)0xFF000000, 8 }; + test_init_ints(c, 2, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + hl_type *regs[] = { + &c->types[T_I32], + &c->types[T_I32], + &c->types[T_I32], + }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 0xFF000000 */ + OP2(OInt, 1, 1), /* r1 = 8 */ + OP3(OUShr, 2, 0, 1), /* r2 = r0 >>> r1 (logical shift) */ + OP1(ORet, 2), + }; + + test_alloc_function(c, 0, fn_type, 3, regs, 4, ops); + + int result; + unsigned int (*fn)(void) = (unsigned int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + unsigned int ret = fn(); + unsigned int expected = 0x00FF0000; + if (ret != expected) { + fprintf(stderr, " Expected 0x%08X, got 0x%08X\n", expected, ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: OUShr vs OSShr - compare unsigned vs signed shift + * + * -1 (0xFFFFFFFF) >> 16: + * - Unsigned: 0x0000FFFF + * - Signed: 0xFFFFFFFF (sign-extended) + */ +TEST(ushr_vs_sshr) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { -1, 16 }; + test_init_ints(c, 2, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + hl_type *regs[] = { + &c->types[T_I32], /* r0 = value */ + &c->types[T_I32], /* r1 = shift amount */ + &c->types[T_I32], /* r2 = unsigned result */ + &c->types[T_I32], /* r3 = signed result */ + &c->types[T_I32], /* r4 = difference */ + }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = -1 */ + OP2(OInt, 1, 1), /* r1 = 16 */ + OP3(OUShr, 2, 0, 1), /* r2 = unsigned shift */ + OP3(OSShr, 3, 0, 1), /* r3 = signed shift */ + OP3(OSub, 4, 2, 3), /* r4 = r2 - r3 */ + OP1(ORet, 4), /* return difference */ + }; + + test_alloc_function(c, 0, fn_type, 5, regs, 6, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + /* + * UShr: 0xFFFFFFFF >>> 16 = 0x0000FFFF = 65535 + * SShr: 0xFFFFFFFF >> 16 = 0xFFFFFFFF = -1 + * Difference: 65535 - (-1) = 65536 + */ + int expected = 65536; + if (ret != expected) { + fprintf(stderr, " Expected %d, got %d\n", expected, ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: OUDiv and OUMod together - verify quotient * divisor + remainder = dividend + */ +TEST(udiv_umod_combined) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + test_init_base_types(c); + + int ints[] = { 12345, 67 }; + test_init_ints(c, 2, ints); + + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + hl_type *regs[] = { + &c->types[T_I32], /* r0 = dividend */ + &c->types[T_I32], /* r1 = divisor */ + &c->types[T_I32], /* r2 = quotient */ + &c->types[T_I32], /* r3 = remainder */ + &c->types[T_I32], /* r4 = quotient * divisor */ + &c->types[T_I32], /* r5 = reconstructed dividend */ + }; + + hl_opcode ops[] = { + OP2(OInt, 0, 0), /* r0 = 12345 */ + OP2(OInt, 1, 1), /* r1 = 67 */ + OP3(OUDiv, 2, 0, 1), /* r2 = 12345 / 67 = 184 */ + OP3(OUMod, 3, 0, 1), /* r3 = 12345 % 67 = 17 */ + OP3(OMul, 4, 2, 1), /* r4 = 184 * 67 = 12328 */ + OP3(OAdd, 5, 4, 3), /* r5 = 12328 + 17 = 12345 */ + OP1(ORet, 5), + }; + + 
test_alloc_function(c, 0, fn_type, 6, regs, 7, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 12345) { + fprintf(stderr, " Expected 12345, got %d\n", ret); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* Test list */ +static test_entry_t tests[] = { + TEST_ENTRY(udiv_basic), + TEST_ENTRY(udiv_large_unsigned), + TEST_ENTRY(umod_basic), + TEST_ENTRY(umod_large_unsigned), + TEST_ENTRY(ushr_basic), + TEST_ENTRY(ushr_vs_sshr), + TEST_ENTRY(udiv_umod_combined), +}; + +int main(int argc, char **argv) { + printf("HashLink AArch64 JIT - Unsigned Operation Tests\n"); + return run_tests(tests, sizeof(tests) / sizeof(tests[0])); +} diff --git a/other/tests/minimal/test_virtual_fields.c b/other/tests/minimal/test_virtual_fields.c new file mode 100644 index 000000000..593c9e332 --- /dev/null +++ b/other/tests/minimal/test_virtual_fields.c @@ -0,0 +1,357 @@ +/* + * Test HVIRTUAL field access with different sizes for HashLink AArch64 JIT + * + * This tests the fix for a bug where OSetField/OField for HVIRTUAL objects + * would always use 64-bit load/store instructions regardless of the actual + * field size. This caused adjacent fields to be corrupted. + * + * The bug manifested when storing a 32-bit integer to a vfield - the 64-bit + * store would zero out the adjacent field's memory. + */ +#include "test_harness.h" + +/* Extended type indices for this test */ +#define T_UI8 8 +#define T_UI16 9 + +/* Initialize types including smaller integer types */ +static void init_extended_types(hl_code *c) { + test_init_base_types(c); + + /* Add HUI8 */ + c->types[T_UI8].kind = HUI8; + + /* Add HUI16 */ + c->types[T_UI16].kind = HUI16; + + c->ntypes = 10; +} + +/* Helper to get type size (simplified version of hl_type_size) */ +static int get_type_size(hl_type *t) { + switch (t->kind) { + case HUI8: case HBOOL: return 1; + case HUI16: return 2; + case HI32: case HF32: return 4; + case HI64: case HF64: return 8; + default: return sizeof(void*); /* Pointers */ + } +} + +/* Helper to calculate alignment padding */ +static int pad_struct(int size, hl_type *t) { + int align; + switch (t->kind) { + case HVOID: return 0; + case HUI8: case HBOOL: align = 1; break; + case HUI16: align = 2; break; + case HI32: case HF32: align = 4; break; + case HI64: case HF64: align = 8; break; + default: align = sizeof(void*); break; /* Pointers */ + } + return (-size) & (align - 1); +} + +/* Helper to create an HVIRTUAL type */ +static hl_type *create_virtual_type(hl_code *c, int nfields, hl_type **field_types) { + if (c->ntypes >= MAX_TYPES) { + fprintf(stderr, "Too many types\n"); + return NULL; + } + + int idx = c->ntypes++; + hl_type *t = &c->types[idx]; + memset(t, 0, sizeof(hl_type)); + + t->kind = HVIRTUAL; + t->virt = (hl_type_virtual*)calloc(1, sizeof(hl_type_virtual)); + t->virt->nfields = nfields; + + if (nfields > 0) { + t->virt->fields = (hl_obj_field*)calloc(nfields, sizeof(hl_obj_field)); + t->virt->indexes = (int*)calloc(nfields, sizeof(int)); + + /* Calculate field layout (matching hl_init_virtual logic) */ + int vsize = sizeof(vvirtual) + sizeof(void*) * nfields; + int size = vsize; + + for (int i = 0; i < nfields; i++) { + char *name = (char*)malloc(16); + sprintf(name, "field%d", i); + t->virt->fields[i].name = (uchar*)name; + t->virt->fields[i].t = field_types[i]; + t->virt->fields[i].hashed_name = i + 1000; /* Unique hash */ + + /* Add alignment padding */ + size += pad_struct(size, 
field_types[i]); + t->virt->indexes[i] = size; + size += get_type_size(field_types[i]); + } + + t->virt->dataSize = size - vsize; + } + + return t; +} + +/* + * Test: HVIRTUAL with adjacent i32 fields + * + * This tests the core bug: storing to one i32 field should not corrupt + * the adjacent i32 field. + * + * struct { i32 a; i32 b; } + * + * r0 = new Virtual + * r1 = 0xDEADBEEF + * r2 = 0xCAFEBABE + * set_field r0.field[0] = r1 (a = 0xDEADBEEF) + * set_field r0.field[1] = r2 (b = 0xCAFEBABE) + * r3 = get_field r0.field[0] (read a - should still be 0xDEADBEEF) + * return r3 + */ +TEST(virtual_adjacent_i32_fields) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + init_extended_types(c); + + int ints[] = { (int)0xDEADBEEF, (int)0xCAFEBABE }; + test_init_ints(c, 2, ints); + + /* Create HVIRTUAL type with two i32 fields */ + hl_type *field_types[] = { &c->types[T_I32], &c->types[T_I32] }; + hl_type *virt_type = create_virtual_type(c, 2, field_types); + + /* Function: () -> i32 */ + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + /* Registers: r0=virtual, r1=i32, r2=i32, r3=i32 */ + hl_type *regs[] = { virt_type, &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + + hl_opcode ops[] = { + OP1(ONew, 0), /* r0 = new Virtual */ + OP2(OInt, 1, 0), /* r1 = 0xDEADBEEF */ + OP2(OInt, 2, 1), /* r2 = 0xCAFEBABE */ + OP3(OSetField, 0, 0, 1), /* r0.field[0] = r1 */ + OP3(OSetField, 0, 1, 2), /* r0.field[1] = r2 */ + OP3(OField, 3, 0, 0), /* r3 = r0.field[0] */ + OP1(ORet, 3), + }; + + test_alloc_function(c, 0, fn_type, 4, regs, 7, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != (int)0xDEADBEEF) { + fprintf(stderr, " Expected 0xDEADBEEF, got 0x%X\n", ret); + fprintf(stderr, " (Adjacent field store corrupted first field)\n"); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: HVIRTUAL with mixed size fields (i32 followed by pointer) + * + * This is the exact scenario from the bug report: an i32 field followed + * by a pointer field. The i32 store with 64-bit instruction would zero + * out the adjacent pointer. + * + * struct { i32 a; ptr b; } + * + * r0 = new Virtual (the struct) + * r1 = 42 + * r2 = new Virtual (a non-null pointer to use as field value) + * set_field r0.field[1] = r2 (b = pointer) - SET SECOND FIELD FIRST + * set_field r0.field[0] = r1 (a = 42) - BUG: 64-bit store would zero b! + * r3 = get_field r0.field[1] (read b - should still be the pointer) + * return r3 + */ +TEST(virtual_i32_then_pointer) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + init_extended_types(c); + + int ints[] = { 42 }; + test_init_ints(c, 1, ints); + + /* Create a simple virtual type to use as a pointer value */ + hl_type *empty_field_types[] = { &c->types[T_I32] }; + hl_type *ptr_virt_type = create_virtual_type(c, 1, empty_field_types); + + /* Create HVIRTUAL type: { i32, virtual (pointer) } */ + hl_type *field_types[] = { &c->types[T_I32], ptr_virt_type }; + hl_type *virt_type = create_virtual_type(c, 2, field_types); + + /* Function: () -> virtual (pointer) */ + hl_type *fn_type = test_alloc_fun_type(c, ptr_virt_type, 0, NULL); + + /* Registers: r0=struct virtual, r1=i32, r2=ptr virtual, r3=ptr virtual */ + hl_type *regs[] = { virt_type, &c->types[T_I32], ptr_virt_type, ptr_virt_type }; + + /* + * We use ONew on r2 which has type ptr_virt_type (HVIRTUAL), + * which can be new'd, giving us a non-null pointer. 
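+ *
+ * The buggy vs. fixed store, as a little-endian C sketch (addr and val
+ * are illustrative names, not the JIT's variables):
+ *
+ *   *(uint64_t*)addr = (uint32_t)val;  // bug: also zeroes the 4 bytes after the field
+ *   *(uint32_t*)addr = (uint32_t)val;  // fix: writes only the field's 4 bytes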
+ */ + hl_opcode ops[] = { + OP1(ONew, 0), /* r0 = new Virtual (the struct) */ + OP1(ONew, 2), /* r2 = new Virtual (a non-null pointer value) */ + OP2(OInt, 1, 0), /* r1 = 42 */ + OP3(OSetField, 0, 1, 2), /* r0.field[1] = r2 (set pointer FIRST) */ + OP3(OSetField, 0, 0, 1), /* r0.field[0] = r1 (BUG: would corrupt field[1]) */ + OP3(OField, 3, 0, 1), /* r3 = r0.field[1] (read back pointer) */ + OP1(ORet, 3), /* return r3 */ + }; + + test_alloc_function(c, 0, fn_type, 4, regs, 7, ops); + + int result; + void *(*fn)(void) = (void*(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + void *ret = fn(); + if (ret == NULL) { + fprintf(stderr, " Expected non-null pointer, got NULL\n"); + fprintf(stderr, " (i32 store corrupted adjacent pointer field)\n"); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: HVIRTUAL with multiple i32 fields - verify no corruption + * + * struct { i32 a; i32 b; i32 c; i32 d; } + * + * Set all fields to different values, then read them all back. + * Any corruption will show up as wrong values. + */ +TEST(virtual_multiple_i32_fields) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + init_extended_types(c); + + int ints[] = { 111, 222, 333, 444 }; + test_init_ints(c, 4, ints); + + /* Create HVIRTUAL type with four i32 fields */ + hl_type *field_types[] = { &c->types[T_I32], &c->types[T_I32], + &c->types[T_I32], &c->types[T_I32] }; + hl_type *virt_type = create_virtual_type(c, 4, field_types); + + /* Function: () -> i32 */ + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + /* Registers: r0=virtual, r1-r4=i32, r5=i32(result), r6=i32(temp) */ + hl_type *regs[] = { virt_type, + &c->types[T_I32], &c->types[T_I32], + &c->types[T_I32], &c->types[T_I32], + &c->types[T_I32], &c->types[T_I32] }; + + hl_opcode ops[] = { + OP1(ONew, 0), /* r0 = new Virtual */ + OP2(OInt, 1, 0), /* r1 = 111 */ + OP2(OInt, 2, 1), /* r2 = 222 */ + OP2(OInt, 3, 2), /* r3 = 333 */ + OP2(OInt, 4, 3), /* r4 = 444 */ + OP3(OSetField, 0, 0, 1), /* r0.field[0] = 111 */ + OP3(OSetField, 0, 1, 2), /* r0.field[1] = 222 */ + OP3(OSetField, 0, 2, 3), /* r0.field[2] = 333 */ + OP3(OSetField, 0, 3, 4), /* r0.field[3] = 444 */ + /* Read back field[0] - should be 111 */ + OP3(OField, 5, 0, 0), /* r5 = r0.field[0] */ + OP1(ORet, 5), + }; + + test_alloc_function(c, 0, fn_type, 7, regs, 11, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 111) { + fprintf(stderr, " Expected 111, got %d\n", ret); + fprintf(stderr, " (Field corruption detected)\n"); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* + * Test: Read back second field after setting first + * + * Same as above but read field[1] to verify it wasn't corrupted + * by the field[0] store. 
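+ *
+ * Under the old bug the sequence would behave like:
+ *
+ *   field[1] = 222;
+ *   field[0] = 111;   // 64-bit store also writes 0 over field[1]
+ *   return field[1];  // 0 instead of 222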
+ */ +TEST(virtual_read_second_field) { + test_init_runtime(); + + hl_code *c = test_alloc_code(); + init_extended_types(c); + + int ints[] = { 111, 222 }; + test_init_ints(c, 2, ints); + + /* Create HVIRTUAL type with two i32 fields */ + hl_type *field_types[] = { &c->types[T_I32], &c->types[T_I32] }; + hl_type *virt_type = create_virtual_type(c, 2, field_types); + + /* Function: () -> i32 */ + hl_type *fn_type = test_alloc_fun_type(c, &c->types[T_I32], 0, NULL); + + /* Registers: r0=virtual, r1=i32, r2=i32, r3=i32 */ + hl_type *regs[] = { virt_type, &c->types[T_I32], &c->types[T_I32], &c->types[T_I32] }; + + hl_opcode ops[] = { + OP1(ONew, 0), /* r0 = new Virtual */ + OP2(OInt, 1, 0), /* r1 = 111 */ + OP2(OInt, 2, 1), /* r2 = 222 */ + OP3(OSetField, 0, 1, 2), /* r0.field[1] = 222 (SET SECOND FIRST) */ + OP3(OSetField, 0, 0, 1), /* r0.field[0] = 111 (this would corrupt field[1]) */ + OP3(OField, 3, 0, 1), /* r3 = r0.field[1] (read back second field) */ + OP1(ORet, 3), + }; + + test_alloc_function(c, 0, fn_type, 4, regs, 7, ops); + + int result; + int (*fn)(void) = (int(*)(void))test_jit_compile(c, &result); + if (result != TEST_PASS) return result; + + int ret = fn(); + if (ret != 222) { + fprintf(stderr, " Expected 222, got %d\n", ret); + fprintf(stderr, " (field[0] store corrupted field[1] - the bug!)\n"); + return TEST_FAIL; + } + + return TEST_PASS; +} + +/* Test registry */ +static test_entry_t tests[] = { + TEST_ENTRY(virtual_adjacent_i32_fields), + TEST_ENTRY(virtual_i32_then_pointer), + TEST_ENTRY(virtual_multiple_i32_fields), + TEST_ENTRY(virtual_read_second_field), +}; + +/* Main test runner */ +int main(int argc, char **argv) { + (void)argc; (void)argv; + + printf("HashLink AArch64 JIT - HVIRTUAL Field Size Tests\n"); + printf("Testing fix for 64-bit store corrupting adjacent fields\n\n"); + + return run_tests(tests, sizeof(tests) / sizeof(tests[0])); +} diff --git a/src/gc.c b/src/gc.c index ad0310148..177a2c81e 100644 --- a/src/gc.c +++ b/src/gc.c @@ -1,1528 +1,1550 @@ -/* - * Copyright (C)2005-2016 Haxe Foundation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */
-#include "hl.h"
-#ifdef HL_WIN
-#	undef _GUID
-#	include <windows.h>
-#else
-#	include <sys/types.h>
-#	include <sys/mman.h>
-#endif
-
-#if defined(HL_EMSCRIPTEN)
-#	include <emscripten/heap.h>
-#endif
-
-#if defined(HL_VCC)
-#define DRAM_PREFETCH(addr) _mm_prefetch(addr, 1)
-#elif defined(HL_CLANG) || defined (HL_GCC)
-#define DRAM_PREFETCH(addr) __builtin_prefetch(addr)
-#else
-#define DRAM_PREFETCH(addr)
-#endif
-
-#define MZERO(ptr,size) memset(ptr,0,size)
-
-// GC
-
-#define GC_PAGE_BITS 16
-#define GC_PAGE_SIZE (1 << GC_PAGE_BITS)
-
-#ifndef HL_64
-#	define gc_hash(ptr) ((unsigned int)(ptr))
-#	define GC_LEVEL0_BITS 8
-#	define GC_LEVEL1_BITS 8
-#else
-#	define GC_LEVEL0_BITS 10
-#	define GC_LEVEL1_BITS 10
-
-// we currently discard the higher bits
-// we should instead have some special handling for them
-// in x86-64 user space grows up to 0x8000-00000000 (16 bits base + 31 bits page id)
-
-#ifdef HL_WIN
-#	define gc_hash(ptr) ((int_val)(ptr)&0x0000000FFFFFFFFF)
-#else
-// Linux gives addresses using the following patterns (X=any,Y=small value - can be 0):
-// 0x0000000YXXX0000
-// 0x0007FY0YXXX0000
-static int_val gc_hash( void *ptr ) {
-	int_val v = (int_val)ptr;
-	return (v ^ ((v >> 33) << 28)) & 0x0000000FFFFFFFFF;
-}
-#endif
-
-#endif
-
-#define GC_MASK_BITS 16
-#define GC_GET_LEVEL1(ptr) hl_gc_page_map[gc_hash(ptr)>>(GC_MASK_BITS+GC_LEVEL1_BITS)]
-#define GC_GET_PAGE(ptr) GC_GET_LEVEL1(ptr)[(gc_hash(ptr)>>GC_MASK_BITS)&GC_LEVEL1_MASK]
-#define GC_LEVEL1_MASK ((1 << GC_LEVEL1_BITS) - 1)
-
-#define PAGE_KIND_BITS 2
-#define PAGE_KIND_MASK ((1 << PAGE_KIND_BITS) - 1)
-
-#if defined(HL_DEBUG) && !defined(HL_CONSOLE)
-#	define GC_DEBUG
-#	define GC_MEMCHK
-#endif
-
-#define GC_INTERIOR_POINTERS
-#define GC_PRECISE
-
-#ifndef HL_THREADS
-#	define GC_MAX_MARK_THREADS 1
-#else
-#	ifndef GC_MAX_MARK_THREADS
-#		define GC_MAX_MARK_THREADS 4
-#	endif
-#endif
-
-#define out_of_memory(reason) hl_fatal("Out of Memory (" reason ")")
-
-typedef struct _gc_pheader gc_pheader;
-
-// page + private total reserved data per page
-typedef void (*gc_page_iterator)( gc_pheader *, int );
-// block-ptr + size
-typedef void (*gc_block_iterator)( void *, int );
-
-//#define GC_EXTERN_API
-
-#ifdef GC_EXTERN_API
-typedef void* gc_allocator_page_data;
-
-// Initialize the allocator
-void gc_allocator_init();
-
-// Get the block size within the given page. The block validity has already been checked.
-int gc_allocator_fast_block_size( gc_pheader *page, void *block );
-
-// Get the block id within the given page, or -1 if it's an invalid ptr. The block is already checked within page bounds.
-int gc_allocator_get_block_id( gc_pheader *page, void *block );
-
-// Same as get_block_id but handles interior pointers and modifies the block value.
-int gc_allocator_get_block_id_interior( gc_pheader *page, void **block );
-
-// Called before marking starts: should update each page "bmp" with mark_bits
-void gc_allocator_before_mark( unsigned char *mark_bits );
-
-// Called when marking ends: should call finalizers, sweep unused blocks and free empty pages
-void gc_allocator_after_mark();
-
-// Allocate a block with given size using the specified page kind.
-// Returns NULL if no block could be allocated -// Sets size to really allocated size (could be larger) -// Sets size to -1 if allocation refused (required size is invalid) -void *gc_allocator_alloc( int *size, int page_kind ); - -// returns the number of pages allocated and private data size (global) -void gc_get_stats( int *page_count, int *private_data); -void gc_iter_pages( gc_page_iterator i ); -void gc_iter_live_blocks( gc_pheader *p, gc_block_iterator i ); - -#else -# include "allocator.h" -#endif - -struct _gc_pheader { - // const - unsigned char *base; - unsigned char *bmp; - int page_size; - int page_kind; - gc_allocator_page_data alloc; - gc_pheader *next_page; -#ifdef GC_DEBUG - int page_id; -#endif -}; - -#ifdef HL_64 -# define INPAGE(ptr,page) ((unsigned char*)(ptr) >= (page)->base && (unsigned char*)(ptr) < (page)->base + (page)->page_size) -#else -# define INPAGE(ptr,page) true -#endif - -#define GC_PROFILE 1 -#define GC_DUMP_MEM 2 -#define GC_NO_THREADS 4 -#define GC_FORCE_MAJOR 8 -#define GC_PROFILE_MEM 16 - -static int gc_flags = 0; -static gc_pheader *gc_level1_null[1<gc_regs); - // some compilers (such as clang) might push/pop some callee registers in call - // to gc_save_context (or before) which might hold a gc value ! - // let's capture them immediately in extra per-thread data - t->stack_cur = &prev_stack; - - // We have no guarantee prev_stack is pointer-aligned - // All calls are passing a pointer to a bool, which is aligned on 1 byte - // If pointer is wrongly aligned, the extra_stack_data is misaligned - // and register pointers save in stack will not be discovered correctly by the GC - uintptr_t aligned_prev_stack = ((uintptr_t)prev_stack) & ~(sizeof(void*) - 1); - prev_stack = (void*)aligned_prev_stack; - int size = (int)((char*)prev_stack - (char*)stack_cur) / sizeof(void*); - if( size > HL_MAX_EXTRA_STACK ) hl_fatal("GC_SAVE_CONTEXT"); - t->extra_stack_size = size; - memcpy(t->extra_stack_data, prev_stack, size*sizeof(void*)); -} - -#ifndef HL_THREADS -# define gc_global_lock(_) -#else -static void gc_global_lock( bool lock ) { - hl_thread_info *t = current_thread; - bool mt = (gc_flags & GC_NO_THREADS) == 0; - if( !t && gc_threads.count == 0 ) return; - if( lock ) { - if( !t ) - hl_fatal("Can't lock GC in unregistered thread"); - if( mt ) gc_save_context(t,&lock); - t->gc_blocking++; - if( mt ) hl_mutex_acquire(gc_threads.global_lock); - } else { - t->gc_blocking--; - if( mt ) hl_mutex_release(gc_threads.global_lock); - } -} -#endif - -HL_PRIM void hl_global_lock( bool lock ) { - if( lock ) - hl_mutex_acquire(gc_threads.exclusive_lock); - else - hl_mutex_release(gc_threads.exclusive_lock); -} - -HL_PRIM void hl_add_root( void *r ) { - gc_global_lock(true); - if( gc_roots_count == gc_roots_max ) { - int nroots = gc_roots_max ? 
(gc_roots_max << 1) : 16; - void ***roots = (void***)malloc(sizeof(void*)*nroots); - memcpy(roots,gc_roots,sizeof(void*)*gc_roots_count); - free(gc_roots); - gc_roots = roots; - gc_roots_max = nroots; - } - gc_roots[gc_roots_count++] = (void**)r; - gc_global_lock(false); -} - -HL_PRIM void hl_remove_root( void *v ) { - int i; - gc_global_lock(true); - for(i=gc_roots_count-1;i>=0;i--) - if( gc_roots[i] == (void**)v ) { - gc_roots_count--; - gc_roots[i] = gc_roots[gc_roots_count]; - break; - } - gc_global_lock(false); -} - -HL_PRIM gc_pheader *hl_gc_get_page( void *v ) { - gc_pheader *page = GC_GET_PAGE(v); - if( page && !INPAGE(v,page) ) - page = NULL; - return page; -} - -// ------------------------- THREADS ---------------------------------------------------------- - -HL_API int hl_thread_id(); - -HL_API void hl_register_thread( void *stack_top ) { - if( hl_get_thread() ) - hl_fatal("Thread already registered"); - - hl_thread_info *t = (hl_thread_info*)malloc(sizeof(hl_thread_info)); - memset(t, 0, sizeof(hl_thread_info)); - t->thread_id = hl_thread_id(); - #ifdef HL_MAC - t->mach_thread_id = mach_thread_self(); - t->pthread_id = (pthread_t)hl_thread_current(); - #endif - t->stack_top = stack_top; - t->flags = HL_TRACK_MASK << HL_TREAD_TRACK_SHIFT; - current_thread = t; - hl_add_root(&t->exc_value); - hl_add_root(&t->exc_handler); - - gc_global_lock(true); - hl_thread_info **all = (hl_thread_info**)malloc(sizeof(void*) * (gc_threads.count + 1)); - memcpy(all,gc_threads.threads,sizeof(void*)*gc_threads.count); - gc_threads.threads = all; - all[gc_threads.count++] = t; - gc_global_lock(false); -} - -HL_API void hl_unregister_thread() { - int i; - hl_thread_info *t = hl_get_thread(); - if( !t ) - hl_fatal("Thread not registered"); - hl_remove_root(&t->exc_value); - hl_remove_root(&t->exc_handler); - gc_global_lock(true); - for(i=0;igc_blocking == 0 ) {}; // spinwait - } - } else { - // releasing global lock will release all threads - gc_threads.stopping_world = false; - } -# else - if( b ) gc_save_context(current_thread,&b); -# endif -} - -// ------------------------- ALLOCATOR ---------------------------------------------------------- - -#ifdef GC_DEBUG -static int PAGE_ID = 0; -#endif - -HL_API void hl_gc_dump_memory( const char *filename ); -static void gc_major( void ); - -static void *gc_will_collide( void *p, int size ) { -# ifdef HL_64 - int i; - for(i=0;i>GC_MASK_BITS;i++) { - void *ptr = (unsigned char*)p + (i<= (8 << 20) ) { - gc_global_lock(false); - hl_error("Failed to alloc %d KB",size>>10); - } - if( gc_flags & GC_DUMP_MEM ) hl_gc_dump_memory("hlmemory.dump"); - out_of_memory("pages"); - } - - gc_pheader *p = gc_free_pheaders; - if( !p ) { - // alloc pages by chunks so we get good memory locality - int i, count = 100; - gc_pheader *head = (gc_pheader*)malloc(sizeof(gc_pheader)*count); - p = head; - for(i=1;inext_page = head + i; - p = p->next_page; - } - p->next_page = NULL; - p = gc_free_pheaders = head; - } - gc_free_pheaders = p->next_page; - memset(p,0,sizeof(gc_pheader)); - p->base = (unsigned char*)base; - p->page_size = size; - -# ifdef HL_64 - void *ptr = gc_will_collide(p->base,size); - if( ptr ) { -# ifdef HL_VCC - printf("GC Page HASH collide %IX %IX\n",(int_val)GC_GET_PAGE(ptr),(int_val)ptr); -# else - printf("GC Page HASH collide %lX %lX\n",(int_val)GC_GET_PAGE(ptr),(int_val)ptr); -# endif - return gc_alloc_page(size, kind, block_count); - } -#endif - -# if defined(GC_DEBUG) - memset(base,0xDD,size); - p->page_id = PAGE_ID++; -# else - // prevent false positive to 
access invalid type - if( kind == MEM_KIND_DYNAMIC ) memset(base, 0, size); -# endif - if( ((int_val)base) & ((1<page_size = size; - p->page_kind = kind; - p->bmp = NULL; - - // update stats - gc_stats.pages_count++; - gc_stats.pages_allocated++; - gc_stats.pages_blocks += block_count; - gc_stats.pages_total_memory += size; - gc_stats.mark_bytes += (block_count + 7) >> 3; - - // register page in page map - int i; - for(i=0;i>GC_MASK_BITS;i++) { - void *ptr = p->base + (i<page_size>>GC_MASK_BITS;i++) { - void *ptr = ph->base + (i<page_size; - gc_stats.mark_bytes -= (block_count + 7) >> 3; - gc_free_page_memory(ph->base,ph->page_size); - ph->next_page = gc_free_pheaders; - gc_free_pheaders = ph; -} - -static void gc_check_mark(); - -void *hl_gc_alloc_gen( hl_type *t, int size, int flags ) { - void *ptr; - int time = 0; - int allocated = 0; - if( size == 0 ) - return NULL; - if( size < 0 ) - hl_error("Invalid allocation size"); - gc_global_lock(true); - gc_check_mark(); -# ifdef GC_MEMCHK - size += HL_WSIZE; -# endif - if( gc_flags & GC_PROFILE ) time = TIMESTAMP(); - { - allocated = size; - gc_stats.allocation_count++; - gc_stats.total_requested += size; -# ifdef GC_PRINT_ALLOCS_SIZES -# define MAX_WORDS 16 - static int SIZE_CATEGORIES[MAX_WORDS] = {0}; - static int LARGE_BLOCKS[33] = {0}; - int wsize = (size + sizeof(void*) - 1) & ~(sizeof(void*)-1); - if( wsize < MAX_WORDS * sizeof(void*) ) - SIZE_CATEGORIES[wsize/sizeof(void*)]++; - else { - int k = 0; - while( size > (1<cur; gc_mstack *__current_mstack = st; -#define GC_STACK_END() __current_mstack->cur = __current_stack; -#define GC_STACK_RESUME() __current_stack = __current_mstack->cur; -#define GC_STACK_COUNT(st) ((st)->size - ((st)->end - (st)->cur) - 1) - -#define GC_PUSH_GEN(ptr,page) \ - if( MEM_HAS_PTR((page)->page_kind) ) { \ - if( __current_stack == __current_mstack->end ) { __current_mstack->cur = __current_stack; __current_stack = hl_gc_mark_grow(__current_mstack); } \ - *__current_stack++ = ptr; \ - } - -#ifdef HL_THREADS -# define GC_THREADS 1 -#else -# define GC_THREADS 0 -#endif - -HL_PRIM void **hl_gc_mark_grow( gc_mstack *stack ) { - int nsize = stack->size ? 
(((stack->size * 3) >> 1) & ~1) : 256; - void **nstack = (void**)malloc(sizeof(void**) * nsize); - void **base_stack = stack->end - stack->size; - int avail = (int)(stack->cur - base_stack); - if( nstack == NULL ) { - out_of_memory("markstack"); - return NULL; - } - memcpy(nstack, base_stack, avail * sizeof(void*)); - free(base_stack); - stack->size = nsize; - stack->end = nstack + nsize; - stack->cur = nstack + avail; - if( avail == 0 ) - *stack->cur++ = 0; - return stack->cur; -} - -static bool atomic_bit_unset( unsigned char *addr, unsigned char bitmask ) { - if( GC_MAX_MARK_THREADS <= 1 ) { - unsigned char v = *addr; - bool b = (v & bitmask) != 0; - if( b ) *addr = v & ~bitmask; - return b; - } -# if defined(HL_VCC) - return ((unsigned)InterlockedAnd8((char*)addr,(char)~bitmask) & bitmask) != 0; -# elif defined(HL_CLANG) || defined(HL_GCC) - return (__sync_fetch_and_and(addr,~bitmask) & bitmask) != 0; -# else - hl_fatal("Not implemented"); - return false; -# endif -} - -static bool atomic_bit_set( unsigned char *addr, unsigned char bitmask ) { - if( GC_MAX_MARK_THREADS <= 1 ) { - unsigned char v = *addr; - bool b = (v & bitmask) == 0; - if( b ) *addr = v | bitmask; - return b; - } -# if defined(HL_VCC) - return ((unsigned)InterlockedOr8((char*)addr,(char)bitmask) & bitmask) == 0; -# elif defined(HL_CLANG) || defined(HL_GCC) - return (__sync_fetch_and_or(addr,bitmask) & bitmask) == 0; -# else - hl_fatal("Not implemented"); - return false; -# endif -} - -static void gc_dispatch_mark( gc_mstack *st, bool all ) { - int nthreads = 0; - int i; - if( mark_threads_active == (1< count ) push = count; - while( t->stack.size <= push ) - hl_gc_mark_grow(&t->stack); - if( GC_STACK_COUNT(&t->stack) != 0 ) - hl_fatal("assert"); - st->cur -= push; - memcpy(t->stack.cur, st->cur, push * sizeof(void*)); - t->stack.cur += push; - if( !all ) - hl_semaphore_release(t->ready); - } - if( all ) { - if( nthreads != gc_mark_threads ) hl_fatal("assert"); - for(i=0;iready); - } - } -} - -#define REGULAR_BITS 16 - -static int gc_flush_mark( gc_mstack *stack ) { - GC_STACK_BEGIN(stack); - if( !__current_stack ) return 0; - int count = 0; - int regular_mask = 1 << REGULAR_BITS; - while( true ) { - void **block = (void**)*--__current_stack; - gc_pheader *page = GC_GET_PAGE(block); - unsigned int *mark_bits = NULL; - int pos = 0, nwords; -# ifdef GC_DEBUG - vdynamic *ptr = (vdynamic*)block; - ptr += 0; // prevent unreferenced warning -# endif - if( !block ) { - __current_stack++; - break; - } - if( (count++ & (1 << REGULAR_BITS)) != regular_mask && GC_MAX_MARK_THREADS > 1 && gc_mark_threads > 1 ) { - regular_mask = regular_mask ? 
0 : 1 << REGULAR_BITS; - GC_STACK_END(); - gc_dispatch_mark(stack,false); - GC_STACK_RESUME(); - } - int size = gc_allocator_fast_block_size(page, block); -# ifdef GC_DEBUG - if( size <= 0 ) hl_fatal("assert"); -# endif - nwords = size / HL_WSIZE; -# ifdef GC_PRECISE - if( page->page_kind == MEM_KIND_DYNAMIC ) { - hl_type *t = *(hl_type**)block; -# ifdef GC_DEBUG -# ifdef HL_64 - if( (int_val)t == 0xDDDDDDDDDDDDDDDD ) continue; -# else - if( (int_val)t == 0xDDDDDDDD ) continue; -# endif -# endif - if( !t ) - continue; // skip not allocated block - if( t->mark_bits && t->kind != HFUN ) { - mark_bits = t->mark_bits; - if( t->kind == HENUM ) { - mark_bits += ((venum*)block)->index; - block += 2; - nwords -= 2; - } else { - block++; - pos++; - } - } - } -# endif - while( pos < nwords ) { - void *p; - if( mark_bits && (mark_bits[pos >> 5] & (1 << (pos&31))) == 0 ) { - pos++; - block++; - continue; - } - p = *block++; - pos++; - if( !p ) continue; - page = GC_GET_PAGE(p); - if( !page || !INPAGE(p,page) ) continue; - int bid = gc_allocator_get_block_id(page,p); - if( bid >= 0 && atomic_bit_set(&page->bmp[bid>>3],1<<(bid&7)) ) { - if( MEM_HAS_PTR(page->page_kind) ) DRAM_PREFETCH(p); - GC_PUSH_GEN(p,page); - } - } - } - GC_STACK_END(); - return count; -} - -static void gc_mark_stack( void *start, void *end ) { - GC_STACK_BEGIN(&global_mark_stack); - void **stack_head = (void**)start; - while( stack_head < (void**)end ) { - void *p = *stack_head++; - gc_pheader *page = GC_GET_PAGE(p); - if( !page || !INPAGE(p,page) ) continue; -# ifdef GC_INTERIOR_POINTERS - int bid = gc_allocator_get_block_interior(page, &p); -# else - int bid = gc_allocator_get_block_id(page, p); -# endif - if( bid >= 0 && (page->bmp[bid>>3] & (1<<(bid&7))) == 0 ) { - page->bmp[bid>>3] |= 1<<(bid&7); - GC_PUSH_GEN(p,page); - } - } - GC_STACK_END(); -} - -static void gc_mark() { - GC_STACK_BEGIN(&global_mark_stack); - int mark_bytes = gc_stats.mark_bytes; - int i; - // prepare mark bits - if( mark_bytes > mark_size ) { - gc_free_page_memory(mark_data, mark_size); - if( mark_size == 0 ) mark_size = GC_PAGE_SIZE; - while( mark_size < mark_bytes ) - mark_size <<= 1; - mark_data = gc_alloc_page_memory(mark_size); - if( mark_data == NULL ) out_of_memory("markbits"); - } - MZERO(mark_data,mark_bytes); - gc_allocator_before_mark(mark_data); - // push roots - for(i=0;i= 0 && (page->bmp[bid>>3] & (1<<(bid&7))) == 0 ) { - page->bmp[bid>>3] |= 1<<(bid&7); - GC_PUSH_GEN(p,page); - } - } - - GC_STACK_END(); - - // scan threads stacks & registers - for(i=0;istack_cur,t->stack_top); - gc_mark_stack(&t->gc_regs,(void**)&t->gc_regs + (sizeof(jmp_buf) / sizeof(void*) - 1)); - gc_mark_stack(&t->extra_stack_data,(void**)&t->extra_stack_data + t->extra_stack_size); - } - - gc_mstack *st = &global_mark_stack; - if( gc_mark_threads <= 1 ) - gc_flush_mark(st); - else { - gc_dispatch_mark(st, true); - if( GC_STACK_COUNT(st) > 0 ) - hl_fatal("assert"); - // wait threads to finish - while( mark_threads_active ) - hl_semaphore_acquire(mark_threads_done); - for(i=0;istack) > 0 ) - hl_fatal("assert"); - } - } - gc_allocator_after_mark(); -} - -static void count_free_memory( gc_pheader *page, int size ) { - gc_stats.free_memory += gc_free_memory(page); -} - -static void gc_major() { - - if( gc_flags & GC_PROFILE_MEM ) { - double gc_mem = gc_stats.mark_bytes; - int i; - gc_mem += gc_allocator_private_memory(); - gc_mem += global_mark_stack.size * sizeof(void*); - for(i=0;istack.size * sizeof(void*); - } - int pages = gc_stats.pages_count; - gc_pheader *p = 
gc_free_pheaders; - while( p ) { - pages++; - p = p->next_page; - } - gc_mem += sizeof(gc_pheader) * pages; - gc_mem += sizeof(void*) * gc_roots_max; - gc_mem += (sizeof(void*) + sizeof(hl_thread_info)) * gc_threads.count; - for(i=0;i<(1<>10) - ); - last_profile.allocation_count = gc_stats.allocation_count; - last_profile.alloc_time = gc_stats.alloc_time; - last_profile.total_allocated = gc_stats.total_allocated; - } -} - -HL_API void hl_gc_major() { - gc_global_lock(true); - gc_major(); - gc_global_lock(false); -} - -HL_API bool hl_is_gc_ptr( void *ptr ) { - gc_pheader *page = GC_GET_PAGE(ptr); - if( !page || !INPAGE(ptr,page) ) return false; - int bid = gc_allocator_get_block_id(page, ptr); - if( bid < 0 ) return false; - //if( page->bmp && page->next_block == page->first_block && (page->bmp[bid>>3]&(1<<(bid&7))) == 0 ) return false; - return true; -} - -HL_API int hl_gc_get_memsize( void *ptr ) { - gc_pheader *page = GC_GET_PAGE(ptr); - if( !page || !INPAGE(ptr,page) ) return -1; - return gc_allocator_fast_block_size(page,ptr); -} - - -static bool gc_is_active = true; - -static void gc_check_mark() { - int64 m = gc_stats.total_allocated - gc_stats.last_mark; - int64 b = gc_stats.allocation_count - gc_stats.last_mark_allocs; - if( (m > gc_stats.pages_total_memory * gc_mark_threshold || b > gc_stats.pages_blocks * gc_mark_threshold || (gc_flags & GC_FORCE_MAJOR)) && gc_is_active ) - gc_major(); -} - -static void mark_thread_main( void *param ) { - int index = (int)(int_val)param; - gc_mthread *inf = &mark_threads[index]; - while( true ) { - hl_semaphore_acquire(inf->ready); - inf->mark_count += gc_flush_mark(&inf->stack); - if( !atomic_bit_unset(&mark_threads_active, 1 << index) ) hl_fatal("assert"); - if( mark_threads_active == 0 ) hl_semaphore_release(mark_threads_done); - } -} - -int gc_get_mark_threads( hl_thread **tids ) { - if (gc_mark_threads <= 1) - return 0; - for (int i = 0; i < gc_mark_threads; i++) { - tids[i] = mark_threads[i].tid; - } - return gc_mark_threads; -} - -static void hl_gc_init() { - int i; - for(i=0;i<1< GC_MAX_MARK_THREADS ) gc_mark_threads = GC_MAX_MARK_THREADS; - } - if( gc_mark_threads > 1 ) { - for(int i=0;iready); - t->ready = hl_semaphore_alloc(0); - t->tid = hl_thread_start(mark_thread_main, (void*)(int_val)i, false); - } - } -# endif -} - -static void hl_gc_free() { -# ifdef HL_THREADS - hl_remove_root(&gc_threads.global_lock); -# endif -} - -// ---- UTILITIES ---------------------- - -HL_API bool hl_is_blocking() { - hl_thread_info *t = current_thread; - // when called from a non GC thread, tells if the main thread is blocking - if( t == NULL ) { - if( gc_threads.count == 0 ) - return false; - t = gc_threads.threads[0]; - } - return t->gc_blocking > 0; -} - -HL_API void hl_blocking( bool b ) { - hl_thread_info *t = current_thread; - if( !t ) - return; // allow hl_blocking in non-GC threads - if( b ) { -# ifdef HL_THREADS - if( t->gc_blocking == 0 ) - gc_save_context(t,&b); -# endif - t->gc_blocking++; - } else if( t->gc_blocking == 0 ) - hl_error("Unblocked thread"); - else { - t->gc_blocking--; - if( t->gc_blocking == 0 && gc_threads.stopping_world ) { - gc_global_lock(true); - gc_global_lock(false); - } - } -} - -void hl_cache_free(); -void hl_cache_init(); - -void hl_global_init() { - hl_gc_init(); - hl_cache_init(); -} - -void hl_global_free() { - hl_cache_free(); - hl_gc_free(); -} - -struct hl_alloc_block { - int size; - hl_alloc_block *next; - unsigned char *p; -}; - -void hl_alloc_init( hl_alloc *a ) { - a->cur = NULL; -} - -void *hl_malloc( 
hl_alloc *a, int size ) { - hl_alloc_block *b = a->cur; - void *p; - if( !size ) return NULL; - size += hl_pad_size(size,&hlt_dyn); - if( b == NULL || b->size <= size ) { - int alloc = size < 4096-(int)sizeof(hl_alloc_block) ? 4096-(int)sizeof(hl_alloc_block) : size; - b = (hl_alloc_block *)malloc(sizeof(hl_alloc_block) + alloc); - if( b == NULL ) out_of_memory("malloc"); - b->p = ((unsigned char*)b) + sizeof(hl_alloc_block); - b->size = alloc; - b->next = a->cur; - a->cur = b; - } - p = b->p; - b->p += size; - b->size -= size; - return p; -} - -void *hl_zalloc( hl_alloc *a, int size ) { - void *p = hl_malloc(a,size); - if( p ) MZERO(p,size); - return p; -} - -void hl_free( hl_alloc *a ) { - hl_alloc_block *b = a->cur; - int_val prev = 0; - int size = 0; - while( b ) { - hl_alloc_block *n = b->next; - size = (int)(b->p + b->size - ((unsigned char*)b)); - prev = (int_val)b; - free(b); - b = n; - } - // check if our allocator was not part of the last free block - if( (int_val)a < prev || (int_val)a > prev+size ) - a->cur = NULL; -} - -HL_PRIM void *hl_alloc_executable_memory( int size ) { -#ifdef __APPLE__ -# ifndef MAP_ANONYMOUS -# define MAP_ANONYMOUS MAP_ANON -# endif -#endif -#if defined(HL_WIN) && defined(HL_64) - static char *jit_address = (char*)0x000076CA9F000000; - void *ptr; -retry_jit_alloc: - ptr = VirtualAlloc(jit_address,size,MEM_RESERVE|MEM_COMMIT,PAGE_EXECUTE_READWRITE); - if( !ptr ) { - jit_address = (char*)(((int_val)jit_address)>>1); // fix for Win7 - will eventually reach NULL - goto retry_jit_alloc; - } - jit_address += size + ((-size) & (GC_PAGE_SIZE - 1)); - return ptr; -#elif defined(HL_WIN) - void *ptr = VirtualAlloc(NULL,size,MEM_RESERVE|MEM_COMMIT,PAGE_EXECUTE_READWRITE); - return ptr; -#elif defined(HL_OS) - return malloc(size); -#elif defined(HL_CONSOLE) - return NULL; -#else - void *p; - p = mmap(NULL,size,PROT_READ|PROT_WRITE|PROT_EXEC,(MAP_PRIVATE|MAP_ANONYMOUS),-1,0); - return p; -#endif -} - -HL_PRIM void hl_free_executable_memory( void *c, int size ) { -#if defined(HL_WIN) - VirtualFree(c,0,MEM_RELEASE); -#elif !defined(HL_CONSOLE) - munmap(c, size); -#endif -} - -#if defined(HL_CONSOLE) -void *sys_alloc_align( int size, int align ); -void sys_free_align( void *ptr, int size ); -#elif !defined(HL_WIN) -static void *base_addr = (void*)0x40000000; -typedef struct _pextra pextra; -struct _pextra { - void *page_ptr; - void *base_ptr; - pextra *next; -}; -static pextra *extra_pages = NULL; -#define EXTRA_SIZE (GC_PAGE_SIZE + (4<<10)) -#endif - -static void *gc_alloc_page_memory( int size ) { -#if defined(HL_WIN) -# if defined(GC_DEBUG) && defined(HL_64) -# define STATIC_ADDRESS -# endif -# ifdef STATIC_ADDRESS - // force out of 32 bits addresses to check loss of precision - static char *start_address = (char*)0x100000000; -# else - static void *start_address = NULL; -# endif - void *ptr = VirtualAlloc(start_address,size,MEM_RESERVE|MEM_COMMIT,PAGE_READWRITE); -# ifdef STATIC_ADDRESS - if( ptr == NULL && start_address ) { - start_address = NULL; - return gc_alloc_page_memory(size); - } - start_address += size + ((-size) & (GC_PAGE_SIZE - 1)); -# endif - return ptr; -#elif defined(HL_CONSOLE) - return sys_alloc_align(size, GC_PAGE_SIZE); -#elif defined(HL_EMSCRIPTEN) - return emscripten_builtin_memalign(GC_PAGE_SIZE, size); -#else - static int recursions = 0; - int i = 0; - while( gc_will_collide(base_addr,size) ) { - base_addr = (char*)base_addr + GC_PAGE_SIZE; - i++; - // most likely our hashing creates too many collisions - if( i >= 1 << (GC_LEVEL0_BITS + 
GC_LEVEL1_BITS + 2) ) - return NULL; - } - void *ptr = mmap(base_addr,size,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_ANONYMOUS,-1,0); - if( ptr == (void*)-1 ) - return NULL; - if( ((int_val)ptr) & (GC_PAGE_SIZE-1) ) { - munmap(ptr,size); - if( recursions >= 5 ) { - ptr = mmap(base_addr,size+EXTRA_SIZE,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_ANONYMOUS,-1,0); - int offset = (int)((int_val)ptr) & (GC_PAGE_SIZE-1); - void *aligned = (char*)ptr + (GC_PAGE_SIZE - offset); - pextra *inf = (pextra*)( (char*)ptr + size + EXTRA_SIZE - sizeof(pextra)); - inf->page_ptr = aligned; - inf->base_ptr = ptr; - inf->next = extra_pages; - extra_pages = inf; - return aligned; - } - void *tmp; - int tmp_size = (int)((int_val)ptr - (int_val)base_addr); - if( tmp_size > 0 ) { - base_addr = (void*)((((int_val)ptr) & ~(GC_PAGE_SIZE-1)) + GC_PAGE_SIZE); - tmp = ptr; - } else { - base_addr = (void*)(((int_val)ptr) & ~(GC_PAGE_SIZE-1)); - tmp = NULL; - } - if( tmp ) tmp = mmap(tmp,tmp_size,PROT_WRITE,MAP_PRIVATE|MAP_ANONYMOUS,-1,0); - recursions++; - ptr = gc_alloc_page_memory(size); - recursions--; - if( tmp ) munmap(tmp,tmp_size); - return ptr; - } - base_addr = (char*)ptr+size; - return ptr; -#endif -} - -static void gc_free_page_memory( void *ptr, int size ) { -#ifdef HL_WIN - VirtualFree(ptr, 0, MEM_RELEASE); -#elif defined(HL_CONSOLE) - sys_free_align(ptr,size); -#elif defined(HL_EMSCRIPTEN) - emscripten_builtin_free(ptr); -#else - pextra *e = extra_pages, *prev = NULL; - while( e ) { - if( e->page_ptr == ptr ) { - if( prev ) - prev->next = e->next; - else - extra_pages = e->next; - munmap(e->base_ptr, size + EXTRA_SIZE); - return; - } - prev = e; - e = e->next; - } - munmap(ptr,size); -#endif -} - -vdynamic *hl_alloc_dynamic( hl_type *t ) { - vdynamic *d = (vdynamic*)hl_gc_alloc_gen(t, sizeof(vdynamic), (hl_is_ptr(t) ? (t->kind == HSTRUCT ? MEM_KIND_RAW : MEM_KIND_DYNAMIC) : MEM_KIND_NOPTR) | MEM_ZERO); - d->t = t; - return d; -} - -#ifndef HL_64 -# define DYN_PAD 0, -#else -# define DYN_PAD -#endif - -static const vdynamic vdyn_true = { &hlt_bool, DYN_PAD {true} }; -static const vdynamic vdyn_false = { &hlt_bool, DYN_PAD {false} }; - -vdynamic *hl_alloc_dynbool( bool b ) { - return (vdynamic*)(b ? &vdyn_true : &vdyn_false); -} - - -vdynamic *hl_alloc_obj( hl_type *t ) { - vobj *o; - int i; - hl_runtime_obj *rt = t->obj->rt; - if( rt == NULL || rt->methods == NULL ) rt = hl_get_obj_proto(t); - if( t->kind == HSTRUCT ) { - o = (vobj*)hl_gc_alloc_gen(t, rt->size, (rt->hasPtr ? MEM_KIND_RAW : MEM_KIND_NOPTR) | MEM_ZERO); - } else { - o = (vobj*)hl_gc_alloc_gen(t, rt->size, (rt->hasPtr ? MEM_KIND_DYNAMIC : MEM_KIND_NOPTR) | MEM_ZERO); - o->t = t; - } - for(i=0;inbindings;i++) { - hl_runtime_binding *b = rt->bindings + i; - *(void**)(((char*)o) + rt->fields_indexes[b->fid]) = b->closure ? 
hl_alloc_closure_ptr(b->closure,b->ptr,o) : b->ptr; - } - return (vdynamic*)o; -} - -vdynobj *hl_alloc_dynobj() { - vdynobj *o = (vdynobj*)hl_gc_alloc_gen(&hlt_dynobj,sizeof(vdynobj),MEM_KIND_DYNAMIC | MEM_ZERO); - o->t = &hlt_dynobj; - return o; -} - -vvirtual *hl_alloc_virtual( hl_type *t ) { - vvirtual *v = (vvirtual*)hl_gc_alloc(t, t->virt->dataSize + sizeof(vvirtual) + sizeof(void*) * t->virt->nfields); - void **fields = (void**)(v + 1); - char *vdata = (char*)(fields + t->virt->nfields); - int i; - v->t = t; - v->value = NULL; - v->next = NULL; - for(i=0;ivirt->nfields;i++) - fields[i] = (char*)v + t->virt->indexes[i]; - MZERO(vdata,t->virt->dataSize); - return v; -} - -HL_API void hl_gc_stats( double *total_allocated, double *allocation_count, double *current_memory ) { - *total_allocated = (double)gc_stats.total_allocated; - *allocation_count = (double)gc_stats.allocation_count; - *current_memory = (double)gc_stats.pages_total_memory; -} - -HL_API void hl_gc_enable( bool b ) { - gc_is_active = b; -} - -HL_API int hl_gc_get_flags() { - return gc_flags; -} - -HL_API void hl_gc_set_flags( int f ) { - gc_flags = f; -} - -HL_API void hl_set_thread_flags( int flags, int mask ) { - hl_thread_info *t = hl_get_thread(); - t->flags = (t->flags & ~mask) | flags; -} - -HL_API void hl_gc_profile( bool b ) { - if( b ) - gc_flags |= GC_PROFILE; - else - gc_flags &= GC_PROFILE; -} - -static FILE *fdump; -static void fdump_i( int i ) { - fwrite(&i,1,4,fdump); -} -static void fdump_p( void *p ) { - fwrite(&p,1,sizeof(void*),fdump); -} -static void fdump_d( void *p, int size ) { - fwrite(p,1,size,fdump); -} - -static hl_types_dump gc_types_dump = NULL; -HL_API void hl_gc_set_dump_types( hl_types_dump tdump ) { - gc_types_dump = tdump; -} - -static void gc_dump_block( void *block, int size ) { - fdump_p(block); - fdump_i(size); -} - -static void gc_dump_block_ptr( void *block, int size ) { - fdump_p(block); - fdump_i(size); - if( size >= (int)sizeof(void*) ) fdump_p(*(void**)block); -} - -static void gc_dump_page( gc_pheader *p, int private_data ) { - fdump_p(p->base); - fdump_i(p->page_kind); - fdump_i(p->page_size); - fdump_i(private_data); - if( p->page_kind & MEM_KIND_NOPTR ) { - gc_iter_live_blocks(p, gc_dump_block_ptr); // only dump type - fdump_p(NULL); - } else { - gc_iter_live_blocks(p,gc_dump_block); - fdump_p(NULL); - fdump_d(p->base, p->page_size); - } -} - -HL_API void hl_gc_dump_memory( const char *filename ) { - int i; - gc_global_lock(true); - gc_stop_world(true); - gc_mark(); - fdump = fopen(filename,"wb"); - if( fdump == NULL ) { - gc_stop_world(false); - gc_global_lock(false); - hl_error("Failed to open file"); - return; - } - - // header - fdump_d("HMD1",4); - fdump_i(((sizeof(void*) == 8)?1:0) | ((sizeof(bool) == 4)?2:0)); - - // pages - int page_count, private_data; - gc_get_stats(&page_count, &private_data); - - // all mallocs - private_data += sizeof(gc_pheader) * page_count; - private_data += sizeof(void*) * gc_roots_max; - private_data += gc_threads.count * (sizeof(void*) + sizeof(hl_thread_info)); - for(i=0;i<1<stack_top); - int size = (int)((void**)t->stack_top - (void**)t->stack_cur); - fdump_i(size); - fdump_d(t->stack_cur,size*sizeof(void*)); - } - // types -# define fdump_t(t) fdump_i(t.kind); fdump_p(&t); - fdump_t(hlt_i32); - fdump_t(hlt_i64); - fdump_t(hlt_f32); - fdump_t(hlt_f64); - fdump_t(hlt_dyn); - fdump_t(hlt_array); - fdump_t(hlt_bytes); - fdump_t(hlt_dynobj); - fdump_t(hlt_bool); - fdump_i(-1); - if( gc_types_dump ) gc_types_dump(fdump_d); - fclose(fdump); - 
fdump = NULL; - gc_stop_world(false); - gc_global_lock(false); -} - -typedef struct { - hl_type *t; - int count; - int page_kinds; - varray *arr; - int index; -} gc_live_obj; -static gc_live_obj live_obj; - -static void gc_count_live_block( void *block, int size ) { - if( size < (int)sizeof(void*) ) return; - hl_type *t = *(hl_type **)block; - if( t != live_obj.t ) return; - live_obj.count++; - if( live_obj.index < live_obj.arr->size ) { - hl_aptr(live_obj.arr, vdynamic*)[live_obj.index] = hl_make_dyn(&block, live_obj.t); - live_obj.index++; - } -} - -static void gc_count_live_page( gc_pheader *p, int private_data ) { - if( (1 << p->page_kind) & live_obj.page_kinds ) - gc_iter_live_blocks(p, gc_count_live_block); -} - -HL_API int hl_gc_get_live_objects( hl_type *t, varray *arr ) { - if( !hl_is_dynamic(t) ) return -1; - gc_global_lock(true); - gc_stop_world(true); - gc_mark(); - - live_obj.t = t; - live_obj.count = 0; - live_obj.page_kinds = (1 << MEM_KIND_DYNAMIC) + (1 << MEM_KIND_NOPTR); - if( t->kind == HOBJ ) { - live_obj.page_kinds = hl_get_obj_rt(t)->hasPtr ? 1 << MEM_KIND_DYNAMIC : 1 << MEM_KIND_NOPTR; - } - live_obj.arr = arr; - live_obj.index = 0; - gc_iter_pages(gc_count_live_page); - - gc_stop_world(false); - gc_global_lock(false); - return live_obj.count; -} - -#ifdef HL_VCC -# pragma optimize( "", off ) -#endif -HL_API vdynamic *hl_debug_call( int mode, vdynamic *v ) { - return NULL; -} -#ifdef HL_VCC -# pragma optimize( "", on ) -#endif - -DEFINE_PRIM(_VOID, gc_major, _NO_ARG); -DEFINE_PRIM(_VOID, gc_enable, _BOOL); -DEFINE_PRIM(_VOID, gc_profile, _BOOL); -DEFINE_PRIM(_VOID, gc_stats, _REF(_F64) _REF(_F64) _REF(_F64)); -DEFINE_PRIM(_VOID, gc_dump_memory, _BYTES); -DEFINE_PRIM(_I32, gc_get_live_objects, _TYPE _ARR); -DEFINE_PRIM(_I32, gc_get_flags, _NO_ARG); -DEFINE_PRIM(_VOID, gc_set_flags, _I32); -DEFINE_PRIM(_DYN, debug_call, _I32 _DYN); -DEFINE_PRIM(_VOID, blocking, _BOOL); -DEFINE_PRIM(_VOID, set_thread_flags, _I32 _I32); +/* + * Copyright (C)2005-2016 Haxe Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */
+#include "hl.h"
+#ifdef HL_WIN
+# undef _GUID
+# include <windows.h>
+#else
+# include <sys/types.h>
+# include <sys/mman.h>
+# if defined(__APPLE__) && defined(__aarch64__)
+# include <pthread.h>
+# include <libkern/OSCacheControl.h>
+# endif
+#endif
+
+#if defined(HL_EMSCRIPTEN)
+# include <emscripten/heap.h>
+#endif
+
+#if defined(HL_VCC)
+#define DRAM_PREFETCH(addr) _mm_prefetch((const char*)(addr), 1)
+#elif defined(HL_CLANG) || defined (HL_GCC)
+#define DRAM_PREFETCH(addr) __builtin_prefetch(addr)
+#else
+#define DRAM_PREFETCH(addr)
+#endif
+
+#define MZERO(ptr,size) memset(ptr,0,size)
+
+// GC
+
+#define GC_PAGE_BITS 16
+#define GC_PAGE_SIZE (1 << GC_PAGE_BITS)
+
+#ifndef HL_64
+# define gc_hash(ptr) ((unsigned int)(ptr))
+# define GC_LEVEL0_BITS 8
+# define GC_LEVEL1_BITS 8
+#else
+# define GC_LEVEL0_BITS 10
+# define GC_LEVEL1_BITS 10
+
+// we currently discard the higher bits
+// we should instead have some special handling for them
+// in x86-64 user space grows up to 0x8000-00000000 (16 bits base + 31 bits page id)
+
+#ifdef HL_WIN
+# define gc_hash(ptr) ((int_val)(ptr)&0x0000000FFFFFFFFF)
+#else
+// Linux gives addresses using the following patterns (X=any,Y=small value - can be 0):
+// 0x0000000YXXX0000
+// 0x0007FY0YXXX0000
+static int_val gc_hash( void *ptr ) {
+ int_val v = (int_val)ptr;
+ return (v ^ ((v >> 33) << 28)) & 0x0000000FFFFFFFFF;
+}
+#endif
+
+#endif
+
+#define GC_MASK_BITS 16
+#define GC_GET_LEVEL1(ptr) hl_gc_page_map[gc_hash(ptr)>>(GC_MASK_BITS+GC_LEVEL1_BITS)]
+#define GC_GET_PAGE(ptr) GC_GET_LEVEL1(ptr)[(gc_hash(ptr)>>GC_MASK_BITS)&GC_LEVEL1_MASK]
+#define GC_LEVEL1_MASK ((1 << GC_LEVEL1_BITS) - 1)
+
+#define PAGE_KIND_BITS 2
+#define PAGE_KIND_MASK ((1 << PAGE_KIND_BITS) - 1)
+
+#if defined(HL_DEBUG) && !defined(HL_CONSOLE)
+# define GC_DEBUG
+# define GC_MEMCHK
+#endif
+
+#define GC_INTERIOR_POINTERS
+#define GC_PRECISE
+
+#ifndef HL_THREADS
+# define GC_MAX_MARK_THREADS 1
+#else
+# ifndef GC_MAX_MARK_THREADS
+# define GC_MAX_MARK_THREADS 4
+# endif
+#endif
+
+#define out_of_memory(reason) hl_fatal("Out of Memory (" reason ")")
+
+typedef struct _gc_pheader gc_pheader;
+
+// page + private total reserved data per page
+typedef void (*gc_page_iterator)( gc_pheader *, int );
+// block-ptr + size
+typedef void (*gc_block_iterator)( void *, int );
+
+//#define GC_EXTERN_API
+
+#ifdef GC_EXTERN_API
+typedef void* gc_allocator_page_data;
+
+// Initialize the allocator
+void gc_allocator_init();
+
+// Get the block size within the given page. The block validity has already been checked.
+int gc_allocator_fast_block_size( gc_pheader *page, void *block );
+
+// Get the block id within the given page, or -1 if it's an invalid ptr. The block is already checked within page bounds
+int gc_allocator_get_block_id( gc_pheader *page, void *block );
+
+// Same as get_block_id but handles interior pointers and modifies the block value
+int gc_allocator_get_block_id_interior( gc_pheader *page, void **block );
+
+// Called before marking starts: should update each page "bmp" with mark_bits
+void gc_allocator_before_mark( unsigned char *mark_bits );
+
+// Called when marking ends: should call finalizers, sweep unused blocks and free empty pages
+void gc_allocator_after_mark();
+
+// Allocate a block with given size using the specified page kind.
+// Returns NULL if no block could be allocated
+// Sets size to really allocated size (could be larger)
+// Sets size to -1 if allocation refused (required size is invalid)
+void *gc_allocator_alloc( int *size, int page_kind );
+
+// returns the number of pages allocated and private data size (global)
+void gc_get_stats( int *page_count, int *private_data);
+void gc_iter_pages( gc_page_iterator i );
+void gc_iter_live_blocks( gc_pheader *p, gc_block_iterator i );
+
+#else
+# include "allocator.h"
+#endif
+
+struct _gc_pheader {
+ // const
+ unsigned char *base;
+ unsigned char *bmp;
+ int page_size;
+ int page_kind;
+ gc_allocator_page_data alloc;
+ gc_pheader *next_page;
+#ifdef GC_DEBUG
+ int page_id;
+#endif
+};
+
+#ifdef HL_64
+# define INPAGE(ptr,page) ((unsigned char*)(ptr) >= (page)->base && (unsigned char*)(ptr) < (page)->base + (page)->page_size)
+#else
+# define INPAGE(ptr,page) true
+#endif
+
+#define GC_PROFILE 1
+#define GC_DUMP_MEM 2
+#define GC_NO_THREADS 4
+#define GC_FORCE_MAJOR 8
+#define GC_PROFILE_MEM 16
+
+static int gc_flags = 0;
+static gc_pheader *gc_level1_null[1<<GC_LEVEL1_BITS] = {NULL};
+static gc_pheader **hl_gc_page_map[1<<GC_LEVEL0_BITS] = {NULL};
+
+static void gc_save_context( hl_thread_info *t, void *prev_stack ) {
+ setjmp(t->gc_regs);
+ // some compilers (such as clang) might push/pop some callee registers in call
+ // to gc_save_context (or before) which might hold a gc value !
+ // let's capture them immediately in extra per-thread data
+ t->stack_cur = &prev_stack;
+
+ // We have no guarantee prev_stack is pointer-aligned
+ // All calls are passing a pointer to a bool, which is aligned on 1 byte
+ // If the pointer is wrongly aligned, the extra_stack_data is misaligned
+ // and register pointers saved on the stack will not be discovered correctly by the GC
+ uintptr_t aligned_prev_stack = ((uintptr_t)prev_stack) & ~(sizeof(void*) - 1);
+ prev_stack = (void*)aligned_prev_stack;
+ int size = (int)((char*)prev_stack - (char*)t->stack_cur) / sizeof(void*);
+ if( size > HL_MAX_EXTRA_STACK ) hl_fatal("GC_SAVE_CONTEXT");
+ t->extra_stack_size = size;
+ memcpy(t->extra_stack_data, prev_stack, size*sizeof(void*));
+}
+
+#ifndef HL_THREADS
+# define gc_global_lock(_)
+#else
+static void gc_global_lock( bool lock ) {
+ hl_thread_info *t = current_thread;
+ bool mt = (gc_flags & GC_NO_THREADS) == 0;
+ if( !t && gc_threads.count == 0 ) return;
+ if( lock ) {
+ if( !t )
+ hl_fatal("Can't lock GC in unregistered thread");
+ if( mt ) gc_save_context(t,&lock);
+ t->gc_blocking++;
+ if( mt ) hl_mutex_acquire(gc_threads.global_lock);
+ } else {
+ t->gc_blocking--;
+ if( mt ) hl_mutex_release(gc_threads.global_lock);
+ }
+}
+#endif
+
+HL_PRIM void hl_global_lock( bool lock ) {
+ if( lock )
+ hl_mutex_acquire(gc_threads.exclusive_lock);
+ else
+ hl_mutex_release(gc_threads.exclusive_lock);
+}
+
+HL_PRIM void hl_add_root( void *r ) {
+ gc_global_lock(true);
+ if( gc_roots_count == gc_roots_max ) {
+ int nroots = gc_roots_max ? (gc_roots_max << 1) : 16;
+ void ***roots = (void***)malloc(sizeof(void*)*nroots);
+ memcpy(roots,gc_roots,sizeof(void*)*gc_roots_count);
+ free(gc_roots);
+ gc_roots = roots;
+ gc_roots_max = nroots;
+ }
+ gc_roots[gc_roots_count++] = (void**)r;
+ gc_global_lock(false);
+}
+
+HL_PRIM void hl_remove_root( void *v ) {
+ int i;
+ gc_global_lock(true);
+ for(i=gc_roots_count-1;i>=0;i--)
+ if( gc_roots[i] == (void**)v ) {
+ gc_roots_count--;
+ gc_roots[i] = gc_roots[gc_roots_count];
+ break;
+ }
+ gc_global_lock(false);
+}
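/* Usage sketch for the root API above: hl_add_root registers the *slot*,
   not the value, so the variable can be reassigned freely while rooted
   ("cached_obj" is a hypothetical example, not part of this patch). */
static vdynamic *cached_obj = NULL;

static void cache_init_example() {
	hl_add_root(&cached_obj); // protect whatever the slot points to
	cached_obj = hl_alloc_dynamic(&hlt_dyn);
}

static void cache_free_example() {
	cached_obj = NULL;
	hl_remove_root(&cached_obj); // the object becomes collectable again
}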
+HL_PRIM gc_pheader *hl_gc_get_page( void *v ) {
+ gc_pheader *page = GC_GET_PAGE(v);
+ if( page && !INPAGE(v,page) )
+ page = NULL;
+ return page;
+}
+
+// ------------------------- THREADS ----------------------------------------------------------
+
+HL_API int hl_thread_id();
+
+HL_API void hl_register_thread( void *stack_top ) {
+ if( hl_get_thread() )
+ hl_fatal("Thread already registered");
+
+ hl_thread_info *t = (hl_thread_info*)malloc(sizeof(hl_thread_info));
+ memset(t, 0, sizeof(hl_thread_info));
+ t->thread_id = hl_thread_id();
+ #ifdef HL_MAC
+ t->mach_thread_id = mach_thread_self();
+ t->pthread_id = (pthread_t)hl_thread_current();
+ #endif
+ t->stack_top = stack_top;
+ t->flags = HL_TRACK_MASK << HL_TREAD_TRACK_SHIFT;
+ current_thread = t;
+ hl_add_root(&t->exc_value);
+ hl_add_root(&t->exc_handler);
+
+ gc_global_lock(true);
+ hl_thread_info **all = (hl_thread_info**)malloc(sizeof(void*) * (gc_threads.count + 1));
+ memcpy(all,gc_threads.threads,sizeof(void*)*gc_threads.count);
+ gc_threads.threads = all;
+ all[gc_threads.count++] = t;
+ gc_global_lock(false);
+}
+
+HL_API void hl_unregister_thread() {
+ int i;
+ hl_thread_info *t = hl_get_thread();
+ if( !t )
+ hl_fatal("Thread not registered");
+ hl_remove_root(&t->exc_value);
+ hl_remove_root(&t->exc_handler);
+ gc_global_lock(true);
+ for(i=0;i<gc_threads.count;i++)
+ if( gc_threads.threads[i] == t ) {
+ memmove(gc_threads.threads + i, gc_threads.threads + i + 1, (gc_threads.count - i - 1) * sizeof(void*));
+ gc_threads.count--;
+ break;
+ }
+ free(t);
+ current_thread = NULL;
+ // don't use gc_global_lock(false) here, t has been freed
+ hl_mutex_release(gc_threads.global_lock);
+}
+
+static void gc_stop_world( bool b ) {
+# ifdef HL_THREADS
+ if( b ) {
+ int i;
+ gc_threads.stopping_world = true;
+ for(i=0;i<gc_threads.count;i++) {
+ hl_thread_info *t = gc_threads.threads[i];
+ while( t != current_thread && t->gc_blocking == 0 ) {}; // spinwait
+ }
+ } else {
+ // releasing global lock will release all threads
+ gc_threads.stopping_world = false;
+ }
+# else
+ if( b ) gc_save_context(current_thread,&b);
+# endif
+}
+
+// ------------------------- ALLOCATOR ----------------------------------------------------------
+
+#ifdef GC_DEBUG
+static int PAGE_ID = 0;
+#endif
+
+HL_API void hl_gc_dump_memory( const char *filename );
+static void gc_major( void );
+static void *gc_alloc_page_memory( int size );
+static void gc_free_page_memory( void *ptr, int size );
+
+static void *gc_will_collide( void *p, int size ) {
+# ifdef HL_64
+ int i;
+ for(i=0;i<size>>GC_MASK_BITS;i++) {
+ void *ptr = (unsigned char*)p + (i<<GC_MASK_BITS);
+ if( GC_GET_PAGE(ptr) != NULL ) return ptr;
+ }
+# endif
+ return NULL;
+}
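/* How the two-level page map above resolves a pointer (sketch): gc_hash
   folds the address down to its significant bits, the top bits select a
   level-0 slot, the next GC_LEVEL1_BITS select the level-1 slot, and the
   low GC_MASK_BITS (64KB granularity) are dropped. Distinct addresses can
   fold onto the same slot, which is why gc_alloc_page below checks
   gc_will_collide() and retries with fresh memory on a clash. Expanded
   form of GC_GET_PAGE ("lookup_page_example" is illustrative only): */
static gc_pheader *lookup_page_example( void *ptr ) {
	int_val h = (int_val)gc_hash(ptr);
	gc_pheader **level1 = hl_gc_page_map[h >> (GC_MASK_BITS + GC_LEVEL1_BITS)];
	gc_pheader *page = level1[(h >> GC_MASK_BITS) & GC_LEVEL1_MASK];
	if( page == NULL || !INPAGE(ptr,page) ) // INPAGE rejects hash aliases on 64-bit
		return NULL;
	return page;
}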
+static gc_pheader *gc_alloc_page( int size, int kind, int block_count ) {
+ void *base = gc_alloc_page_memory(size);
+ if( base == NULL ) {
+ if( size >= (8 << 20) ) {
+ gc_global_lock(false);
+ hl_error("Failed to alloc %d KB",size>>10);
+ }
+ if( gc_flags & GC_DUMP_MEM ) hl_gc_dump_memory("hlmemory.dump");
+ out_of_memory("pages");
+ }
+
+ gc_pheader *p = gc_free_pheaders;
+ if( !p ) {
+ // alloc pages by chunks so we get good memory locality
+ int i, count = 100;
+ gc_pheader *head = (gc_pheader*)malloc(sizeof(gc_pheader)*count);
+ p = head;
+ for(i=1;i<count;i++) {
+ p->next_page = head + i;
+ p = p->next_page;
+ }
+ p->next_page = NULL;
+ p = gc_free_pheaders = head;
+ }
+ gc_free_pheaders = p->next_page;
+ memset(p,0,sizeof(gc_pheader));
+ p->base = (unsigned char*)base;
+ p->page_size = size;
+
+# ifdef HL_64
+ void *ptr = gc_will_collide(p->base,size);
+ if( ptr ) {
+# ifdef HL_VCC
+ printf("GC Page HASH collide %IX %IX\n",(int_val)GC_GET_PAGE(ptr),(int_val)ptr);
+# else
+ printf("GC Page HASH collide %lX %lX\n",(int_val)GC_GET_PAGE(ptr),(int_val)ptr);
+# endif
+ return gc_alloc_page(size, kind, block_count);
+ }
+#endif
+
+# if defined(GC_DEBUG)
+ memset(base,0xDD,size);
+ p->page_id = PAGE_ID++;
+# else
+ // prevent false positive to access invalid type
+ if( kind == MEM_KIND_DYNAMIC ) memset(base, 0, size);
+# endif
+ if( ((int_val)base) & ((1<<GC_PAGE_BITS)-1) ) hl_fatal("Invalid page alignment");
+ p->page_size = size;
+ p->page_kind = kind;
+ p->bmp = NULL;
+
+ // update stats
+ gc_stats.pages_count++;
+ gc_stats.pages_allocated++;
+ gc_stats.pages_blocks += block_count;
+ gc_stats.pages_total_memory += size;
+ gc_stats.mark_bytes += (block_count + 7) >> 3;
+
+ // register page in page map
+ int i;
+ for(i=0;i<size>>GC_MASK_BITS;i++) {
+ void *ptr = p->base + (i<<GC_MASK_BITS);
+ if( GC_GET_LEVEL1(ptr) == gc_level1_null ) {
+ gc_pheader **level = (gc_pheader**)malloc(sizeof(void*) << GC_LEVEL1_BITS);
+ MZERO(level,sizeof(void*) << GC_LEVEL1_BITS);
+ GC_GET_LEVEL1(ptr) = level;
+ }
+ GC_GET_PAGE(ptr) = p;
+ }
+ return p;
+}
+
+static void gc_free_page( gc_pheader *ph, int block_count ) {
+ int i;
+ for(i=0;i<ph->page_size>>GC_MASK_BITS;i++) {
+ void *ptr = ph->base + (i<<GC_MASK_BITS);
+ GC_GET_PAGE(ptr) = NULL;
+ }
+ gc_stats.pages_count--;
+ gc_stats.pages_total_memory -= ph->page_size;
+ gc_stats.mark_bytes -= (block_count + 7) >> 3;
+ gc_free_page_memory(ph->base,ph->page_size);
+ ph->next_page = gc_free_pheaders;
+ gc_free_pheaders = ph;
+}
+
+static void gc_check_mark();
+
+void *hl_gc_alloc_gen( hl_type *t, int size, int flags ) {
+ void *ptr;
+ int time = 0;
+ int allocated = 0;
+ if( size == 0 )
+ return NULL;
+ if( size < 0 )
+ hl_error("Invalid allocation size");
+ gc_global_lock(true);
+ gc_check_mark();
+# ifdef GC_MEMCHK
+ size += HL_WSIZE;
+# endif
+ if( gc_flags & GC_PROFILE ) time = TIMESTAMP();
+ {
+ allocated = size;
+ gc_stats.allocation_count++;
+ gc_stats.total_requested += size;
+# ifdef GC_PRINT_ALLOCS_SIZES
+# define MAX_WORDS 16
+ static int SIZE_CATEGORIES[MAX_WORDS] = {0};
+ static int LARGE_BLOCKS[33] = {0};
+ int wsize = (size + sizeof(void*) - 1) & ~(sizeof(void*)-1);
+ if( wsize < MAX_WORDS * sizeof(void*) )
+ SIZE_CATEGORIES[wsize/sizeof(void*)]++;
+ else {
+ int k = 0;
+ while( size > (1<<k) ) k++;
+ LARGE_BLOCKS[k]++;
+ }
+# endif
+ ptr = gc_allocator_alloc(&allocated, flags & PAGE_KIND_MASK);
+ if( ptr == NULL ) out_of_memory("block");
+ gc_stats.total_allocated += allocated;
+ }
+ if( flags & MEM_ZERO )
+ MZERO(ptr,allocated);
+ if( gc_flags & GC_PROFILE ) gc_stats.alloc_time += TIMESTAMP() - time;
+ gc_global_lock(false);
+ return ptr;
+}
+
+// ------------------------- MARKING ----------------------------------------------------------
+
+#define GC_STACK_BEGIN(st) void **__current_stack = (st)->cur; gc_mstack *__current_mstack = st;
+#define GC_STACK_END() __current_mstack->cur = __current_stack;
+#define GC_STACK_RESUME() __current_stack = __current_mstack->cur;
+#define GC_STACK_COUNT(st) ((st)->size - ((st)->end - (st)->cur) - 1)
+
+#define GC_PUSH_GEN(ptr,page) \
+ if( MEM_HAS_PTR((page)->page_kind) ) { \
+ if( __current_stack == __current_mstack->end ) { __current_mstack->cur = __current_stack; __current_stack = hl_gc_mark_grow(__current_mstack); } \
+ *__current_stack++ = ptr; \
+ }
+
+#ifdef HL_THREADS
+# define GC_THREADS 1
+#else
+# define GC_THREADS 0
+#endif
+
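/* The mark stack is the GC's grey set. GC_PUSH_GEN pushes a candidate only
   when its page kind can itself contain pointers, and hl_gc_mark_grow seeds
   a NULL sentinel at the bottom of every fresh stack. A minimal drain loop
   over a seeded stack looks like this (sketch, mirrors gc_flush_mark below): */
static void drain_example( gc_mstack *st ) {
	GC_STACK_BEGIN(st);
	void *p;
	while( (p = *--__current_stack) != NULL ) {
		(void)p; // scan the words of block p here and GC_PUSH_GEN its outgoing pointers
	}
	__current_stack++; // keep the sentinel for the next round
	GC_STACK_END();
}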
+HL_PRIM void **hl_gc_mark_grow( gc_mstack *stack ) {
+ int nsize = stack->size ? (((stack->size * 3) >> 1) & ~1) : 256;
+ void **nstack = (void**)malloc(sizeof(void**) * nsize);
+ void **base_stack = stack->end - stack->size;
+ int avail = (int)(stack->cur - base_stack);
+ if( nstack == NULL ) {
+ out_of_memory("markstack");
+ return NULL;
+ }
+ memcpy(nstack, base_stack, avail * sizeof(void*));
+ free(base_stack);
+ stack->size = nsize;
+ stack->end = nstack + nsize;
+ stack->cur = nstack + avail;
+ if( avail == 0 )
+ *stack->cur++ = 0;
+ return stack->cur;
+}
+
+static bool atomic_bit_unset( unsigned char *addr, unsigned char bitmask ) {
+ if( GC_MAX_MARK_THREADS <= 1 ) {
+ unsigned char v = *addr;
+ bool b = (v & bitmask) != 0;
+ if( b ) *addr = v & ~bitmask;
+ return b;
+ }
+# if defined(HL_VCC)
+ return ((unsigned)InterlockedAnd8((char*)addr,(char)~bitmask) & bitmask) != 0;
+# elif defined(HL_CLANG) || defined(HL_GCC)
+ return (__sync_fetch_and_and(addr,~bitmask) & bitmask) != 0;
+# else
+ hl_fatal("Not implemented");
+ return false;
+# endif
+}
+
+static bool atomic_bit_set( unsigned char *addr, unsigned char bitmask ) {
+ if( GC_MAX_MARK_THREADS <= 1 ) {
+ unsigned char v = *addr;
+ bool b = (v & bitmask) == 0;
+ if( b ) *addr = v | bitmask;
+ return b;
+ }
+# if defined(HL_VCC)
+ return ((unsigned)InterlockedOr8((char*)addr,(char)bitmask) & bitmask) == 0;
+# elif defined(HL_CLANG) || defined(HL_GCC)
+ return (__sync_fetch_and_or(addr,bitmask) & bitmask) == 0;
+# else
+ hl_fatal("Not implemented");
+ return false;
+# endif
+}
+
+static void gc_dispatch_mark( gc_mstack *st, bool all ) {
+ int nthreads = 0;
+ int i;
+ if( mark_threads_active == (1<<gc_mark_threads)-1 )
+ return; // all mark threads are already busy
+ for(i=0;i<gc_mark_threads;i++) {
+ gc_mthread *t = &mark_threads[i];
+ if( !atomic_bit_set(&mark_threads_active,1<<i) ) {
+ if( all ) hl_fatal("assert");
+ continue;
+ }
+ nthreads++;
+ int count = GC_STACK_COUNT(st);
+ int push = 1024;
+ if( push > count ) push = count;
+ while( t->stack.size <= push )
+ hl_gc_mark_grow(&t->stack);
+ if( GC_STACK_COUNT(&t->stack) != 0 )
+ hl_fatal("assert");
+ st->cur -= push;
+ memcpy(t->stack.cur, st->cur, push * sizeof(void*));
+ t->stack.cur += push;
+ if( !all )
+ hl_semaphore_release(t->ready);
+ }
+ if( all ) {
+ if( nthreads != gc_mark_threads ) hl_fatal("assert");
+ for(i=0;i<gc_mark_threads;i++) {
+ gc_mthread *t = &mark_threads[i];
+ hl_semaphore_release(t->ready);
+ }
+ }
+}
+
+#define REGULAR_BITS 16
+
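/* Hand-off protocol between gc_dispatch_mark above and mark_thread_main
   (defined further below), shown schematically:

       dispatcher                          worker i
       ----------                          --------
       copy a slice of grey pointers
         into t->stack
       hl_semaphore_release(t->ready)
                                           hl_semaphore_acquire(inf->ready)
                                           gc_flush_mark(&inf->stack)
                                           atomic_bit_unset(&mark_threads_active, 1<<i)
                                           last one out: hl_semaphore_release(mark_threads_done)

   gc_mark() waits on mark_threads_done until mark_threads_active is zero. */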
+static int gc_flush_mark( gc_mstack *stack ) {
+ GC_STACK_BEGIN(stack);
+ if( !__current_stack ) return 0;
+ int count = 0;
+ int regular_mask = 1 << REGULAR_BITS;
+ while( true ) {
+ void **block = (void**)*--__current_stack;
+ gc_pheader *page = GC_GET_PAGE(block);
+ unsigned int *mark_bits = NULL;
+ int pos = 0, nwords;
+# ifdef GC_DEBUG
+ vdynamic *ptr = (vdynamic*)block;
+ ptr += 0; // prevent unreferenced warning
+# endif
+ if( !block ) {
+ __current_stack++;
+ break;
+ }
+ if( (count++ & (1 << REGULAR_BITS)) != regular_mask && GC_MAX_MARK_THREADS > 1 && gc_mark_threads > 1 ) {
+ regular_mask = regular_mask ? 0 : 1 << REGULAR_BITS;
+ GC_STACK_END();
+ gc_dispatch_mark(stack,false);
+ GC_STACK_RESUME();
+ }
+ int size = gc_allocator_fast_block_size(page, block);
+# ifdef GC_DEBUG
+ if( size <= 0 ) hl_fatal("assert");
+# endif
+ nwords = size / HL_WSIZE;
+# ifdef GC_PRECISE
+ if( page->page_kind == MEM_KIND_DYNAMIC ) {
+ hl_type *t = *(hl_type**)block;
+# ifdef GC_DEBUG
+# ifdef HL_64
+ if( (int_val)t == 0xDDDDDDDDDDDDDDDD ) continue;
+# else
+ if( (int_val)t == 0xDDDDDDDD ) continue;
+# endif
+# endif
+ if( !t )
+ continue; // skip not-yet-allocated block
+ if( t->mark_bits && t->kind != HFUN ) {
+ mark_bits = t->mark_bits;
+ if( t->kind == HENUM ) {
+ mark_bits += ((venum*)block)->index;
+ block += 2;
+ nwords -= 2;
+ } else {
+ block++;
+ pos++;
+ }
+ }
+ }
+# endif
+ while( pos < nwords ) {
+ void *p;
+ if( mark_bits && (mark_bits[pos >> 5] & (1 << (pos&31))) == 0 ) {
+ pos++;
+ block++;
+ continue;
+ }
+ p = *block++;
+ pos++;
+ if( !p ) continue;
+ page = GC_GET_PAGE(p);
+ if( !page || !INPAGE(p,page) ) continue;
+ int bid = gc_allocator_get_block_id(page,p);
+ if( bid >= 0 && atomic_bit_set(&page->bmp[bid>>3],1<<(bid&7)) ) {
+ if( MEM_HAS_PTR(page->page_kind) ) DRAM_PREFETCH(p);
+ GC_PUSH_GEN(p,page);
+ }
+ }
+ }
+ GC_STACK_END();
+ return count;
+}
+
+static void gc_mark_stack( void *start, void *end ) {
+ GC_STACK_BEGIN(&global_mark_stack);
+ void **stack_head = (void**)start;
+ while( stack_head < (void**)end ) {
+ void *p = *stack_head++;
+ gc_pheader *page = GC_GET_PAGE(p);
+ if( !page || !INPAGE(p,page) ) continue;
+# ifdef GC_INTERIOR_POINTERS
+ int bid = gc_allocator_get_block_id_interior(page, &p);
+# else
+ int bid = gc_allocator_get_block_id(page, p);
+# endif
+ if( bid >= 0 && (page->bmp[bid>>3] & (1<<(bid&7))) == 0 ) {
+ page->bmp[bid>>3] |= 1<<(bid&7);
+ GC_PUSH_GEN(p,page);
+ }
+ }
+ GC_STACK_END();
+}
+
+static void gc_mark() {
+ GC_STACK_BEGIN(&global_mark_stack);
+ int mark_bytes = gc_stats.mark_bytes;
+ int i;
+ // prepare mark bits
+ if( mark_bytes > mark_size ) {
+ gc_free_page_memory(mark_data, mark_size);
+ if( mark_size == 0 ) mark_size = GC_PAGE_SIZE;
+ while( mark_size < mark_bytes )
+ mark_size <<= 1;
+ mark_data = gc_alloc_page_memory(mark_size);
+ if( mark_data == NULL ) out_of_memory("markbits");
+ }
+ MZERO(mark_data,mark_bytes);
+ gc_allocator_before_mark(mark_data);
+ // push roots
+ for(i=0;i<gc_roots_count;i++) {
+ void *p = *gc_roots[i];
+ if( !p ) continue;
+ gc_pheader *page = GC_GET_PAGE(p);
+ if( !page || !INPAGE(p,page) ) continue;
+ int bid = gc_allocator_get_block_id(page,p);
+ if( bid >= 0 && (page->bmp[bid>>3] & (1<<(bid&7))) == 0 ) {
+ page->bmp[bid>>3] |= 1<<(bid&7);
+ GC_PUSH_GEN(p,page);
+ }
+ }
+
+ GC_STACK_END();
+
+ // scan threads stacks & registers
+ for(i=0;i<gc_threads.count;i++) {
+ hl_thread_info *t = gc_threads.threads[i];
+ gc_mark_stack(t->stack_cur,t->stack_top);
+ gc_mark_stack(&t->gc_regs,(void**)&t->gc_regs + (sizeof(jmp_buf) / sizeof(void*) - 1));
+ gc_mark_stack(&t->extra_stack_data,(void**)&t->extra_stack_data + t->extra_stack_size);
+ }
+
+ gc_mstack *st = &global_mark_stack;
+ if( gc_mark_threads <= 1 )
+ gc_flush_mark(st);
+ else {
+ gc_dispatch_mark(st, true);
+ if( GC_STACK_COUNT(st) > 0 )
+ hl_fatal("assert");
+ // wait threads to finish
+ while( mark_threads_active )
+ hl_semaphore_acquire(mark_threads_done);
+ for(i=0;i<gc_mark_threads;i++) {
+ gc_mthread *t = &mark_threads[i];
+ if( GC_STACK_COUNT(&t->stack) > 0 )
+ hl_fatal("assert");
+ }
+ }
+ gc_allocator_after_mark();
+}
+
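/* Trigger rule illustrated: gc_check_mark (further below) starts a major
   collection once the bytes -- or block count -- allocated since the last
   mark exceed the current heap scaled by gc_mark_threshold. For example,
   with 64MB of live pages and a threshold of 0.2 (a hypothetical value;
   the actual default is defined with the GC globals), roughly 12.8MB of
   fresh allocation forces the next gc_major(). */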
+static void count_free_memory( gc_pheader *page, int size ) {
+ gc_stats.free_memory += gc_free_memory(page);
+}
+
+static void gc_major() {
+
+ if( gc_flags & GC_PROFILE_MEM ) {
+ double gc_mem = gc_stats.mark_bytes;
+ int i;
+ gc_mem += gc_allocator_private_memory();
+ gc_mem += global_mark_stack.size * sizeof(void*);
+ for(i=0;i<gc_mark_threads;i++) {
+ gc_mthread *t = &mark_threads[i];
+ gc_mem += t->stack.size * sizeof(void*);
+ }
+ int pages = gc_stats.pages_count;
+ gc_pheader *p = gc_free_pheaders;
+ while( p ) {
+ pages++;
+ p = p->next_page;
+ }
+ gc_mem += sizeof(gc_pheader) * pages;
+ gc_mem += sizeof(void*) * gc_roots_max;
+ gc_mem += (sizeof(void*) + sizeof(hl_thread_info)) * gc_threads.count;
+ for(i=0;i<(1<<GC_LEVEL0_BITS);i++)
+ if( hl_gc_page_map[i] != gc_level1_null ) gc_mem += sizeof(void*) << GC_LEVEL1_BITS;
+ printf("GC-PROFILE-MEM %dKB\n", ((int)gc_mem)>>10);
+ }
+ gc_stats.last_mark = gc_stats.total_allocated;
+ gc_stats.last_mark_allocs = gc_stats.allocation_count;
+ gc_stop_world(true);
+ gc_mark();
+ gc_stop_world(false);
+ if( gc_flags & GC_PROFILE ) {
+ printf("GC-PROFILE allocs:%d time:%dms total:%dKB\n",
+ (int)(gc_stats.allocation_count - last_profile.allocation_count),
+ (int)(gc_stats.alloc_time - last_profile.alloc_time),
+ (int)((gc_stats.total_allocated - last_profile.total_allocated)>>10)
+ );
+ last_profile.allocation_count = gc_stats.allocation_count;
+ last_profile.alloc_time = gc_stats.alloc_time;
+ last_profile.total_allocated = gc_stats.total_allocated;
+ }
+}
+
+HL_API void hl_gc_major() {
+ gc_global_lock(true);
+ gc_major();
+ gc_global_lock(false);
+}
+
+HL_API bool hl_is_gc_ptr( void *ptr ) {
+ gc_pheader *page = GC_GET_PAGE(ptr);
+ if( !page || !INPAGE(ptr,page) ) return false;
+ int bid = gc_allocator_get_block_id(page, ptr);
+ if( bid < 0 ) return false;
+ //if( page->bmp && page->next_block == page->first_block && (page->bmp[bid>>3]&(1<<(bid&7))) == 0 ) return false;
+ return true;
+}
+
+HL_API int hl_gc_get_memsize( void *ptr ) {
+ gc_pheader *page = GC_GET_PAGE(ptr);
+ if( !page || !INPAGE(ptr,page) ) return -1;
+ return gc_allocator_fast_block_size(page,ptr);
+}
+
+
+static bool gc_is_active = true;
+
+static void gc_check_mark() {
+ int64 m = gc_stats.total_allocated - gc_stats.last_mark;
+ int64 b = gc_stats.allocation_count - gc_stats.last_mark_allocs;
+ if( (m > gc_stats.pages_total_memory * gc_mark_threshold || b > gc_stats.pages_blocks * gc_mark_threshold || (gc_flags & GC_FORCE_MAJOR)) && gc_is_active )
+ gc_major();
+}
+
+static void mark_thread_main( void *param ) {
+ int index = (int)(int_val)param;
+ gc_mthread *inf = &mark_threads[index];
+ while( true ) {
+ hl_semaphore_acquire(inf->ready);
+ inf->mark_count += gc_flush_mark(&inf->stack);
+ if( !atomic_bit_unset(&mark_threads_active, 1 << index) ) hl_fatal("assert");
+ if( mark_threads_active == 0 ) hl_semaphore_release(mark_threads_done);
+ }
+}
+
+int gc_get_mark_threads( hl_thread **tids ) {
+ if (gc_mark_threads <= 1)
+ return 0;
+ for (int i = 0; i < gc_mark_threads; i++) {
+ tids[i] = mark_threads[i].tid;
+ }
+ return gc_mark_threads;
+}
+
+static void hl_gc_init() {
+ int i;
+ for(i=0;i<1<<GC_LEVEL0_BITS;i++)
+ hl_gc_page_map[i] = gc_level1_null;
+# ifdef HL_THREADS
+ gc_threads.global_lock = hl_mutex_alloc(true);
+ gc_threads.exclusive_lock = hl_mutex_alloc(true);
+ hl_add_root(&gc_threads.global_lock);
+ {
+ gc_mark_threads = GC_MAX_MARK_THREADS;
+ if( gc_mark_threads > GC_MAX_MARK_THREADS ) gc_mark_threads = GC_MAX_MARK_THREADS;
+ }
+ if( gc_mark_threads > 1 ) {
+ mark_threads_done = hl_semaphore_alloc(0);
+ for(int i=0;i<gc_mark_threads;i++) {
+ gc_mthread *t = &mark_threads[i];
+ memset(t,0,sizeof(gc_mthread));
+ t->ready = hl_semaphore_alloc(0);
+ t->tid = hl_thread_start(mark_thread_main, (void*)(int_val)i, false);
+ }
+ }
+# endif
+}
+
+static void hl_gc_free() {
+# ifdef HL_THREADS
+ hl_remove_root(&gc_threads.global_lock);
+# endif
+}
+
+// ---- UTILITIES ----------------------
+
+HL_API bool hl_is_blocking() {
+ hl_thread_info *t = current_thread;
+ // when called from a non GC thread, tells if the main thread is blocking
+ if( t == NULL ) {
+ if( gc_threads.count == 0 )
+ return false;
+ t = gc_threads.threads[0];
+ }
+ return t->gc_blocking > 0;
+}
+
+HL_API void hl_blocking( bool b ) {
+ hl_thread_info *t = current_thread;
+ if( !t )
+ return; // allow hl_blocking in non-GC threads
+ if( b ) {
+# ifdef HL_THREADS
+ if( t->gc_blocking == 0 )
+ gc_save_context(t,&b);
+# endif
+ t->gc_blocking++;
+ } else if( t->gc_blocking == 0 )
+ hl_error("Unblocked thread");
+ else {
+ t->gc_blocking--;
+ if( t->gc_blocking == 0 && gc_threads.stopping_world ) {
+ gc_global_lock(true);
+ gc_global_lock(false);
+ }
+ }
+}
+
+void hl_cache_free();
+void hl_cache_init();
+
+void hl_global_init() {
+ hl_gc_init();
+ hl_cache_init();
+}
+
+void hl_global_free() {
+ hl_cache_free();
+ hl_gc_free();
+}
+
+struct hl_alloc_block {
+ int size;
+ hl_alloc_block *next;
+ unsigned char *p;
+};
+
+void hl_alloc_init( hl_alloc *a ) {
+ a->cur = NULL;
+}
+
+void *hl_malloc(
hl_alloc *a, int size ) { + hl_alloc_block *b = a->cur; + void *p; + if( !size ) return NULL; + size += hl_pad_size(size,&hlt_dyn); + if( b == NULL || b->size <= size ) { + int alloc = size < 4096-(int)sizeof(hl_alloc_block) ? 4096-(int)sizeof(hl_alloc_block) : size; + b = (hl_alloc_block *)malloc(sizeof(hl_alloc_block) + alloc); + if( b == NULL ) out_of_memory("malloc"); + b->p = ((unsigned char*)b) + sizeof(hl_alloc_block); + b->size = alloc; + b->next = a->cur; + a->cur = b; + } + p = b->p; + b->p += size; + b->size -= size; + return p; +} + +void *hl_zalloc( hl_alloc *a, int size ) { + void *p = hl_malloc(a,size); + if( p ) MZERO(p,size); + return p; +} + +void hl_free( hl_alloc *a ) { + hl_alloc_block *b = a->cur; + int_val prev = 0; + int size = 0; + while( b ) { + hl_alloc_block *n = b->next; + size = (int)(b->p + b->size - ((unsigned char*)b)); + prev = (int_val)b; + free(b); + b = n; + } + // check if our allocator was not part of the last free block + if( (int_val)a < prev || (int_val)a > prev+size ) + a->cur = NULL; +} + +HL_PRIM void *hl_alloc_executable_memory( int size ) { +#ifdef __APPLE__ +# ifndef MAP_ANONYMOUS +# define MAP_ANONYMOUS MAP_ANON +# endif +#endif +#if defined(HL_WIN) && defined(HL_64) + static char *jit_address = (char*)0x000076CA9F000000; + void *ptr; +retry_jit_alloc: + ptr = VirtualAlloc(jit_address,size,MEM_RESERVE|MEM_COMMIT,PAGE_EXECUTE_READWRITE); + if( !ptr ) { + jit_address = (char*)(((int_val)jit_address)>>1); // fix for Win7 - will eventually reach NULL + goto retry_jit_alloc; + } + jit_address += size + ((-size) & (GC_PAGE_SIZE - 1)); + return ptr; +#elif defined(HL_WIN) + void *ptr = VirtualAlloc(NULL,size,MEM_RESERVE|MEM_COMMIT,PAGE_EXECUTE_READWRITE); + return ptr; +#elif defined(HL_OS) + return malloc(size); +#elif defined(HL_CONSOLE) + return NULL; +#elif defined(__APPLE__) && defined(__aarch64__) + // Apple Silicon requires MAP_JIT for W^X + void *p = mmap(NULL, size, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS|MAP_JIT, -1, 0); + if (p == MAP_FAILED) return NULL; + return p; +#else + void *p; + p = mmap(NULL,size,PROT_READ|PROT_WRITE|PROT_EXEC,(MAP_PRIVATE|MAP_ANONYMOUS),-1,0); + if (p == MAP_FAILED) return NULL; + return p; +#endif +} + +HL_PRIM void hl_free_executable_memory( void *c, int size ) { +#if defined(HL_WIN) + VirtualFree(c,0,MEM_RELEASE); +#elif !defined(HL_CONSOLE) + munmap(c, size); +#endif +} + +HL_PRIM void hl_jit_write_protect( bool executable ) { +#if defined(__APPLE__) && defined(__aarch64__) + pthread_jit_write_protect_np(executable ? 
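1 : 0);
+#endif
+}
+
/* Sketch of the emit sequence these two helpers enable on Apple Silicon
   (illustrative only; "emit_example" is not part of this patch). MAP_JIT
   memory is writable or executable per thread, never both at once, and
   AArch64 has no coherent instruction cache, so freshly written code must
   be flushed before jumping to it: */
HL_PRIM void hl_jit_flush_cache( void *ptr, int size );
static void *emit_example( const unsigned char *code, int size ) {
	void *mem = hl_alloc_executable_memory(size);
	if( mem == NULL ) return NULL;
	hl_jit_write_protect(false);   // flip the MAP_JIT region to RW for this thread
	memcpy(mem, code, size);
	hl_jit_write_protect(true);    // back to RX before executing
	hl_jit_flush_cache(mem, size); // sync the I-cache with the new code
	return mem;
}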
+HL_PRIM void hl_jit_flush_cache( void *ptr, int size ) {
+#if defined(__APPLE__) && defined(__aarch64__)
+ sys_icache_invalidate(ptr, size);
+#endif
+}
+
+#if defined(HL_CONSOLE)
+void *sys_alloc_align( int size, int align );
+void sys_free_align( void *ptr, int size );
+#elif !defined(HL_WIN)
+static void *base_addr = (void*)0x40000000;
+typedef struct _pextra pextra;
+struct _pextra {
+ void *page_ptr;
+ void *base_ptr;
+ pextra *next;
+};
+static pextra *extra_pages = NULL;
+#define EXTRA_SIZE (GC_PAGE_SIZE + (4<<10))
+#endif
+
+static void *gc_alloc_page_memory( int size ) {
+#if defined(HL_WIN)
+# if defined(GC_DEBUG) && defined(HL_64)
+# define STATIC_ADDRESS
+# endif
+# ifdef STATIC_ADDRESS
+ // force out of 32 bits addresses to check loss of precision
+ static char *start_address = (char*)0x100000000;
+# else
+ static void *start_address = NULL;
+# endif
+ void *ptr = VirtualAlloc(start_address,size,MEM_RESERVE|MEM_COMMIT,PAGE_READWRITE);
+# ifdef STATIC_ADDRESS
+ if( ptr == NULL && start_address ) {
+ start_address = NULL;
+ return gc_alloc_page_memory(size);
+ }
+ start_address += size + ((-size) & (GC_PAGE_SIZE - 1));
+# endif
+ return ptr;
+#elif defined(HL_CONSOLE)
+ return sys_alloc_align(size, GC_PAGE_SIZE);
+#elif defined(HL_EMSCRIPTEN)
+ return emscripten_builtin_memalign(GC_PAGE_SIZE, size);
+#else
+ static int recursions = 0;
+ int i = 0;
+ while( gc_will_collide(base_addr,size) ) {
+ base_addr = (char*)base_addr + GC_PAGE_SIZE;
+ i++;
+ // most likely our hashing creates too many collisions
+ if( i >= 1 << (GC_LEVEL0_BITS + GC_LEVEL1_BITS + 2) )
+ return NULL;
+ }
+ void *ptr = mmap(base_addr,size,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_ANONYMOUS,-1,0);
+ if( ptr == (void*)-1 )
+ return NULL;
+ if( ((int_val)ptr) & (GC_PAGE_SIZE-1) ) {
+ munmap(ptr,size);
+ if( recursions >= 5 ) {
+ ptr = mmap(base_addr,size+EXTRA_SIZE,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_ANONYMOUS,-1,0);
+ int offset = (int)((int_val)ptr) & (GC_PAGE_SIZE-1);
+ void *aligned = (char*)ptr + (GC_PAGE_SIZE - offset);
+ pextra *inf = (pextra*)( (char*)ptr + size + EXTRA_SIZE - sizeof(pextra));
+ inf->page_ptr = aligned;
+ inf->base_ptr = ptr;
+ inf->next = extra_pages;
+ extra_pages = inf;
+ return aligned;
+ }
+ void *tmp;
+ int tmp_size = (int)((int_val)ptr - (int_val)base_addr);
+ if( tmp_size > 0 ) {
+ base_addr = (void*)((((int_val)ptr) & ~(GC_PAGE_SIZE-1)) + GC_PAGE_SIZE);
+ tmp = ptr;
+ } else {
+ base_addr = (void*)(((int_val)ptr) & ~(GC_PAGE_SIZE-1));
+ tmp = NULL;
+ }
+ if( tmp ) tmp = mmap(tmp,tmp_size,PROT_WRITE,MAP_PRIVATE|MAP_ANONYMOUS,-1,0);
+ recursions++;
+ ptr = gc_alloc_page_memory(size);
+ recursions--;
+ if( tmp ) munmap(tmp,tmp_size);
+ return ptr;
+ }
+ base_addr = (char*)ptr+size;
+ return ptr;
+#endif
+}
+
+static void gc_free_page_memory( void *ptr, int size ) {
+#ifdef HL_WIN
+ VirtualFree(ptr, 0, MEM_RELEASE);
+#elif defined(HL_CONSOLE)
+ sys_free_align(ptr,size);
+#elif defined(HL_EMSCRIPTEN)
+ emscripten_builtin_free(ptr);
+#else
+ pextra *e = extra_pages, *prev = NULL;
+ while( e ) {
+ if( e->page_ptr == ptr ) {
+ if( prev )
+ prev->next = e->next;
+ else
+ extra_pages = e->next;
+ munmap(e->base_ptr, size + EXTRA_SIZE);
+ return;
+ }
+ prev = e;
+ e = e->next;
+ }
+ munmap(ptr,size);
+#endif
+}
+
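/* Reference for the kind flags used below (values defined in hl.h): bit 1
   decides whether the GC traces a block, MEM_HAS_PTR(kind) == !(kind & 2).
     MEM_KIND_DYNAMIC   (0)  first word is a hl_type*, traced (precisely
                             when the type carries mark_bits)
     MEM_KIND_RAW       (1)  no type header, traced conservatively
     MEM_KIND_NOPTR     (2)  never traced (bytes, scalars)
     MEM_KIND_FINALIZER (3)  not traced; carries a finalizer (see hl.h)
   hl_alloc_dynamic below derives the kind from the type: HSTRUCT maps to
   RAW, other pointer-carrying types to DYNAMIC, value types to NOPTR. */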
+vdynamic *hl_alloc_dynamic( hl_type *t ) {
+ vdynamic *d = (vdynamic*)hl_gc_alloc_gen(t, sizeof(vdynamic), (hl_is_ptr(t) ? (t->kind == HSTRUCT ? MEM_KIND_RAW : MEM_KIND_DYNAMIC) : MEM_KIND_NOPTR) | MEM_ZERO);
+ d->t = t;
+ return d;
+}
+
+#ifndef HL_64
+# define DYN_PAD 0,
+#else
+# define DYN_PAD
+#endif
+
+static const vdynamic vdyn_true = { &hlt_bool, DYN_PAD {true} };
+static const vdynamic vdyn_false = { &hlt_bool, DYN_PAD {false} };
+
+vdynamic *hl_alloc_dynbool( bool b ) {
+ return (vdynamic*)(b ? &vdyn_true : &vdyn_false);
+}
+
+
+vdynamic *hl_alloc_obj( hl_type *t ) {
+ vobj *o;
+ int i;
+ hl_runtime_obj *rt = t->obj->rt;
+ if( rt == NULL || rt->methods == NULL ) rt = hl_get_obj_proto(t);
+ if( t->kind == HSTRUCT ) {
+ o = (vobj*)hl_gc_alloc_gen(t, rt->size, (rt->hasPtr ? MEM_KIND_RAW : MEM_KIND_NOPTR) | MEM_ZERO);
+ } else {
+ o = (vobj*)hl_gc_alloc_gen(t, rt->size, (rt->hasPtr ? MEM_KIND_DYNAMIC : MEM_KIND_NOPTR) | MEM_ZERO);
+ o->t = t;
+ }
+ for(i=0;i<rt->nbindings;i++) {
+ hl_runtime_binding *b = rt->bindings + i;
+ *(void**)(((char*)o) + rt->fields_indexes[b->fid]) = b->closure ? hl_alloc_closure_ptr(b->closure,b->ptr,o) : b->ptr;
+ }
+ return (vdynamic*)o;
+}
+
+vdynobj *hl_alloc_dynobj() {
+ vdynobj *o = (vdynobj*)hl_gc_alloc_gen(&hlt_dynobj,sizeof(vdynobj),MEM_KIND_DYNAMIC | MEM_ZERO);
+ o->t = &hlt_dynobj;
+ return o;
+}
+
+vvirtual *hl_alloc_virtual( hl_type *t ) {
+ vvirtual *v = (vvirtual*)hl_gc_alloc(t, t->virt->dataSize + sizeof(vvirtual) + sizeof(void*) * t->virt->nfields);
+ void **fields = (void**)(v + 1);
+ char *vdata = (char*)(fields + t->virt->nfields);
+ int i;
+ v->t = t;
+ v->value = NULL;
+ v->next = NULL;
+ for(i=0;i<t->virt->nfields;i++)
+ fields[i] = (char*)v + t->virt->indexes[i];
+ MZERO(vdata,t->virt->dataSize);
+ return v;
+}
+
+HL_API void hl_gc_stats( double *total_allocated, double *allocation_count, double *current_memory ) {
+ *total_allocated = (double)gc_stats.total_allocated;
+ *allocation_count = (double)gc_stats.allocation_count;
+ *current_memory = (double)gc_stats.pages_total_memory;
+}
+
+HL_API void hl_gc_enable( bool b ) {
+ gc_is_active = b;
+}
+
+HL_API int hl_gc_get_flags() {
+ return gc_flags;
+}
+
+HL_API void hl_gc_set_flags( int f ) {
+ gc_flags = f;
+}
+
+HL_API void hl_set_thread_flags( int flags, int mask ) {
+ hl_thread_info *t = hl_get_thread();
+ t->flags = (t->flags & ~mask) | flags;
+}
+
+HL_API void hl_gc_profile( bool b ) {
+ if( b )
+ gc_flags |= GC_PROFILE;
+ else
+ gc_flags &= ~GC_PROFILE;
+}
+
+static FILE *fdump;
+static void fdump_i( int i ) {
+ fwrite(&i,1,4,fdump);
+}
+static void fdump_p( void *p ) {
+ fwrite(&p,1,sizeof(void*),fdump);
+}
+static void fdump_d( void *p, int size ) {
+ fwrite(p,1,size,fdump);
+}
+
+static hl_types_dump gc_types_dump = NULL;
+HL_API void hl_gc_set_dump_types( hl_types_dump tdump ) {
+ gc_types_dump = tdump;
+}
+
+static void gc_dump_block( void *block, int size ) {
+ fdump_p(block);
+ fdump_i(size);
+}
+
+static void gc_dump_block_ptr( void *block, int size ) {
+ fdump_p(block);
+ fdump_i(size);
+ if( size >= (int)sizeof(void*) ) fdump_p(*(void**)block);
+}
+
+static void gc_dump_page( gc_pheader *p, int private_data ) {
+ fdump_p(p->base);
+ fdump_i(p->page_kind);
+ fdump_i(p->page_size);
+ fdump_i(private_data);
+ if( p->page_kind & MEM_KIND_NOPTR ) {
+ gc_iter_live_blocks(p, gc_dump_block_ptr); // only dump type
+ fdump_p(NULL);
+ } else {
+ gc_iter_live_blocks(p,gc_dump_block);
+ fdump_p(NULL);
+ fdump_d(p->base, p->page_size);
+ }
+}
+
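/* Layout of the "HMD1" dump written by hl_gc_dump_memory below: the 4-byte
   magic, one flags int (bit0 = 64-bit pointers, bit1 = 4-byte bool), the
   private-data total, one record per page (base, kind, size, live blocks,
   then the raw page bytes for pointer-carrying pages), each thread's stack
   words, and the builtin types table terminated by -1, optionally followed
   by module types via gc_types_dump. */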
+HL_API void hl_gc_dump_memory( const char *filename ) {
+ int i;
+ gc_global_lock(true);
+ gc_stop_world(true);
+ gc_mark();
+ fdump = fopen(filename,"wb");
+ if( fdump == NULL ) {
+ gc_stop_world(false);
+ gc_global_lock(false);
+ hl_error("Failed to open file");
+ return;
+ }
+
+ // header
+ fdump_d("HMD1",4);
+ fdump_i(((sizeof(void*) == 8)?1:0) | ((sizeof(bool) == 4)?2:0));
+
+ // pages
+ int page_count, private_data;
+ gc_get_stats(&page_count, &private_data);
+
+ // all mallocs
+ private_data += sizeof(gc_pheader) * page_count;
+ private_data += sizeof(void*) * gc_roots_max;
+ private_data += gc_threads.count * (sizeof(void*) + sizeof(hl_thread_info));
+ for(i=0;i<1<<GC_LEVEL0_BITS;i++)
+ if( hl_gc_page_map[i] != gc_level1_null ) private_data += sizeof(void*) << GC_LEVEL1_BITS;
+ fdump_i(page_count);
+ gc_iter_pages(gc_dump_page);
+ // threads
+ fdump_i(gc_threads.count);
+ for(i=0;i<gc_threads.count;i++) {
+ hl_thread_info *t = gc_threads.threads[i];
+ fdump_p(t->stack_top);
+ int size = (int)((void**)t->stack_top - (void**)t->stack_cur);
+ fdump_i(size);
+ fdump_d(t->stack_cur,size*sizeof(void*));
+ }
+ // types
+# define fdump_t(t) fdump_i(t.kind); fdump_p(&t);
+ fdump_t(hlt_i32);
+ fdump_t(hlt_i64);
+ fdump_t(hlt_f32);
+ fdump_t(hlt_f64);
+ fdump_t(hlt_dyn);
+ fdump_t(hlt_array);
+ fdump_t(hlt_bytes);
+ fdump_t(hlt_dynobj);
+ fdump_t(hlt_bool);
+ fdump_i(-1);
+ if( gc_types_dump ) gc_types_dump(fdump_d);
+ fclose(fdump);
+ fdump = NULL;
+ gc_stop_world(false);
+ gc_global_lock(false);
+}
+
+typedef struct {
+ hl_type *t;
+ int count;
+ int page_kinds;
+ varray *arr;
+ int index;
+} gc_live_obj;
+static gc_live_obj live_obj;
+
+static void gc_count_live_block( void *block, int size ) {
+ if( size < (int)sizeof(void*) ) return;
+ hl_type *t = *(hl_type **)block;
+ if( t != live_obj.t ) return;
+ live_obj.count++;
+ if( live_obj.index < live_obj.arr->size ) {
+ hl_aptr(live_obj.arr, vdynamic*)[live_obj.index] = hl_make_dyn(&block, live_obj.t);
+ live_obj.index++;
+ }
+}
+
+static void gc_count_live_page( gc_pheader *p, int private_data ) {
+ if( (1 << p->page_kind) & live_obj.page_kinds )
+ gc_iter_live_blocks(p, gc_count_live_block);
+}
+
+HL_API int hl_gc_get_live_objects( hl_type *t, varray *arr ) {
+ if( !hl_is_dynamic(t) ) return -1;
+ gc_global_lock(true);
+ gc_stop_world(true);
+ gc_mark();
+
+ live_obj.t = t;
+ live_obj.count = 0;
+ live_obj.page_kinds = (1 << MEM_KIND_DYNAMIC) + (1 << MEM_KIND_NOPTR);
+ if( t->kind == HOBJ ) {
+ live_obj.page_kinds = hl_get_obj_rt(t)->hasPtr ?
1 << MEM_KIND_DYNAMIC : 1 << MEM_KIND_NOPTR; + } + live_obj.arr = arr; + live_obj.index = 0; + gc_iter_pages(gc_count_live_page); + + gc_stop_world(false); + gc_global_lock(false); + return live_obj.count; +} + +#ifdef HL_VCC +# pragma optimize( "", off ) +#endif +HL_API vdynamic *hl_debug_call( int mode, vdynamic *v ) { + return NULL; +} +#ifdef HL_VCC +# pragma optimize( "", on ) +#endif + +DEFINE_PRIM(_VOID, gc_major, _NO_ARG); +DEFINE_PRIM(_VOID, gc_enable, _BOOL); +DEFINE_PRIM(_VOID, gc_profile, _BOOL); +DEFINE_PRIM(_VOID, gc_stats, _REF(_F64) _REF(_F64) _REF(_F64)); +DEFINE_PRIM(_VOID, gc_dump_memory, _BYTES); +DEFINE_PRIM(_I32, gc_get_live_objects, _TYPE _ARR); +DEFINE_PRIM(_I32, gc_get_flags, _NO_ARG); +DEFINE_PRIM(_VOID, gc_set_flags, _I32); +DEFINE_PRIM(_DYN, debug_call, _I32 _DYN); +DEFINE_PRIM(_VOID, blocking, _BOOL); +DEFINE_PRIM(_VOID, set_thread_flags, _I32 _I32); diff --git a/src/hl.h b/src/hl.h index 30bcdf59c..e6e5f919b 100644 --- a/src/hl.h +++ b/src/hl.h @@ -1,1027 +1,1035 @@ -/* - * Copyright (C)2005-2016 Haxe Foundation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ -#ifndef HL_H -#define HL_H - -/** - Detailed documentation can be found here: - https://github.com/HaxeFoundation/hashlink/wiki/ -**/ - -#define HL_VERSION 0x011000 - -#if defined(_WIN32) -# define HL_WIN -# if !defined(_DURANGO) && !defined(_GAMING_XBOX) -# define HL_WIN_DESKTOP -# endif -#endif - -#if defined(__APPLE__) || defined(__MACH__) || defined(macintosh) -#include -#if TARGET_OS_IOS -#define HL_IOS -#elif TARGET_OS_TV -#define HL_TVOS -#elif TARGET_OS_MAC -#define HL_MAC -#endif -#endif - -#ifdef __ANDROID__ -# define HL_ANDROID -#endif - -#if defined(linux) || defined(__linux__) -# define HL_LINUX -# ifndef _GNU_SOURCE -# define _GNU_SOURCE -# endif -#endif - -#if defined(__EMSCRIPTEN__) -# define HL_EMSCRIPTEN -# ifndef _GNU_SOURCE -# define _GNU_SOURCE -# endif -#endif - -#if defined(HL_IOS) || defined(HL_ANDROID) || defined(HL_TVOS) -# define HL_MOBILE -#endif - -#ifdef __ORBIS__ -# define HL_PS -#endif - -#ifdef __NX__ -# define HL_NX -#endif - -#ifdef _DURANGO -# define HL_XBO -#endif - -#ifdef _GAMING_XBOX -# define HL_XBS -#endif - -#if defined(HL_PS) || defined(HL_NX) || defined(HL_XBO) || defined(HL_XBS) || defined(HL_OS) -# define HL_CONSOLE -#endif - -#if (defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)) && !defined(HL_CONSOLE) -# define HL_BSD -#endif - -#if defined(_64BITS) || defined(__x86_64__) || defined(_M_X64) || defined(__LP64__) || defined(__wasm64__) -# define HL_64 -#endif - -#if defined(__GNUC__) -# define HL_GCC -#endif - -#if defined(__MINGW32__) -# define HL_MINGW -#endif - -#if defined(__CYGWIN__) -# define HL_CYGWIN -#endif - -#if defined(__llvm__) -# define HL_LLVM -#endif - -#if defined(__clang__) -# define HL_CLANG -#endif - -#if defined(_MSC_VER) && !defined(HL_LLVM) -# define HL_VCC -# pragma warning(disable:4996) // remove deprecated C API usage warnings -# pragma warning(disable:4055) // void* - to - function cast -# pragma warning(disable:4152) // void* - to - function cast -# pragma warning(disable:4201) // anonymous struct -# pragma warning(disable:4127) // while( true ) -# pragma warning(disable:4710) // inline disabled -# pragma warning(disable:4711) // inline activated -# pragma warning(disable:4255) // windows include -# pragma warning(disable:4820) // windows include -# pragma warning(disable:4668) // windows include -# pragma warning(disable:4738) // return float bad performances -# pragma warning(disable:4061) // explicit values in switch -# if (_MSC_VER >= 1920) -# pragma warning(disable:5045) // spectre -# endif -#endif - -#if defined(HL_VCC) || defined(HL_MINGW) || defined(HL_CYGWIN) -# define HL_WIN_CALL -#endif - -#ifdef _DEBUG -# define HL_DEBUG -#endif - -#ifndef HL_CONSOLE -# define HL_TRACK_ENABLE -#endif - -#ifndef HL_NO_THREADS -# define HL_THREADS -# ifdef HL_VCC -# define HL_THREAD_VAR __declspec( thread ) -# define HL_THREAD_STATIC_VAR HL_THREAD_VAR static -# else -# define HL_THREAD_VAR __thread -# define HL_THREAD_STATIC_VAR static HL_THREAD_VAR -# endif -#else -# define HL_THREAD_VAR -# define HL_THREAD_STATIC_VAR static -#endif - -#include -#ifndef HL_VCC -# include -#endif - -#if defined(HL_VCC) || defined(HL_MINGW) -# define EXPORT __declspec( dllexport ) -# define IMPORT __declspec( dllimport ) -#else -#if defined(HL_GCC) || defined(HL_CLANG) -# define EXPORT __attribute__((visibility("default"))) -#else -# define EXPORT -#endif -# define IMPORT extern -#endif - -#ifdef HL_64 -# define HL_WSIZE 8 -# define IS_64 1 -# if defined(HL_VCC) || defined(HL_MINGW) -# define 
_PTR_FMT L"%IX" -# else -# define _PTR_FMT u"%lX" -# endif -#else -# define HL_WSIZE 4 -# define IS_64 0 -# if defined(HL_VCC) || defined(HL_MINGW) -# define _PTR_FMT L"%IX" -# else -# define _PTR_FMT u"%X" -# endif -#endif - -#ifdef __cplusplus -# define C_FUNCTION_BEGIN extern "C" { -# define C_FUNCTION_END }; -#else -# define C_FUNCTION_BEGIN -# define C_FUNCTION_END -#endif - -typedef intptr_t int_val; -typedef long long int64; -typedef unsigned long long uint64; - -#include -#include -#include -#include - -#if defined(LIBHL_EXPORTS) -#define HL_API extern EXPORT -#elif defined(LIBHL_STATIC) -#define HL_API extern -#else -#define HL_API IMPORT -#endif - -#if defined(HL_VCC) -#define HL_INLINE __inline -#else -#define HL_INLINE inline -#endif - -// -------------- UNICODE ----------------------------------- - -#if defined(HL_WIN) && !defined(HL_LLVM) -# include -typedef wchar_t uchar; -# define USTR(str) L##str -# define HL_NATIVE_UCHAR_FUN -# define usprintf swprintf -# define uprintf wprintf -# define ustrlen wcslen -# define ustrdup _wcsdup -HL_API int uvszprintf( uchar *out, int out_size, const uchar *fmt, va_list arglist ); -# define utod(s,end) wcstod(s,end) -# define utoi(s,end) wcstol(s,end,10) -# define ucmp(a,b) wcscmp(a,b) -# define utostr(out,size,str) wcstombs(out,str,size) -#else -# include -#if defined(HL_IOS) || defined(HL_TVOS) || defined(HL_MAC) -#include -#include -#if !defined(__cplusplus) || (__cplusplus < 201103L && !defined(_LIBCPP_VERSION)) -typedef uint16_t char16_t; -typedef uint32_t char32_t; -#endif -#else -# include -#endif -typedef char16_t uchar; -# undef USTR -# define USTR(str) u##str -#endif - -C_FUNCTION_BEGIN -#ifndef HL_NATIVE_UCHAR_FUN -HL_API double utod( const uchar *str, uchar **end ); -HL_API int utoi( const uchar *str, uchar **end ); -HL_API int ustrlen( const uchar *str ); -HL_API uchar *ustrdup( const uchar *str ); -HL_API int ucmp( const uchar *a, const uchar *b ); -HL_API int utostr( char *out, int out_size, const uchar *str ); -HL_API int usprintf( uchar *out, int out_size, const uchar *fmt, ... 
); -HL_API int uvszprintf( uchar *out, int out_size, const uchar *fmt, va_list arglist ); -HL_API void uprintf( const uchar *fmt, const uchar *str ); -#endif -C_FUNCTION_END - -#if defined(HL_VCC) -# define hl_debug_break() if( hl_detect_debugger() ) __debugbreak() -#elif defined(HL_PS) && defined(_DEBUG) -# define hl_debug_break() __debugbreak() -#elif defined(HL_NX) -C_FUNCTION_BEGIN -HL_API void hl_debug_break( void ); -C_FUNCTION_END -#elif !defined(HL_CONSOLE) - -// use __builtin_debugtrap when available -// fall back to breakpoint instructions for certain architectures -// else raise SIGTRAP -# ifdef __has_builtin -# if __has_builtin(__builtin_debugtrap) -# define USE_BUILTIN_DEBUG_TRAP 1 -# endif -# endif - -# ifdef USE_BUILTIN_DEBUG_TRAP -# define hl_debug_break() \ - if( hl_detect_debugger() ) \ - __builtin_debugtrap() -# elif defined(__x86_64__) || defined(__i386__) -# define hl_debug_break() \ - if( hl_detect_debugger() ) \ - __asm__("int3;") -# elif defined(__aarch64__) -# define hl_debug_break() \ - if( hl_detect_debugger() ) \ - __asm__("brk #0xf000;") -# elif defined(__riscv) -# define hl_debug_break() \ - if( hl_detect_debugger() ) \ - __asm__("ebreak;") -# else -# include -# define hl_debug_break() \ - if( hl_detect_debugger() ) \ - raise(SIGTRAP) -# endif -#undef USE_BUILTIN_DEBUG_TRAP -#else -# define hl_debug_break() -#endif - -#ifdef HL_VCC -# define HL_NO_RETURN(f) __declspec(noreturn) f -# define HL_UNREACHABLE -#else -# define HL_NO_RETURN(f) f __attribute__((noreturn)) -# define HL_UNREACHABLE __builtin_unreachable() -#endif - -// ---- TYPES ------------------------------------------- - -typedef enum { - HVOID = 0, - HUI8 = 1, - HUI16 = 2, - HI32 = 3, - HI64 = 4, - HF32 = 5, - HF64 = 6, - HBOOL = 7, - HBYTES = 8, - HDYN = 9, - HFUN = 10, - HOBJ = 11, - HARRAY = 12, - HTYPE = 13, - HREF = 14, - HVIRTUAL= 15, - HDYNOBJ = 16, - HABSTRACT=17, - HENUM = 18, - HNULL = 19, - HMETHOD = 20, - HSTRUCT = 21, - HPACKED = 22, - HGUID = 23, - // --------- - HLAST = 24, - _H_FORCE_INT = 0x7FFFFFFF -} hl_type_kind; - -typedef struct hl_type hl_type; -typedef struct hl_runtime_obj hl_runtime_obj; -typedef struct hl_alloc_block hl_alloc_block; -typedef struct { hl_alloc_block *cur; } hl_alloc; -typedef struct _hl_field_lookup hl_field_lookup; - -typedef struct { - hl_alloc alloc; - void **functions_ptrs; - hl_type **functions_types; -} hl_module_context; - -typedef struct { - hl_type **args; - hl_type *ret; - int nargs; - // storage for closure - hl_type *parent; - struct { - hl_type_kind kind; - void *p; - } closure_type; - struct { - hl_type **args; - hl_type *ret; - int nargs; - hl_type *parent; - } closure; -} hl_type_fun; - -typedef struct { - const uchar *name; - hl_type *t; - int hashed_name; -} hl_obj_field; - -typedef struct { - const uchar *name; - int findex; - int pindex; - int hashed_name; -} hl_obj_proto; - -typedef struct { - int nfields; - int nproto; - int nbindings; - const uchar *name; - hl_type *super; - hl_obj_field *fields; - hl_obj_proto *proto; - int *bindings; - void **global_value; - hl_module_context *m; - hl_runtime_obj *rt; -} hl_type_obj; - -typedef struct { - hl_obj_field *fields; - int nfields; - // runtime - int dataSize; - int *indexes; - hl_field_lookup *lookup; -} hl_type_virtual; - -typedef struct { - const uchar *name; - int nparams; - hl_type **params; - int size; - bool hasptr; - int *offsets; -} hl_enum_construct; - -typedef struct { - const uchar *name; - int nconstructs; - hl_enum_construct *constructs; - void **global_value; -} 
hl_type_enum; - -struct hl_type { - hl_type_kind kind; - union { - const uchar *abs_name; - hl_type_fun *fun; - hl_type_obj *obj; - hl_type_enum *tenum; - hl_type_virtual *virt; - hl_type *tparam; - }; - void **vobj_proto; - unsigned int *mark_bits; -}; - -C_FUNCTION_BEGIN - -HL_API int hl_type_size( hl_type *t ); -#define hl_pad_size(size,t) ((t)->kind == HVOID ? 0 : ((-(size)) & (hl_type_size(t) - 1))) -HL_API int hl_pad_struct( int size, hl_type *t ); - -HL_API hl_runtime_obj *hl_get_obj_rt( hl_type *ot ); -HL_API hl_runtime_obj *hl_get_obj_proto( hl_type *ot ); -HL_API void hl_flush_proto( hl_type *ot ); -HL_API void hl_init_enum( hl_type *et, hl_module_context *m ); - -/* -------------------- VALUES ------------------------------ */ - -typedef unsigned char vbyte; - -typedef struct { - hl_type *t; -# ifndef HL_64 - int __pad; // force align on 16 bytes for double -# endif - union { - bool b; - unsigned char ui8; - unsigned short ui16; - int i; - float f; - double d; - vbyte *bytes; - void *ptr; - int64 i64; - } v; -} vdynamic; - -typedef struct { - hl_type *t; - /* fields data */ -} vobj; - -typedef struct _vvirtual vvirtual; -struct _vvirtual { - hl_type *t; - vdynamic *value; - vvirtual *next; -}; - -#define hl_vfields(v) ((void**)(((vvirtual*)(v))+1)) - -typedef struct { - hl_type *t; - hl_type *at; - int size; - int __pad; // force align on 16 bytes for double -} varray; - -typedef struct _vclosure { - hl_type *t; - void *fun; - int hasValue; -# ifdef HL_64 - int stackCount; -# endif - void *value; -} vclosure; - -typedef struct { - vclosure cl; - vclosure *wrappedFun; -} vclosure_wrapper; - -struct _hl_field_lookup { - hl_type *t; - int hashed_name; - int field_index; // negative or zero : index in methods -}; - -typedef struct { - void *ptr; - hl_type *closure; - int fid; -} hl_runtime_binding; - -struct hl_runtime_obj { - hl_type *t; - // absolute - int nfields; - int nproto; - int size; - int nmethods; - int nbindings; - unsigned char pad_size; - unsigned char largest_field; - bool hasPtr; - void **methods; - int *fields_indexes; - hl_runtime_binding *bindings; - hl_runtime_obj *parent; - const uchar *(*toStringFun)( vdynamic *obj ); - int (*compareFun)( vdynamic *a, vdynamic *b ); - vdynamic *(*castFun)( vdynamic *obj, hl_type *t ); - vdynamic *(*getFieldFun)( vdynamic *obj, int hfield ); - // relative - int nlookup; - int ninterfaces; - hl_field_lookup *lookup; - int *interfaces; -}; - -typedef struct { - hl_type *t; - hl_field_lookup *lookup; - char *raw_data; - void **values; - int nfields; - int raw_size; - int nvalues; - vvirtual *virtuals; -} vdynobj; - -#define HL_DYNOBJ_INDEX_SHIFT 17 -#define HL_DYNOBJ_INDEX_MASK ((1 << HL_DYNOBJ_INDEX_SHIFT) - 1) - -typedef struct _venum { - hl_type *t; - int index; -} venum; - -HL_API hl_type hlt_void; -HL_API hl_type hlt_i32; -HL_API hl_type hlt_i64; -HL_API hl_type hlt_f64; -HL_API hl_type hlt_f32; -HL_API hl_type hlt_dyn; -HL_API hl_type hlt_array; -HL_API hl_type hlt_bytes; -HL_API hl_type hlt_dynobj; -HL_API hl_type hlt_bool; -HL_API hl_type hlt_abstract; - - - -#if defined(HL_WIN) -typedef uchar pchar; -#define pstrchr wcschr -#define pstrlen ustrlen -#else -typedef char pchar; -#define pstrchr strchr -#define pstrlen strlen -#define HL_UTF8PATH -#endif - -#include - -typedef struct { - pchar* file_path; - pchar** sys_args; - int sys_nargs; - void (*throw_jump)(jmp_buf, int); - uchar* (*resolve_symbol)(void* addr, uchar* out, int* outSize); - int (*capture_stack)(void** stack, int size); - bool (*reload_check)(vbyte* 
alt_file); - void* (*static_call)(void* fun, hl_type* t, void** args, vdynamic* out); - void* (*get_wrapper)(hl_type* t); - void (*profile_event)(int code, vbyte *data, int len); - void (*before_exit)(); - void (*vtune_init)(); - bool (*load_plugin)( pchar *file ); - vdynamic* (*resolve_type)( hl_type *t, hl_type *gt ); - bool static_call_ref; - int closure_stack_capture; - bool is_debugger_enabled; - bool is_debugger_attached; -} hl_setup_t; - -HL_API hl_setup_t hl_setup; -HL_API void hl_sys_init(); - -HL_API double hl_nan( void ); -HL_API bool hl_is_dynamic( hl_type *t ); -HL_API bool hl_is_ptr( hl_type *t ); -HL_API bool hl_same_type( hl_type *a, hl_type *b ); -HL_API bool hl_safe_cast( hl_type *t, hl_type *to ); - -#define hl_aptr(a,t) ((t*)(((varray*)(a))+1)) - -HL_API varray *hl_alloc_array( hl_type *t, int size ); -HL_API vdynamic *hl_alloc_dynamic( hl_type *t ); -HL_API vdynamic *hl_alloc_dynbool( bool b ); -HL_API vdynamic *hl_alloc_obj( hl_type *t ); -HL_API venum *hl_alloc_enum( hl_type *t, int index ); -HL_API vvirtual *hl_alloc_virtual( hl_type *t ); -HL_API vdynobj *hl_alloc_dynobj( void ); -HL_API vbyte *hl_alloc_bytes( int size ); -HL_API vbyte *hl_copy_bytes( const vbyte *byte, int size ); -HL_API int hl_utf8_length( const vbyte *s, int pos ); -HL_API int hl_from_utf8( uchar *out, int outLen, const char *str ); -HL_API char *hl_to_utf8( const uchar *bytes ); -HL_API uchar *hl_to_utf16( const char *str ); -HL_API uchar *hl_guid_str( int64 guid, uchar buf[14] ); -HL_API vdynamic *hl_virtual_make_value( vvirtual *v ); -HL_API hl_obj_field *hl_obj_field_fetch( hl_type *t, int fid ); - -HL_API int hl_hash( vbyte *name ); -HL_API int hl_hash_utf8( const char *str ); // no cache -HL_API int hl_hash_gen( const uchar *name, bool cache_name ); -HL_API vbyte *hl_field_name( int hash ); - -#define hl_error(msg, ...) hl_throw(hl_alloc_strbytes(USTR(msg), ## __VA_ARGS__)) - -HL_API vdynamic *hl_alloc_strbytes( const uchar *msg, ... 
); -HL_API void hl_assert( void ); -HL_API HL_NO_RETURN( void hl_throw( vdynamic *v ) ); -HL_API HL_NO_RETURN( void hl_rethrow( vdynamic *v ) ); -HL_API HL_NO_RETURN( void hl_null_access( void ) ); -HL_API void hl_dump_stack( void ); -HL_API void hl_print_uncaught_exception( vdynamic *exc ); -HL_API varray *hl_exception_stack( void ); -HL_API bool hl_detect_debugger( void ); - -HL_API vvirtual *hl_to_virtual( hl_type *vt, vdynamic *obj ); -HL_API void hl_init_virtual( hl_type *vt, hl_module_context *ctx ); -HL_API hl_field_lookup *hl_lookup_find( hl_field_lookup *l, int size, int hash ); -HL_API hl_field_lookup *hl_lookup_insert( hl_field_lookup *l, int size, int hash, hl_type *t, int index ); - -HL_API int hl_dyn_geti( vdynamic *d, int hfield, hl_type *t ); -HL_API int64 hl_dyn_geti64( vdynamic *d, int hfield ); -HL_API void *hl_dyn_getp( vdynamic *d, int hfield, hl_type *t ); -HL_API float hl_dyn_getf( vdynamic *d, int hfield ); -HL_API double hl_dyn_getd( vdynamic *d, int hfield ); - -HL_API int hl_dyn_casti( void *data, hl_type *t, hl_type *to ); -HL_API int64 hl_dyn_casti64( void *data, hl_type *t ); -HL_API void *hl_dyn_castp( void *data, hl_type *t, hl_type *to ); -HL_API float hl_dyn_castf( void *data, hl_type *t ); -HL_API double hl_dyn_castd( void *data, hl_type *t ); - -#define hl_invalid_comparison 0xAABBCCDD -HL_API int hl_dyn_compare( vdynamic *a, vdynamic *b ); -HL_API vdynamic *hl_make_dyn( void *data, hl_type *t ); -HL_API void hl_write_dyn( void *data, hl_type *t, vdynamic *v, bool is_tmp ); - -HL_API void hl_dyn_seti( vdynamic *d, int hfield, hl_type *t, int value ); -HL_API void hl_dyn_seti64( vdynamic *d, int hfield, int64 value ); -HL_API void hl_dyn_setp( vdynamic *d, int hfield, hl_type *t, void *ptr ); -HL_API void hl_dyn_setf( vdynamic *d, int hfield, float f ); -HL_API void hl_dyn_setd( vdynamic *d, int hfield, double v ); - -typedef enum { - OpAdd, - OpSub, - OpMul, - OpMod, - OpDiv, - OpShl, - OpShr, - OpUShr, - OpAnd, - OpOr, - OpXor, - OpLast -} DynOp; -HL_API vdynamic *hl_dyn_op( int op, vdynamic *a, vdynamic *b ); - -HL_API vclosure *hl_alloc_closure_void( hl_type *t, void *fvalue ); -HL_API vclosure *hl_alloc_closure_ptr( hl_type *fullt, void *fvalue, void *ptr ); -HL_API vclosure *hl_make_fun_wrapper( vclosure *c, hl_type *to ); -HL_API void *hl_wrapper_call( void *value, void **args, vdynamic *ret ); -HL_API void *hl_dyn_call_obj( vdynamic *obj, hl_type *ft, int hfield, void **args, vdynamic *ret ); -HL_API vdynamic *hl_dyn_call( vclosure *c, vdynamic **args, int nargs ); -HL_API vdynamic *hl_dyn_call_safe( vclosure *c, vdynamic **args, int nargs, bool *isException ); - -/* - These macros should be only used when the closure `cl` has been type checked beforehand - so you are sure it's of the used typed. Otherwise use hl_dyn_call -*/ -#define hl_call0(ret,cl) \ - (cl->hasValue ? ((ret(*)(vdynamic*))cl->fun)((vdynamic*)cl->value) : ((ret(*)())cl->fun)()) -#define hl_call1(ret,cl,t,v) \ - (cl->hasValue ? ((ret(*)(vdynamic*,t))cl->fun)((vdynamic*)cl->value,v) : ((ret(*)(t))cl->fun)(v)) -#define hl_call2(ret,cl,t1,v1,t2,v2) \ - (cl->hasValue ? ((ret(*)(vdynamic*,t1,t2))cl->fun)((vdynamic*)cl->value,v1,v2) : ((ret(*)(t1,t2))cl->fun)(v1,v2)) -#define hl_call3(ret,cl,t1,v1,t2,v2,t3,v3) \ - (cl->hasValue ? ((ret(*)(vdynamic*,t1,t2,t3))cl->fun)((vdynamic*)cl->value,v1,v2,v3) : ((ret(*)(t1,t2,t3))cl->fun)(v1,v2,v3)) -#define hl_call4(ret,cl,t1,v1,t2,v2,t3,v3,t4,v4) \ - (cl->hasValue ? 
((ret(*)(vdynamic*,t1,t2,t3,t4))cl->fun)((vdynamic*)cl->value,v1,v2,v3,v4) : ((ret(*)(t1,t2,t3,t4))cl->fun)(v1,v2,v3,v4)) - -// ----------------------- THREADS -------------------------------------------------- - -struct _hl_thread; -struct _hl_mutex; -struct _hl_semaphore; -struct _hl_condition; -struct _hl_tls; -typedef struct _hl_thread hl_thread; -typedef struct _hl_mutex hl_mutex; -typedef struct _hl_semaphore hl_semaphore; -typedef struct _hl_condition hl_condition; -typedef struct _hl_tls hl_tls; - -HL_API hl_thread *hl_thread_start( void *callback, void *param, bool withGC ); -HL_API hl_thread *hl_thread_current( void ); -HL_API void hl_thread_yield(void); -HL_API void hl_register_thread( void *stack_top ); -HL_API void hl_unregister_thread( void ); - -HL_API hl_mutex *hl_mutex_alloc( bool gc_thread ); -HL_API void hl_mutex_acquire( hl_mutex *l ); -HL_API bool hl_mutex_try_acquire( hl_mutex *l ); -HL_API void hl_mutex_release( hl_mutex *l ); -HL_API void hl_mutex_free( hl_mutex *l ); - -HL_API hl_semaphore *hl_semaphore_alloc(int value); -HL_API void hl_semaphore_acquire(hl_semaphore *sem); -HL_API bool hl_semaphore_try_acquire(hl_semaphore *sem, vdynamic *timeout); -HL_API void hl_semaphore_release(hl_semaphore *sem); -HL_API void hl_semaphore_free(hl_semaphore *sem); - -HL_API hl_condition *hl_condition_alloc(); -HL_API void hl_condition_acquire(hl_condition *cond); -HL_API bool hl_condition_try_acquire(hl_condition *cond); -HL_API void hl_condition_release(hl_condition *cond); -HL_API void hl_condition_wait(hl_condition *cond); -HL_API bool hl_condition_timed_wait(hl_condition *cond, double timeout); -HL_API void hl_condition_signal(hl_condition *cond); -HL_API void hl_condition_broadcast(hl_condition *cond); -HL_API void hl_condition_free(hl_condition *cond); - -HL_API hl_tls *hl_tls_alloc( bool gc_value ); -HL_API void hl_tls_set( hl_tls *l, void *value ); -HL_API void *hl_tls_get( hl_tls *l ); -HL_API void hl_tls_free( hl_tls *l ); - -// ----------------------- ALLOC -------------------------------------------------- - -#define MEM_HAS_PTR(kind) (!((kind)&2)) -#define MEM_KIND_DYNAMIC 0 -#define MEM_KIND_RAW 1 -#define MEM_KIND_NOPTR 2 -#define MEM_KIND_FINALIZER 3 -#define MEM_ALIGN_DOUBLE 128 -#define MEM_ZERO 256 - -HL_API void *hl_gc_alloc_gen( hl_type *t, int size, int flags ); -HL_API void hl_add_root( void *ptr ); -HL_API void hl_remove_root( void *ptr ); -HL_API void hl_gc_major( void ); -HL_API bool hl_is_gc_ptr( void *ptr ); -HL_API int hl_gc_get_memsize( void *ptr ); - -HL_API void hl_blocking( bool b ); -HL_API bool hl_is_blocking( void ); - -typedef void (*hl_types_dump)( void (*)( void *, int) ); -HL_API void hl_gc_set_dump_types( hl_types_dump tdump ); - -#define hl_gc_alloc_noptr(size) hl_gc_alloc_gen(&hlt_bytes,size,MEM_KIND_NOPTR) -#define hl_gc_alloc(t,size) hl_gc_alloc_gen(t,size,MEM_KIND_DYNAMIC) -#define hl_gc_alloc_raw(size) hl_gc_alloc_gen(&hlt_abstract,size,MEM_KIND_RAW) -#define hl_gc_alloc_finalizer(size) hl_gc_alloc_gen(&hlt_abstract,size,MEM_KIND_FINALIZER) - -HL_API void hl_alloc_init( hl_alloc *a ); -HL_API void *hl_malloc( hl_alloc *a, int size ); -HL_API void *hl_zalloc( hl_alloc *a, int size ); -HL_API void hl_free( hl_alloc *a ); - -HL_API void hl_global_init( void ); -HL_API void hl_global_free( void ); -HL_API void hl_global_lock( bool lock ); - -HL_API void *hl_alloc_executable_memory( int size ); -HL_API void hl_free_executable_memory( void *ptr, int size ); - -// ----------------------- BUFFER 
-------------------------------------------------- - -typedef struct hl_buffer hl_buffer; - -HL_API hl_buffer *hl_alloc_buffer( void ); -HL_API void hl_buffer_val( hl_buffer *b, vdynamic *v ); -HL_API void hl_buffer_char( hl_buffer *b, uchar c ); -HL_API void hl_buffer_str( hl_buffer *b, const uchar *str ); -HL_API void hl_buffer_cstr( hl_buffer *b, const char *str ); -HL_API void hl_buffer_str_sub( hl_buffer *b, const uchar *str, int len ); -HL_API int hl_buffer_length( hl_buffer *b ); -HL_API uchar *hl_buffer_content( hl_buffer *b, int *len ); -HL_API uchar *hl_to_string( vdynamic *v ); -HL_API const uchar *hl_type_str( hl_type *t ); -HL_API void hl_throw_buffer( hl_buffer *b ); - -// ----------------------- FFI ------------------------------------------------------ - -// match GNU C++ mangling -#define TYPE_STR "vcsilfdbBDPOATR??X?N?S?g" - -#undef _VOID -#define _NO_ARG -#define _VOID "v" -#define _I8 "c" -#define _I16 "s" -#define _I32 "i" -#define _I64 "l" -#define _F32 "f" -#define _F64 "d" -#define _BOOL "b" -#define _BYTES "B" -#define _DYN "D" -#define _FUN(t, args) "P" args "_" t -#define _OBJ(fields) "O" fields "_" -#define _ARR "A" -#define _TYPE "T" -#define _REF(t) "R" t -#define _ABSTRACT(name) "X" #name "_" -#undef _NULL -#define _NULL(t) "N" t -#define _STRUCT "S" -#define _GUID "g" - -#undef _STRING -#define _STRING _OBJ(_BYTES _I32) - -typedef struct { - hl_type *t; - uchar *bytes; - int length; -} vstring; - -#define DEFINE_PRIM(t,name,args) DEFINE_PRIM_WITH_NAME(t,name,args,name) -#define _DEFINE_PRIM_WITH_NAME(t,name,args,realName) C_FUNCTION_BEGIN EXPORT void *hlp_##realName( const char **sign ) { *sign = _FUN(t,args); return (void*)(&HL_NAME(name)); } C_FUNCTION_END - -#if !defined(HL_NAME) -# define HL_NAME(p) p -# ifdef LIBHL_EXPORTS -# define HL_PRIM EXPORT -# undef DEFINE_PRIM -# define DEFINE_PRIM(t,name,args) _DEFINE_PRIM_WITH_NAME(t,hl_##name,args,name) -# define DEFINE_PRIM_WITH_NAME _DEFINE_PRIM_WITH_NAME -# else -# define HL_PRIM -# define DEFINE_PRIM_WITH_NAME(t,name,args,realName) -# endif -#elif defined(LIBHL_STATIC) -# ifdef __cplusplus -# define HL_PRIM extern "C" -# else -# define HL_PRIM -# endif -#define DEFINE_PRIM_WITH_NAME(t,name,args,realName) -#else -# ifdef __cplusplus -# define HL_PRIM extern "C" EXPORT -# else -# define HL_PRIM EXPORT -# endif -# define DEFINE_PRIM_WITH_NAME _DEFINE_PRIM_WITH_NAME -#endif - -#if defined(HL_GCC) && !defined(HL_CONSOLE) -# ifdef HL_CLANG -# define HL_NO_OPT __attribute__ ((optnone)) -# else -# define HL_NO_OPT __attribute__((optimize("-O0"))) -# endif -#else -# define HL_NO_OPT -#endif - -// -------------- EXTRA ------------------------------------ - -#define hl_fatal(msg) hl_fatal_error(msg,__FILE__,__LINE__) -#define hl_fatal1(msg,p0) hl_fatal_fmt(__FILE__,__LINE__,msg,p0) -#define hl_fatal2(msg,p0,p1) hl_fatal_fmt(__FILE__,__LINE__,msg,p0,p1) -#define hl_fatal3(msg,p0,p1,p2) hl_fatal_fmt(__FILE__,__LINE__,msg,p0,p1,p2) -#define hl_fatal4(msg,p0,p1,p2,p3) hl_fatal_fmt(__FILE__,__LINE__,msg,p0,p1,p2,p3) -HL_API void *hl_fatal_error( const char *msg, const char *file, int line ); -HL_API void hl_fatal_fmt( const char *file, int line, const char *fmt, ...); - -typedef struct _hl_trap_ctx hl_trap_ctx; -struct _hl_trap_ctx { - jmp_buf buf; - hl_trap_ctx *prev; - vdynamic *tcheck; -}; -#define hl_trap(ctx,r,label) { hl_thread_info *__tinf = hl_get_thread(); ctx.tcheck = NULL; ctx.prev = __tinf->trap_current; __tinf->trap_current = &ctx; if( setjmp(ctx.buf) ) { r = __tinf->exc_value; goto label; } } -#define 
hl_endtrap(ctx) hl_get_thread()->trap_current = ctx.prev - -#define HL_EXC_MAX_STACK 0x100 -#define HL_EXC_RETHROW 1 -#define HL_EXC_CATCH_ALL 2 -#define HL_EXC_IS_THROW 4 -#define HL_THREAD_INVISIBLE 16 -#define HL_THREAD_PROFILER_PAUSED 32 -#define HL_EXC_KILL 64 -#define HL_TREAD_TRACK_SHIFT 16 - -#define HL_TRACK_ALLOC 1 -#define HL_TRACK_CAST 2 -#define HL_TRACK_DYNFIELD 4 -#define HL_TRACK_DYNCALL 8 -#define HL_TRACK_MASK (HL_TRACK_ALLOC | HL_TRACK_CAST | HL_TRACK_DYNFIELD | HL_TRACK_DYNCALL) - -#define HL_MAX_EXTRA_STACK 64 - -#ifdef HL_MAC -#include -#include -#endif - -typedef struct { - int thread_id; - // gc vars - volatile int gc_blocking; - void *stack_top; - void *stack_cur; - // exception handling - hl_trap_ctx *trap_current; - hl_trap_ctx *trap_uncaught; - vclosure *exc_handler; - vdynamic *exc_value; - int flags; - int exc_stack_count; - // extra - char thread_name[128]; - jmp_buf gc_regs; - void *exc_stack_trace[HL_EXC_MAX_STACK]; - void *extra_stack_data[HL_MAX_EXTRA_STACK]; - int extra_stack_size; - #ifdef HL_MAC - thread_t mach_thread_id; - pthread_t pthread_id; - #endif -} hl_thread_info; - -typedef struct { - int count; - bool stopping_world; - hl_thread_info **threads; - hl_mutex *global_lock; - hl_mutex *exclusive_lock; - void *guid_map; -} hl_threads_info; - -HL_API hl_thread_info *hl_get_thread(); -HL_API hl_threads_info *hl_gc_threads_info(); - -#ifdef HL_TRACK_ENABLE - -typedef struct { - int flags; - void (*on_alloc)(hl_type *,int,int,void*); - void (*on_cast)(hl_type *, hl_type*); - void (*on_dynfield)( vdynamic *, int ); - void (*on_dyncall)( vdynamic *, int ); -} hl_track_info; - -#define hl_is_tracking(flag) ((hl_track.flags&(flag)) && (hl_get_thread()->flags & (flag< +#if TARGET_OS_IOS +#define HL_IOS +#elif TARGET_OS_TV +#define HL_TVOS +#elif TARGET_OS_MAC +#define HL_MAC +#endif +#endif + +#ifdef __ANDROID__ +# define HL_ANDROID +#endif + +#if defined(linux) || defined(__linux__) +# define HL_LINUX +# ifndef _GNU_SOURCE +# define _GNU_SOURCE +# endif +#endif + +#if defined(__EMSCRIPTEN__) +# define HL_EMSCRIPTEN +# ifndef _GNU_SOURCE +# define _GNU_SOURCE +# endif +#endif + +#if defined(HL_IOS) || defined(HL_ANDROID) || defined(HL_TVOS) +# define HL_MOBILE +#endif + +#ifdef __ORBIS__ +# define HL_PS +#endif + +#ifdef __NX__ +# define HL_NX +#endif + +#ifdef _DURANGO +# define HL_XBO +#endif + +#ifdef _GAMING_XBOX +# define HL_XBS +#endif + +#if defined(HL_PS) || defined(HL_NX) || defined(HL_XBO) || defined(HL_XBS) || defined(HL_OS) +# define HL_CONSOLE +#endif + +#if (defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)) && !defined(HL_CONSOLE) +# define HL_BSD +#endif + +#if defined(_64BITS) || defined(__x86_64__) || defined(_M_X64) || defined(__LP64__) || defined(__wasm64__) || defined(__aarch64__) +# define HL_64 +#endif + +#if defined(__aarch64__) || defined(_M_ARM64) +# define HL_ARM64 +#endif + +#if defined(__GNUC__) +# define HL_GCC +#endif + +#if defined(__MINGW32__) +# define HL_MINGW +#endif + +#if defined(__CYGWIN__) +# define HL_CYGWIN +#endif + +#if defined(__llvm__) +# define HL_LLVM +#endif + +#if defined(__clang__) +# define HL_CLANG +#endif + +#if defined(_MSC_VER) && !defined(HL_LLVM) +# define HL_VCC +# pragma warning(disable:4996) // remove deprecated C API usage warnings +# pragma warning(disable:4055) // void* - to - function cast +# pragma warning(disable:4152) // void* - to - function cast +# pragma warning(disable:4201) // anonymous struct +# pragma warning(disable:4127) // while( true ) +# pragma 
warning(disable:4710) // inline disabled +# pragma warning(disable:4711) // inline activated +# pragma warning(disable:4255) // windows include +# pragma warning(disable:4820) // windows include +# pragma warning(disable:4668) // windows include +# pragma warning(disable:4738) // return float bad performances +# pragma warning(disable:4061) // explicit values in switch +# if (_MSC_VER >= 1920) +# pragma warning(disable:5045) // spectre +# endif +#endif + +#if defined(HL_VCC) || defined(HL_MINGW) || defined(HL_CYGWIN) +# define HL_WIN_CALL +#endif + +#ifdef _DEBUG +# define HL_DEBUG +#endif + +#ifndef HL_CONSOLE +# define HL_TRACK_ENABLE +#endif + +#ifndef HL_NO_THREADS +# define HL_THREADS +# ifdef HL_VCC +# define HL_THREAD_VAR __declspec( thread ) +# define HL_THREAD_STATIC_VAR HL_THREAD_VAR static +# else +# define HL_THREAD_VAR __thread +# define HL_THREAD_STATIC_VAR static HL_THREAD_VAR +# endif +#else +# define HL_THREAD_VAR +# define HL_THREAD_STATIC_VAR static +#endif + +#include +#ifndef HL_VCC +# include +#endif + +#if defined(HL_VCC) || defined(HL_MINGW) +# define EXPORT __declspec( dllexport ) +# define IMPORT __declspec( dllimport ) +#else +#if defined(HL_GCC) || defined(HL_CLANG) +# define EXPORT __attribute__((visibility("default"))) +#else +# define EXPORT +#endif +# define IMPORT extern +#endif + +#ifdef HL_64 +# define HL_WSIZE 8 +# define IS_64 1 +# if defined(HL_VCC) || defined(HL_MINGW) +# define _PTR_FMT L"%IX" +# else +# define _PTR_FMT u"%lX" +# endif +#else +# define HL_WSIZE 4 +# define IS_64 0 +# if defined(HL_VCC) || defined(HL_MINGW) +# define _PTR_FMT L"%IX" +# else +# define _PTR_FMT u"%X" +# endif +#endif + +#ifdef __cplusplus +# define C_FUNCTION_BEGIN extern "C" { +# define C_FUNCTION_END }; +#else +# define C_FUNCTION_BEGIN +# define C_FUNCTION_END +#endif + +typedef intptr_t int_val; +typedef long long int64; +typedef unsigned long long uint64; + +#include +#include +#include +#include + +#if defined(LIBHL_EXPORTS) +#define HL_API extern EXPORT +#elif defined(LIBHL_STATIC) +#define HL_API extern +#else +#define HL_API IMPORT +#endif + +#if defined(HL_VCC) +#define HL_INLINE __inline +#else +#define HL_INLINE inline +#endif + +// -------------- UNICODE ----------------------------------- + +#if defined(HL_WIN) && !defined(HL_LLVM) +# include +typedef wchar_t uchar; +# define USTR(str) L##str +# define HL_NATIVE_UCHAR_FUN +# define usprintf swprintf +# define uprintf wprintf +# define ustrlen wcslen +# define ustrdup _wcsdup +HL_API int uvszprintf( uchar *out, int out_size, const uchar *fmt, va_list arglist ); +# define utod(s,end) wcstod(s,end) +# define utoi(s,end) wcstol(s,end,10) +# define ucmp(a,b) wcscmp(a,b) +# define utostr(out,size,str) wcstombs(out,str,size) +#else +# include +#if defined(HL_IOS) || defined(HL_TVOS) || defined(HL_MAC) +#include +#include +#if !defined(__cplusplus) || (__cplusplus < 201103L && !defined(_LIBCPP_VERSION)) +typedef uint16_t char16_t; +typedef uint32_t char32_t; +#endif +#else +# include +#endif +typedef char16_t uchar; +# undef USTR +# define USTR(str) u##str +#endif + +C_FUNCTION_BEGIN +#ifndef HL_NATIVE_UCHAR_FUN +HL_API double utod( const uchar *str, uchar **end ); +HL_API int utoi( const uchar *str, uchar **end ); +HL_API int ustrlen( const uchar *str ); +HL_API uchar *ustrdup( const uchar *str ); +HL_API int ucmp( const uchar *a, const uchar *b ); +HL_API int utostr( char *out, int out_size, const uchar *str ); +HL_API int usprintf( uchar *out, int out_size, const uchar *fmt, ... 
); +HL_API int uvszprintf( uchar *out, int out_size, const uchar *fmt, va_list arglist ); +HL_API void uprintf( const uchar *fmt, const uchar *str ); +#endif +C_FUNCTION_END + +#if defined(HL_VCC) +# define hl_debug_break() if( hl_detect_debugger() ) __debugbreak() +#elif defined(HL_PS) && defined(_DEBUG) +# define hl_debug_break() __debugbreak() +#elif defined(HL_NX) +C_FUNCTION_BEGIN +HL_API void hl_debug_break( void ); +C_FUNCTION_END +#elif !defined(HL_CONSOLE) + +// use __builtin_debugtrap when available +// fall back to breakpoint instructions for certain architectures +// else raise SIGTRAP +# ifdef __has_builtin +# if __has_builtin(__builtin_debugtrap) +# define USE_BUILTIN_DEBUG_TRAP 1 +# endif +# endif + +# ifdef USE_BUILTIN_DEBUG_TRAP +# define hl_debug_break() \ + if( hl_detect_debugger() ) \ + __builtin_debugtrap() +# elif defined(__x86_64__) || defined(__i386__) +# define hl_debug_break() \ + if( hl_detect_debugger() ) \ + __asm__("int3;") +# elif defined(__aarch64__) +# define hl_debug_break() \ + if( hl_detect_debugger() ) \ + __asm__("brk #0xf000;") +# elif defined(__riscv) +# define hl_debug_break() \ + if( hl_detect_debugger() ) \ + __asm__("ebreak;") +# else +# include +# define hl_debug_break() \ + if( hl_detect_debugger() ) \ + raise(SIGTRAP) +# endif +#undef USE_BUILTIN_DEBUG_TRAP +#else +# define hl_debug_break() +#endif + +#ifdef HL_VCC +# define HL_NO_RETURN(f) __declspec(noreturn) f +# define HL_UNREACHABLE +#else +# define HL_NO_RETURN(f) f __attribute__((noreturn)) +# define HL_UNREACHABLE __builtin_unreachable() +#endif + +// ---- TYPES ------------------------------------------- + +typedef enum { + HVOID = 0, + HUI8 = 1, + HUI16 = 2, + HI32 = 3, + HI64 = 4, + HF32 = 5, + HF64 = 6, + HBOOL = 7, + HBYTES = 8, + HDYN = 9, + HFUN = 10, + HOBJ = 11, + HARRAY = 12, + HTYPE = 13, + HREF = 14, + HVIRTUAL= 15, + HDYNOBJ = 16, + HABSTRACT=17, + HENUM = 18, + HNULL = 19, + HMETHOD = 20, + HSTRUCT = 21, + HPACKED = 22, + HGUID = 23, + // --------- + HLAST = 24, + _H_FORCE_INT = 0x7FFFFFFF +} hl_type_kind; + +typedef struct hl_type hl_type; +typedef struct hl_runtime_obj hl_runtime_obj; +typedef struct hl_alloc_block hl_alloc_block; +typedef struct { hl_alloc_block *cur; } hl_alloc; +typedef struct _hl_field_lookup hl_field_lookup; + +typedef struct { + hl_alloc alloc; + void **functions_ptrs; + hl_type **functions_types; +} hl_module_context; + +typedef struct { + hl_type **args; + hl_type *ret; + int nargs; + // storage for closure + hl_type *parent; + struct { + hl_type_kind kind; + void *p; + } closure_type; + struct { + hl_type **args; + hl_type *ret; + int nargs; + hl_type *parent; + } closure; +} hl_type_fun; + +typedef struct { + const uchar *name; + hl_type *t; + int hashed_name; +} hl_obj_field; + +typedef struct { + const uchar *name; + int findex; + int pindex; + int hashed_name; +} hl_obj_proto; + +typedef struct { + int nfields; + int nproto; + int nbindings; + const uchar *name; + hl_type *super; + hl_obj_field *fields; + hl_obj_proto *proto; + int *bindings; + void **global_value; + hl_module_context *m; + hl_runtime_obj *rt; +} hl_type_obj; + +typedef struct { + hl_obj_field *fields; + int nfields; + // runtime + int dataSize; + int *indexes; + hl_field_lookup *lookup; +} hl_type_virtual; + +typedef struct { + const uchar *name; + int nparams; + hl_type **params; + int size; + bool hasptr; + int *offsets; +} hl_enum_construct; + +typedef struct { + const uchar *name; + int nconstructs; + hl_enum_construct *constructs; + void **global_value; +} 
hl_type_enum; + +struct hl_type { + hl_type_kind kind; + union { + const uchar *abs_name; + hl_type_fun *fun; + hl_type_obj *obj; + hl_type_enum *tenum; + hl_type_virtual *virt; + hl_type *tparam; + }; + void **vobj_proto; + unsigned int *mark_bits; +}; + +C_FUNCTION_BEGIN + +HL_API int hl_type_size( hl_type *t ); +#define hl_pad_size(size,t) ((t)->kind == HVOID ? 0 : ((-(size)) & (hl_type_size(t) - 1))) +HL_API int hl_pad_struct( int size, hl_type *t ); + +HL_API hl_runtime_obj *hl_get_obj_rt( hl_type *ot ); +HL_API hl_runtime_obj *hl_get_obj_proto( hl_type *ot ); +HL_API void hl_flush_proto( hl_type *ot ); +HL_API void hl_init_enum( hl_type *et, hl_module_context *m ); + +/* -------------------- VALUES ------------------------------ */ + +typedef unsigned char vbyte; + +typedef struct { + hl_type *t; +# ifndef HL_64 + int __pad; // force align on 16 bytes for double +# endif + union { + bool b; + unsigned char ui8; + unsigned short ui16; + int i; + float f; + double d; + vbyte *bytes; + void *ptr; + int64 i64; + } v; +} vdynamic; + +typedef struct { + hl_type *t; + /* fields data */ +} vobj; + +typedef struct _vvirtual vvirtual; +struct _vvirtual { + hl_type *t; + vdynamic *value; + vvirtual *next; +}; + +#define hl_vfields(v) ((void**)(((vvirtual*)(v))+1)) + +typedef struct { + hl_type *t; + hl_type *at; + int size; + int __pad; // force align on 16 bytes for double +} varray; + +typedef struct _vclosure { + hl_type *t; + void *fun; + int hasValue; +# ifdef HL_64 + int stackCount; +# endif + void *value; +} vclosure; + +typedef struct { + vclosure cl; + vclosure *wrappedFun; +} vclosure_wrapper; + +struct _hl_field_lookup { + hl_type *t; + int hashed_name; + int field_index; // negative or zero : index in methods +}; + +typedef struct { + void *ptr; + hl_type *closure; + int fid; +} hl_runtime_binding; + +struct hl_runtime_obj { + hl_type *t; + // absolute + int nfields; + int nproto; + int size; + int nmethods; + int nbindings; + unsigned char pad_size; + unsigned char largest_field; + bool hasPtr; + void **methods; + int *fields_indexes; + hl_runtime_binding *bindings; + hl_runtime_obj *parent; + const uchar *(*toStringFun)( vdynamic *obj ); + int (*compareFun)( vdynamic *a, vdynamic *b ); + vdynamic *(*castFun)( vdynamic *obj, hl_type *t ); + vdynamic *(*getFieldFun)( vdynamic *obj, int hfield ); + // relative + int nlookup; + int ninterfaces; + hl_field_lookup *lookup; + int *interfaces; +}; + +typedef struct { + hl_type *t; + hl_field_lookup *lookup; + char *raw_data; + void **values; + int nfields; + int raw_size; + int nvalues; + vvirtual *virtuals; +} vdynobj; + +#define HL_DYNOBJ_INDEX_SHIFT 17 +#define HL_DYNOBJ_INDEX_MASK ((1 << HL_DYNOBJ_INDEX_SHIFT) - 1) + +typedef struct _venum { + hl_type *t; + int index; +} venum; + +HL_API hl_type hlt_void; +HL_API hl_type hlt_i32; +HL_API hl_type hlt_i64; +HL_API hl_type hlt_f64; +HL_API hl_type hlt_f32; +HL_API hl_type hlt_dyn; +HL_API hl_type hlt_array; +HL_API hl_type hlt_bytes; +HL_API hl_type hlt_dynobj; +HL_API hl_type hlt_bool; +HL_API hl_type hlt_abstract; + + + +#if defined(HL_WIN) +typedef uchar pchar; +#define pstrchr wcschr +#define pstrlen ustrlen +#else +typedef char pchar; +#define pstrchr strchr +#define pstrlen strlen +#define HL_UTF8PATH +#endif + +#include + +typedef struct { + pchar* file_path; + pchar** sys_args; + int sys_nargs; + void (*throw_jump)(jmp_buf, int); + uchar* (*resolve_symbol)(void* addr, uchar* out, int* outSize); + int (*capture_stack)(void** stack, int size); + bool (*reload_check)(vbyte* 
alt_file); + void* (*static_call)(void* fun, hl_type* t, void** args, vdynamic* out); + void* (*get_wrapper)(hl_type* t); + void (*profile_event)(int code, vbyte *data, int len); + void (*before_exit)(); + void (*vtune_init)(); + bool (*load_plugin)( pchar *file ); + vdynamic* (*resolve_type)( hl_type *t, hl_type *gt ); + bool static_call_ref; + int closure_stack_capture; + bool is_debugger_enabled; + bool is_debugger_attached; +} hl_setup_t; + +HL_API hl_setup_t hl_setup; +HL_API void hl_sys_init(); + +HL_API double hl_nan( void ); +HL_API bool hl_is_dynamic( hl_type *t ); +HL_API bool hl_is_ptr( hl_type *t ); +HL_API bool hl_same_type( hl_type *a, hl_type *b ); +HL_API bool hl_safe_cast( hl_type *t, hl_type *to ); + +#define hl_aptr(a,t) ((t*)(((varray*)(a))+1)) + +HL_API varray *hl_alloc_array( hl_type *t, int size ); +HL_API vdynamic *hl_alloc_dynamic( hl_type *t ); +HL_API vdynamic *hl_alloc_dynbool( bool b ); +HL_API vdynamic *hl_alloc_obj( hl_type *t ); +HL_API venum *hl_alloc_enum( hl_type *t, int index ); +HL_API vvirtual *hl_alloc_virtual( hl_type *t ); +HL_API vdynobj *hl_alloc_dynobj( void ); +HL_API vbyte *hl_alloc_bytes( int size ); +HL_API vbyte *hl_copy_bytes( const vbyte *byte, int size ); +HL_API int hl_utf8_length( const vbyte *s, int pos ); +HL_API int hl_from_utf8( uchar *out, int outLen, const char *str ); +HL_API char *hl_to_utf8( const uchar *bytes ); +HL_API uchar *hl_to_utf16( const char *str ); +HL_API uchar *hl_guid_str( int64 guid, uchar buf[14] ); +HL_API vdynamic *hl_virtual_make_value( vvirtual *v ); +HL_API hl_obj_field *hl_obj_field_fetch( hl_type *t, int fid ); + +HL_API int hl_hash( vbyte *name ); +HL_API int hl_hash_utf8( const char *str ); // no cache +HL_API int hl_hash_gen( const uchar *name, bool cache_name ); +HL_API vbyte *hl_field_name( int hash ); + +#define hl_error(msg, ...) hl_throw(hl_alloc_strbytes(USTR(msg), ## __VA_ARGS__)) + +HL_API vdynamic *hl_alloc_strbytes( const uchar *msg, ... 
); +HL_API void hl_assert( void ); +HL_API HL_NO_RETURN( void hl_throw( vdynamic *v ) ); +HL_API HL_NO_RETURN( void hl_rethrow( vdynamic *v ) ); +HL_API HL_NO_RETURN( void hl_null_access( void ) ); +HL_API void hl_dump_stack( void ); +HL_API void hl_print_uncaught_exception( vdynamic *exc ); +HL_API varray *hl_exception_stack( void ); +HL_API bool hl_detect_debugger( void ); + +HL_API vvirtual *hl_to_virtual( hl_type *vt, vdynamic *obj ); +HL_API void hl_init_virtual( hl_type *vt, hl_module_context *ctx ); +HL_API hl_field_lookup *hl_lookup_find( hl_field_lookup *l, int size, int hash ); +HL_API hl_field_lookup *hl_lookup_insert( hl_field_lookup *l, int size, int hash, hl_type *t, int index ); + +HL_API int hl_dyn_geti( vdynamic *d, int hfield, hl_type *t ); +HL_API int64 hl_dyn_geti64( vdynamic *d, int hfield ); +HL_API void *hl_dyn_getp( vdynamic *d, int hfield, hl_type *t ); +HL_API float hl_dyn_getf( vdynamic *d, int hfield ); +HL_API double hl_dyn_getd( vdynamic *d, int hfield ); + +HL_API int hl_dyn_casti( void *data, hl_type *t, hl_type *to ); +HL_API int64 hl_dyn_casti64( void *data, hl_type *t ); +HL_API void *hl_dyn_castp( void *data, hl_type *t, hl_type *to ); +HL_API float hl_dyn_castf( void *data, hl_type *t ); +HL_API double hl_dyn_castd( void *data, hl_type *t ); + +#define hl_invalid_comparison 0xAABBCCDD +HL_API int hl_dyn_compare( vdynamic *a, vdynamic *b ); +HL_API vdynamic *hl_make_dyn( void *data, hl_type *t ); +HL_API void hl_write_dyn( void *data, hl_type *t, vdynamic *v, bool is_tmp ); + +HL_API void hl_dyn_seti( vdynamic *d, int hfield, hl_type *t, int value ); +HL_API void hl_dyn_seti64( vdynamic *d, int hfield, int64 value ); +HL_API void hl_dyn_setp( vdynamic *d, int hfield, hl_type *t, void *ptr ); +HL_API void hl_dyn_setf( vdynamic *d, int hfield, float f ); +HL_API void hl_dyn_setd( vdynamic *d, int hfield, double v ); + +typedef enum { + OpAdd, + OpSub, + OpMul, + OpMod, + OpDiv, + OpShl, + OpShr, + OpUShr, + OpAnd, + OpOr, + OpXor, + OpLast +} DynOp; +HL_API vdynamic *hl_dyn_op( int op, vdynamic *a, vdynamic *b ); + +HL_API vclosure *hl_alloc_closure_void( hl_type *t, void *fvalue ); +HL_API vclosure *hl_alloc_closure_ptr( hl_type *fullt, void *fvalue, void *ptr ); +HL_API vclosure *hl_make_fun_wrapper( vclosure *c, hl_type *to ); +HL_API void *hl_wrapper_call( void *value, void **args, vdynamic *ret ); +HL_API void *hl_dyn_call_obj( vdynamic *obj, hl_type *ft, int hfield, void **args, vdynamic *ret ); +HL_API vdynamic *hl_dyn_call( vclosure *c, vdynamic **args, int nargs ); +HL_API vdynamic *hl_dyn_call_safe( vclosure *c, vdynamic **args, int nargs, bool *isException ); + +/* + These macros should be only used when the closure `cl` has been type checked beforehand + so you are sure it's of the used typed. Otherwise use hl_dyn_call +*/ +#define hl_call0(ret,cl) \ + (cl->hasValue ? ((ret(*)(vdynamic*))cl->fun)((vdynamic*)cl->value) : ((ret(*)())cl->fun)()) +#define hl_call1(ret,cl,t,v) \ + (cl->hasValue ? ((ret(*)(vdynamic*,t))cl->fun)((vdynamic*)cl->value,v) : ((ret(*)(t))cl->fun)(v)) +#define hl_call2(ret,cl,t1,v1,t2,v2) \ + (cl->hasValue ? ((ret(*)(vdynamic*,t1,t2))cl->fun)((vdynamic*)cl->value,v1,v2) : ((ret(*)(t1,t2))cl->fun)(v1,v2)) +#define hl_call3(ret,cl,t1,v1,t2,v2,t3,v3) \ + (cl->hasValue ? ((ret(*)(vdynamic*,t1,t2,t3))cl->fun)((vdynamic*)cl->value,v1,v2,v3) : ((ret(*)(t1,t2,t3))cl->fun)(v1,v2,v3)) +#define hl_call4(ret,cl,t1,v1,t2,v2,t3,v3,t4,v4) \ + (cl->hasValue ? 
((ret(*)(vdynamic*,t1,t2,t3,t4))cl->fun)((vdynamic*)cl->value,v1,v2,v3,v4) : ((ret(*)(t1,t2,t3,t4))cl->fun)(v1,v2,v3,v4)) + +// ----------------------- THREADS -------------------------------------------------- + +struct _hl_thread; +struct _hl_mutex; +struct _hl_semaphore; +struct _hl_condition; +struct _hl_tls; +typedef struct _hl_thread hl_thread; +typedef struct _hl_mutex hl_mutex; +typedef struct _hl_semaphore hl_semaphore; +typedef struct _hl_condition hl_condition; +typedef struct _hl_tls hl_tls; + +HL_API hl_thread *hl_thread_start( void *callback, void *param, bool withGC ); +HL_API hl_thread *hl_thread_current( void ); +HL_API void hl_thread_yield(void); +HL_API void hl_register_thread( void *stack_top ); +HL_API void hl_unregister_thread( void ); + +HL_API hl_mutex *hl_mutex_alloc( bool gc_thread ); +HL_API void hl_mutex_acquire( hl_mutex *l ); +HL_API bool hl_mutex_try_acquire( hl_mutex *l ); +HL_API void hl_mutex_release( hl_mutex *l ); +HL_API void hl_mutex_free( hl_mutex *l ); + +HL_API hl_semaphore *hl_semaphore_alloc(int value); +HL_API void hl_semaphore_acquire(hl_semaphore *sem); +HL_API bool hl_semaphore_try_acquire(hl_semaphore *sem, vdynamic *timeout); +HL_API void hl_semaphore_release(hl_semaphore *sem); +HL_API void hl_semaphore_free(hl_semaphore *sem); + +HL_API hl_condition *hl_condition_alloc(); +HL_API void hl_condition_acquire(hl_condition *cond); +HL_API bool hl_condition_try_acquire(hl_condition *cond); +HL_API void hl_condition_release(hl_condition *cond); +HL_API void hl_condition_wait(hl_condition *cond); +HL_API bool hl_condition_timed_wait(hl_condition *cond, double timeout); +HL_API void hl_condition_signal(hl_condition *cond); +HL_API void hl_condition_broadcast(hl_condition *cond); +HL_API void hl_condition_free(hl_condition *cond); + +HL_API hl_tls *hl_tls_alloc( bool gc_value ); +HL_API void hl_tls_set( hl_tls *l, void *value ); +HL_API void *hl_tls_get( hl_tls *l ); +HL_API void hl_tls_free( hl_tls *l ); + +// ----------------------- ALLOC -------------------------------------------------- + +#define MEM_HAS_PTR(kind) (!((kind)&2)) +#define MEM_KIND_DYNAMIC 0 +#define MEM_KIND_RAW 1 +#define MEM_KIND_NOPTR 2 +#define MEM_KIND_FINALIZER 3 +#define MEM_ALIGN_DOUBLE 128 +#define MEM_ZERO 256 + +HL_API void *hl_gc_alloc_gen( hl_type *t, int size, int flags ); +HL_API void hl_add_root( void *ptr ); +HL_API void hl_remove_root( void *ptr ); +HL_API void hl_gc_major( void ); +HL_API bool hl_is_gc_ptr( void *ptr ); +HL_API int hl_gc_get_memsize( void *ptr ); + +HL_API void hl_blocking( bool b ); +HL_API bool hl_is_blocking( void ); + +typedef void (*hl_types_dump)( void (*)( void *, int) ); +HL_API void hl_gc_set_dump_types( hl_types_dump tdump ); + +#define hl_gc_alloc_noptr(size) hl_gc_alloc_gen(&hlt_bytes,size,MEM_KIND_NOPTR) +#define hl_gc_alloc(t,size) hl_gc_alloc_gen(t,size,MEM_KIND_DYNAMIC) +#define hl_gc_alloc_raw(size) hl_gc_alloc_gen(&hlt_abstract,size,MEM_KIND_RAW) +#define hl_gc_alloc_finalizer(size) hl_gc_alloc_gen(&hlt_abstract,size,MEM_KIND_FINALIZER) + +HL_API void hl_alloc_init( hl_alloc *a ); +HL_API void *hl_malloc( hl_alloc *a, int size ); +HL_API void *hl_zalloc( hl_alloc *a, int size ); +HL_API void hl_free( hl_alloc *a ); + +HL_API void hl_global_init( void ); +HL_API void hl_global_free( void ); +HL_API void hl_global_lock( bool lock ); + +HL_API void *hl_alloc_executable_memory( int size ); +HL_API void hl_free_executable_memory( void *ptr, int size ); +HL_API void hl_jit_write_protect( bool executable ); +HL_API void 
hl_jit_flush_cache( void *ptr, int size ); + +// ----------------------- BUFFER -------------------------------------------------- + +typedef struct hl_buffer hl_buffer; + +HL_API hl_buffer *hl_alloc_buffer( void ); +HL_API void hl_buffer_val( hl_buffer *b, vdynamic *v ); +HL_API void hl_buffer_char( hl_buffer *b, uchar c ); +HL_API void hl_buffer_str( hl_buffer *b, const uchar *str ); +HL_API void hl_buffer_cstr( hl_buffer *b, const char *str ); +HL_API void hl_buffer_str_sub( hl_buffer *b, const uchar *str, int len ); +HL_API int hl_buffer_length( hl_buffer *b ); +HL_API uchar *hl_buffer_content( hl_buffer *b, int *len ); +HL_API uchar *hl_to_string( vdynamic *v ); +HL_API const uchar *hl_type_str( hl_type *t ); +HL_API void hl_throw_buffer( hl_buffer *b ); + +// ----------------------- FFI ------------------------------------------------------ + +// match GNU C++ mangling +#define TYPE_STR "vcsilfdbBDPOATR??X?N?S?g" + +#undef _VOID +#define _NO_ARG +#define _VOID "v" +#define _I8 "c" +#define _I16 "s" +#define _I32 "i" +#define _I64 "l" +#define _F32 "f" +#define _F64 "d" +#define _BOOL "b" +#define _BYTES "B" +#define _DYN "D" +#define _FUN(t, args) "P" args "_" t +#define _OBJ(fields) "O" fields "_" +#define _ARR "A" +#define _TYPE "T" +#define _REF(t) "R" t +#define _ABSTRACT(name) "X" #name "_" +#undef _NULL +#define _NULL(t) "N" t +#define _STRUCT "S" +#define _GUID "g" + +#undef _STRING +#define _STRING _OBJ(_BYTES _I32) + +typedef struct { + hl_type *t; + uchar *bytes; + int length; +} vstring; + +HL_API int hl_str_cmp( vstring *a, vstring *b ); + +#define DEFINE_PRIM(t,name,args) DEFINE_PRIM_WITH_NAME(t,name,args,name) +#define _DEFINE_PRIM_WITH_NAME(t,name,args,realName) C_FUNCTION_BEGIN EXPORT void *hlp_##realName( const char **sign ) { *sign = _FUN(t,args); return (void*)(&HL_NAME(name)); } C_FUNCTION_END + +#if !defined(HL_NAME) +# define HL_NAME(p) p +# ifdef LIBHL_EXPORTS +# define HL_PRIM EXPORT +# undef DEFINE_PRIM +# define DEFINE_PRIM(t,name,args) _DEFINE_PRIM_WITH_NAME(t,hl_##name,args,name) +# define DEFINE_PRIM_WITH_NAME _DEFINE_PRIM_WITH_NAME +# else +# define HL_PRIM +# define DEFINE_PRIM_WITH_NAME(t,name,args,realName) +# endif +#elif defined(LIBHL_STATIC) +# ifdef __cplusplus +# define HL_PRIM extern "C" +# else +# define HL_PRIM +# endif +#define DEFINE_PRIM_WITH_NAME(t,name,args,realName) +#else +# ifdef __cplusplus +# define HL_PRIM extern "C" EXPORT +# else +# define HL_PRIM EXPORT +# endif +# define DEFINE_PRIM_WITH_NAME _DEFINE_PRIM_WITH_NAME +#endif + +#if defined(HL_GCC) && !defined(HL_CONSOLE) +# ifdef HL_CLANG +# define HL_NO_OPT __attribute__ ((optnone)) +# else +# define HL_NO_OPT __attribute__((optimize("-O0"))) +# endif +#else +# define HL_NO_OPT +#endif + +// -------------- EXTRA ------------------------------------ + +#define hl_fatal(msg) hl_fatal_error(msg,__FILE__,__LINE__) +#define hl_fatal1(msg,p0) hl_fatal_fmt(__FILE__,__LINE__,msg,p0) +#define hl_fatal2(msg,p0,p1) hl_fatal_fmt(__FILE__,__LINE__,msg,p0,p1) +#define hl_fatal3(msg,p0,p1,p2) hl_fatal_fmt(__FILE__,__LINE__,msg,p0,p1,p2) +#define hl_fatal4(msg,p0,p1,p2,p3) hl_fatal_fmt(__FILE__,__LINE__,msg,p0,p1,p2,p3) +HL_API void *hl_fatal_error( const char *msg, const char *file, int line ); +HL_API void hl_fatal_fmt( const char *file, int line, const char *fmt, ...); + +typedef struct _hl_trap_ctx hl_trap_ctx; +struct _hl_trap_ctx { + jmp_buf buf; + hl_trap_ctx *prev; + vdynamic *tcheck; +}; +#define hl_trap(ctx,r,label) { hl_thread_info *__tinf = hl_get_thread(); ctx.tcheck = NULL; 
ctx.prev = __tinf->trap_current; __tinf->trap_current = &ctx; if( setjmp(ctx.buf) ) { r = __tinf->exc_value; goto label; } } +#define hl_endtrap(ctx) hl_get_thread()->trap_current = ctx.prev + +#define HL_EXC_MAX_STACK 0x100 +#define HL_EXC_RETHROW 1 +#define HL_EXC_CATCH_ALL 2 +#define HL_EXC_IS_THROW 4 +#define HL_THREAD_INVISIBLE 16 +#define HL_THREAD_PROFILER_PAUSED 32 +#define HL_EXC_KILL 64 +#define HL_TREAD_TRACK_SHIFT 16 + +#define HL_TRACK_ALLOC 1 +#define HL_TRACK_CAST 2 +#define HL_TRACK_DYNFIELD 4 +#define HL_TRACK_DYNCALL 8 +#define HL_TRACK_MASK (HL_TRACK_ALLOC | HL_TRACK_CAST | HL_TRACK_DYNFIELD | HL_TRACK_DYNCALL) + +#define HL_MAX_EXTRA_STACK 64 + +#ifdef HL_MAC +#include +#include +#endif + +typedef struct { + int thread_id; + // gc vars + volatile int gc_blocking; + void *stack_top; + void *stack_cur; + // exception handling + hl_trap_ctx *trap_current; + hl_trap_ctx *trap_uncaught; + vclosure *exc_handler; + vdynamic *exc_value; + int flags; + int exc_stack_count; + // extra + char thread_name[128]; + jmp_buf gc_regs; + void *exc_stack_trace[HL_EXC_MAX_STACK]; + void *extra_stack_data[HL_MAX_EXTRA_STACK]; + int extra_stack_size; + #ifdef HL_MAC + thread_t mach_thread_id; + pthread_t pthread_id; + #endif +} hl_thread_info; + +typedef struct { + int count; + bool stopping_world; + hl_thread_info **threads; + hl_mutex *global_lock; + hl_mutex *exclusive_lock; + void *guid_map; +} hl_threads_info; + +HL_API hl_thread_info *hl_get_thread(); +HL_API hl_threads_info *hl_gc_threads_info(); + +#ifdef HL_TRACK_ENABLE + +typedef struct { + int flags; + void (*on_alloc)(hl_type *,int,int,void*); + void (*on_cast)(hl_type *, hl_type*); + void (*on_dynfield)( vdynamic *, int ); + void (*on_dyncall)( vdynamic *, int ); +} hl_track_info; + +#define hl_is_tracking(flag) ((hl_track.flags&(flag)) && (hl_get_thread()->flags & (flag< +#include +#include +#include "jit_common.h" +#include "jit_aarch64_emit.h" +#include "hlsystem.h" + +// Helper for LDR/STR scaled offset from struct field +#define FIELD_OFFSET_SCALED(type, field) (offsetof(type, field) / 8) + +// ============================================================================ +// AArch64 Register Configuration (AAPCS64) +// ============================================================================ + +/* + * AAPCS64 (ARM Architecture Procedure Call Standard for ARM64) + * + * Register Usage: + * - X0-X7: Argument/result registers (caller-saved) + * - X8: Indirect result location register (caller-saved) + * - X9-X15: Temporary registers (caller-saved) + * - X16-X17: Intra-procedure-call temporary registers (caller-saved) + * - X18: Platform register (avoid use - may be reserved by OS) + * - X19-X28: Callee-saved registers + * - X29: Frame pointer (FP) + * - X30: Link register (LR) + * - SP: Stack pointer (must be 16-byte aligned) + * + * FP/SIMD Registers: + * - V0-V7: Argument/result registers (caller-saved) + * - V8-V15: Callee-saved (only lower 64 bits, D8-D15) + * - V16-V31: Temporary registers (caller-saved) + */ + +#define RCPU_COUNT 31 // X0-X30 (SP is not a general register) +#define RFPU_COUNT 32 // V0-V31 + +// Calling convention: first 8 args in X0-X7 +#define CALL_NREGS 8 +static const Arm64Reg CALL_REGS[] = { X0, X1, X2, X3, X4, X5, X6, X7 }; +static const Arm64FpReg FP_CALL_REGS[] = { V0, V1, V2, V3, V4, V5, V6, V7 }; + +// Caller-saved (scratch) registers: X0-X17 (avoid X18) +// Note: We use X0-X17 as scratch, but X0-X7 are also argument registers +#define RCPU_SCRATCH_COUNT 18 + +// vdynamic structure: type (8 
bytes) + value (8 bytes) +#define HDYN_VALUE 8 +static const Arm64Reg RCPU_SCRATCH_REGS[] = { + X0, X1, X2, X3, X4, X5, X6, X7, + X8, X9, X10, X11, X12, X13, X14, X15, + X16, X17 +}; + +// Full FP register file size used for allocation scans. Only the caller-saved +// V0-V7 and V16-V31 (24 registers) are ever allocated; V8-V15 are callee-saved +// per AAPCS64 and our prologue doesn't save them, so alloc_fpu skips them. +#define RFPU_SCRATCH_COUNT 32 + +// Callee-saved registers: X19-X28 +// X29 (FP) and X30 (LR) are also callee-saved but handled specially +#define RCPU_CALLEE_SAVED_COUNT 10 +static const Arm64Reg RCPU_CALLEE_SAVED[] = { + X19, X20, X21, X22, X23, X24, X25, X26, X27, X28 +}; + +// Callee-saved registers available for allocation (excludes RTMP/RTMP2) +// These survive function calls, so we don't need to spill them before BLR +#define RCPU_CALLEE_ALLOC_COUNT 8 +static const Arm64Reg RCPU_CALLEE_ALLOC[] = { + X19, X20, X21, X22, X23, X24, X25, X26 +}; + +// FP callee-saved: V8-V15 (only lower 64 bits per AAPCS64) +// NOTE: We intentionally do NOT allocate these registers because our prologue +// doesn't save them. This array is kept for documentation and is_callee_saved_fpu(). +#define RFPU_CALLEE_SAVED_COUNT 8 +static const Arm64FpReg RFPU_CALLEE_SAVED[] = { + V8, V9, V10, V11, V12, V13, V14, V15 +}; + +// Helper macros for accessing registers +#define REG_COUNT (RCPU_COUNT + RFPU_COUNT) +#define VFPR(i) ((i) + RCPU_COUNT) // FP register index +#define PVFPR(i) REG_AT(VFPR(i)) // Pointer to FP register + +// Reserved registers for JIT internal use +#define RTMP X27 // Temporary register for multi-instruction sequences +#define RTMP2 X28 // Second temporary register + +// Special purpose registers +#define RFP X29 // Frame pointer +#define RLR X30 // Link register + +// Stack alignment requirement +#define STACK_ALIGN 16 + +// EMIT32 is defined in jit_common.h - use EMIT32(ctx, val) + +// ============================================================================ +// Error Handling +// ============================================================================ + +void _jit_error(jit_ctx *ctx, const char *msg, int line) { + printf("JIT ERROR: %s (jit_aarch64.c:%d)\n", msg, line); + if (ctx && ctx->f) { + // hl_function doesn't have a 'name' field directly + // The function object info would be in the module + int func_index = (int)(ctx->f - ctx->m->code->functions); + printf("In function at index %d\n", func_index); + } + jit_exit(); +} + +void on_jit_error(const char *msg, int_val line) { + printf("JIT Runtime Error: %s (line %d)\n", msg, (int)line); + jit_exit(); +} + +static void jit_null_fail(int fhash) { + vbyte *field = hl_field_name(fhash); + hl_buffer *b = hl_alloc_buffer(); + hl_buffer_str(b, USTR("Null access .")); + hl_buffer_str(b, (uchar*)field); + vdynamic *d = hl_alloc_dynamic(&hlt_bytes); + d->v.ptr = hl_buffer_content(b, NULL); + hl_throw(d); +} + +#define JIT_ASSERT(cond) do { if (!(cond)) { \ + printf("JIT ASSERTION FAILED: %s (jit_aarch64.c:%d)\n", #cond, __LINE__); \ + jit_exit(); \ +} } while(0) + +// ============================================================================ +// Register Allocation Helpers +// ============================================================================ + +/** + * Check if a CPU register is a call (argument) register + */ +static bool is_call_reg(Arm64Reg r) { + for (int i = 0; i < CALL_NREGS; i++) { + if (CALL_REGS[i] == r) + return true; + } + return false; +} + +/** + * Get the index of a register in the call register array + * Returns -1 if not a call register + */ +static int
call_reg_index(Arm64Reg r) { + for (int i = 0; i < CALL_NREGS; i++) { + if (CALL_REGS[i] == r) + return i; + } + return -1; +} + +/** + * Check if a register is callee-saved (must be preserved across calls) + */ +static bool is_callee_saved_cpu(Arm64Reg r) { + for (int i = 0; i < RCPU_CALLEE_SAVED_COUNT; i++) { + if (RCPU_CALLEE_SAVED[i] == r) + return true; + } + return r == RFP || r == RLR; +} + +static bool is_callee_saved_fpu(Arm64FpReg r) { + for (int i = 0; i < RFPU_CALLEE_SAVED_COUNT; i++) { + if (RFPU_CALLEE_SAVED[i] == r) + return true; + } + return false; +} + +/** + * Check if type is String (HOBJ with bytes:HBYTES + length:HI32) + * Used for value-based string comparison per Haxe spec. + */ +static bool is_string_type(hl_type *t) { + if (t->kind != HOBJ || !t->obj) return false; + if (t->obj->nfields != 2) return false; + return t->obj->fields[0].t->kind == HBYTES && + t->obj->fields[1].t->kind == HI32; +} + +// ============================================================================ +// Register Allocation +// ============================================================================ + +// Forward declarations +static void free_reg(jit_ctx *ctx, preg *p); +static void patch_jump(jit_ctx *ctx, int pos, int target_pos); + +/** + * Find a free CPU register, evicting if necessary + * @param k Register kind (RCPU, RCPU_CALL, etc.) + * @return Pointer to allocated physical register + */ +static preg *alloc_cpu(jit_ctx *ctx, preg_kind k) { + preg *p; + int i; + int start_idx = 0; + int count = RCPU_SCRATCH_COUNT; + const Arm64Reg *regs = RCPU_SCRATCH_REGS; + + // For RCPU_CALL, only use non-argument registers + if (k == RCPU_CALL) { + // Use registers that are NOT in CALL_REGS + // For now, use X8-X17 (scratch registers that aren't args) + start_idx = 8; // Start from X8 + } + + // First pass: find a free scratch register (not holding anything and not locked) + // Lock check: p->lock >= ctx->currentPos means locked at current operation + for (i = start_idx; i < count; i++) { + p = REG_AT(regs[i]); + if (p->holds == NULL && p->lock < ctx->currentPos) + return p; + } + + // Second pass: try callee-saved registers (X19-X26) before evicting scratch + // These survive function calls, so values don't need to be spilled before BLR + for (i = 0; i < RCPU_CALLEE_ALLOC_COUNT; i++) { + p = REG_AT(RCPU_CALLEE_ALLOC[i]); + if (p->holds == NULL && p->lock < ctx->currentPos) { + ctx->callee_saved_used |= (1 << i); // Mark register as used for Phase 2 NOP patching + return p; + } + } + + // Third pass: evict a callee-saved register if one is unlocked + for (i = 0; i < RCPU_CALLEE_ALLOC_COUNT; i++) { + p = REG_AT(RCPU_CALLEE_ALLOC[i]); + if (p->lock < ctx->currentPos) { + ctx->callee_saved_used |= (1 << i); // Mark register as used for Phase 2 NOP patching + free_reg(ctx, p); // Spill to stack before reusing + return p; + } + } + + // Fourth pass: evict a scratch register + for (i = start_idx; i < count; i++) { + p = REG_AT(regs[i]); + if (p->lock < ctx->currentPos) { + free_reg(ctx, p); // Spill to stack before reusing + return p; + } + } + + // All registers are locked - this is an error + JIT_ASSERT(0); + return NULL; +} + +/** + * Allocate a floating-point register + * + * IMPORTANT: We only use caller-saved FP registers (V0-V7, V16-V31). + * V8-V15 are callee-saved per AAPCS64, and since our prologue/epilogue + * doesn't save/restore them, we must not allocate them. + * + * This gives us 24 FP registers which is sufficient for most code. 
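+ * (Concretely, both allocation passes below iterate V0-V7 and then V16-V31,
+ * skipping every index in the V8-V15 range.)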
+ * If all are in use, we evict (spill to stack) the least recently used. + */ +static preg *alloc_fpu(jit_ctx *ctx) { + preg *p; + int i; + + // First pass: find a free caller-saved register (V0-V7, V16-V31) + // Lock check: p->lock >= ctx->currentPos means locked at current operation + for (i = 0; i < RFPU_COUNT; i++) { + if (i >= 8 && i < 16) + continue; // NEVER use callee-saved V8-V15 - they aren't saved in prologue + p = PVFPR(i); + if (p->holds == NULL && p->lock < ctx->currentPos) + return p; + } + + // Second pass: evict an unlocked caller-saved register + // Only iterate over V0-V7 and V16-V31, skip V8-V15 + for (i = 0; i < RFPU_COUNT; i++) { + if (i >= 8 && i < 16) + continue; // NEVER use callee-saved V8-V15 + p = PVFPR(i); + if (p->lock < ctx->currentPos) { + free_reg(ctx, p); // Spill to stack before reusing + return p; + } + } + + JIT_ASSERT(0); + return NULL; +} + +/** + * Allocate a register of the appropriate type based on the virtual register's type + */ +static preg *alloc_reg(jit_ctx *ctx, vreg *r, preg_kind k) { + if (IS_FLOAT(r)) + return alloc_fpu(ctx); + else + return alloc_cpu(ctx, k); +} + +// ============================================================================ +// Register State Management +// ============================================================================ + +/** + * Store a virtual register to its stack location + */ +static void store(jit_ctx *ctx, vreg *r, preg *p); // Forward declaration +static void mov_reg_reg(jit_ctx *ctx, Arm64Reg dst, Arm64Reg src, bool is_64bit); // Forward declaration +static void ldr_stack(jit_ctx *ctx, Arm64Reg dst, int stack_offset, int size); // Forward declaration +static void emit_call_findex(jit_ctx *ctx, int findex, int stack_space); // Forward declaration + +/** + * Free a physical register by storing its content to stack if needed + */ +static void free_reg(jit_ctx *ctx, preg *p) { + vreg *r = p->holds; + if (r != NULL) { + store(ctx, r, p); + r->current = NULL; + p->holds = NULL; + } + // Unlock the register so it can be reused + RUNLOCK(p); +} + +/** + * Discard the content of a physical register, storing if dirty. + * Used when we're done using a register but the vreg might still be live. + * If the vreg is dirty (modified but not yet on stack), we store it first. + */ +static void discard(jit_ctx *ctx, preg *p) { + vreg *r = p->holds; + if (r != NULL) { + // If dirty, store to stack before clearing the binding + if (r->dirty) { + store(ctx, r, p); + } + r->current = NULL; + p->holds = NULL; + } + // Unlock the register so it can be reused + RUNLOCK(p); +} + +/** + * Spill all caller-saved registers to stack before a function call. + * In AAPCS64: X0-X17 and V0-V7 are caller-saved and may be clobbered. + * + * This function: + * 1. Stores each bound register's value to its vreg's stack slot + * 2. Clears the register↔vreg bindings + * + * IMPORTANT: Must be called BEFORE the BLR instruction, not after! + * At that point register values are still valid and can be spilled to stack. + * After the call, caller-saved registers contain garbage from the callee. + * + * ARCHITECTURAL NOTE - Why AArch64 differs from x86: + * + * The x86 JIT's discard_regs() just clears register bindings without spilling. + * This works because x86 (CISC) can use memory operands directly in ALU + * instructions: + * + * x86: ADD [rbp-8], rax ; Operate directly on stack slot + * + * So x86 treats stack slots as the "source of truth" - values are written + * to stack as part of normal operations, and registers are just caches. 
+ * Clearing bindings is safe because the value is already on the stack. + * + * AArch64 (RISC load/store architecture) cannot do this: + * + * AArch64: LDR x1, [fp, #-8] ; Must load to register first + * ADD x0, x0, x1 ; Operate on registers only + * STR x0, [fp, #-8] ; Separate store instruction + * + * Adding a store after every operation would cost ~1 extra instruction per op. + * Instead, we keep values in registers (registers are "source of truth") and + * only spill when necessary - specifically, before function calls that will + * clobber caller-saved registers. + * + * This is not a workaround but the natural design for load/store architectures. + */ +static void spill_regs(jit_ctx *ctx) { + int i; + // Spill and discard CPU scratch registers (X0-X17) - these get clobbered by calls + for (i = 0; i < 18; i++) { + preg *r = &ctx->pregs[i]; + if (r->holds) { + if (r->holds->dirty) { + free_reg(ctx, r); // Dirty: store to stack, then clear binding + } else { + discard(ctx, r); // Clean: just clear binding (value already on stack) + } + } + } + // NOTE: Do NOT spill callee-saved registers (X19-X26)! + // They survive function calls, so their values remain valid after BLR. + // This is the key optimization - values in callee-saved don't need spilling. + + // Spill and discard FPU scratch registers (V0-V7, V16-V31) - these get clobbered by calls + // NOTE: V8-V15 are callee-saved per AAPCS64, but we intentionally never allocate them + // (see alloc_fpu) since our prologue doesn't save them. No need to handle them here. + for (i = 0; i < 8; i++) { + preg *r = &ctx->pregs[RCPU_COUNT + i]; + if (r->holds) { + if (r->holds->dirty) { + free_reg(ctx, r); // Dirty: store to stack, then clear binding + } else { + discard(ctx, r); // Clean: just clear binding (value already on stack) + } + } + } + // Also spill V16-V31 (caller-saved temporary FPU registers) + for (i = 16; i < 32; i++) { + preg *r = &ctx->pregs[RCPU_COUNT + i]; + if (r->holds) { + if (r->holds->dirty) { + free_reg(ctx, r); // Dirty: store to stack, then clear binding + } else { + discard(ctx, r); // Clean: just clear binding (value already on stack) + } + } + } +} + +/** + * Spill callee-saved registers to stack. + * Called before jumps to labels - callee-saved must be on stack at merge points. + * NOTE: This is NOT called before function calls (callee-saved survive calls). 
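+ *
+ * Illustrative sketch (hypothetical registers and stack offset): two
+ * predecessors of a label may hold the same vreg in different callee-saved
+ * registers, so each path spills before branching and the code emitted
+ * after the label reloads from the agreed stack slot:
+ *
+ *   path A: STR x19, [fp, #-24]  ; spill vreg, then B label
+ *   path B: STR x20, [fp, #-24]  ; spill vreg, then B label
+ *   label:  LDR x9,  [fp, #-24]  ; reload on demand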
+ */ +static void spill_callee_saved(jit_ctx *ctx) { + int i; + // Spill callee-saved CPU registers (X19-X26) that are in use + for (i = 0; i < RCPU_CALLEE_ALLOC_COUNT; i++) { + preg *r = REG_AT(RCPU_CALLEE_ALLOC[i]); + if (r->holds) { + if (r->holds->dirty) { + free_reg(ctx, r); // Dirty: store to stack, then clear binding + } else { + discard(ctx, r); // Clean: just clear binding + } + } + } +} + +/** + * Ensure a virtual register is in a physical register + * Loads from stack if necessary + */ +static preg *fetch(jit_ctx *ctx, vreg *r); // Forward declaration + +/** + * Bind a vreg to a physical register (bidirectional association) + * This is essential for proper spilling when the register is evicted + */ +static void reg_bind(jit_ctx *ctx, vreg *r, preg *p) { + // If vreg was dirty in another register, store to stack first + // This prevents losing values when rebinding (e.g., dst = dst op src) + if (r->current && r->current != p) { + if (r->dirty) { + store(ctx, r, r->current); + } + r->current->holds = NULL; + } + // Set new binding + r->current = p; + p->holds = r; +} + +/** + * Allocate a destination register for a vreg + * Helper function used by many operations + * Binds the vreg to the allocated register for proper spilling + */ +static preg *alloc_dst(jit_ctx *ctx, vreg *r) { + preg *p; + if (IS_FLOAT(r)) { + p = alloc_fpu(ctx); + } else { + p = alloc_cpu(ctx, RCPU); + } + // Bind the vreg to this register so we can spill it later if needed + reg_bind(ctx, r, p); + // Mark dirty: a new value is about to be written to this register, + // and it's not on the stack yet. This ensures spill_regs() will + // store it before the next call/jump. + r->dirty = 1; + return p; +} + +// ============================================================================ +// Basic Data Movement - Encoding Helpers +// ============================================================================ + +/** + * Generate MOV instruction (register to register) + * For integer: MOV Xd, Xn (using ORR Xd, XZR, Xn) + * For float: FMOV Vd, Vn + */ +static void mov_reg_reg(jit_ctx *ctx, Arm64Reg dst, Arm64Reg src, bool is_64bit) { + // SP (register 31) can't be used with ORR - must use ADD instead + if (src == SP_REG || dst == SP_REG) { + // MOV Xd, SP or MOV SP, Xn => ADD Xd, Xn, #0 + encode_add_sub_imm(ctx, is_64bit ? 1 : 0, 0, 0, 0, 0, src, dst); + } else if (is_64bit) { + // MOV Xd, Xn => ORR Xd, XZR, Xn + encode_logical_reg(ctx, 1, 0x01, 0, 0, src, 0, XZR, dst); + } else { + // MOV Wd, Wn => ORR Wd, WZR, Wn + encode_logical_reg(ctx, 0, 0x01, 0, 0, src, 0, XZR, dst); + } +} + +static void fmov_reg_reg(jit_ctx *ctx, Arm64FpReg dst, Arm64FpReg src, bool is_double) { + // FMOV Vd, Vn (using FP 1-source with opcode 0) + int type = is_double ? 0x01 : 0x00; // 01=double, 00=single + encode_fp_1src(ctx, 0, 0, type, 0, src, dst); +} + +/** + * Load from stack to register + * Format: LDR/LDUR Xt, [FP, #offset] + * + * Uses LDUR for signed offsets in range [-256, +255] (single instruction) + * Uses LDR with scaled unsigned offset for aligned positive offsets + * Falls back to computing address in register for large offsets + */ +static void ldr_stack(jit_ctx *ctx, Arm64Reg dst, int stack_offset, int size) { + int size_enc = (size == 8) ? 3 : ((size == 4) ? 2 : ((size == 2) ? 
1 : 0)); + + // Priority 1: Use LDUR for small signed offsets (-256 to +255) + // This handles most negative stack offsets in a single instruction + if (stack_offset >= -256 && stack_offset <= 255) { + encode_ldur_stur(ctx, size_enc, 0, 0x01, stack_offset, RFP, dst); + return; + } + + // Priority 2: Use LDR with scaled unsigned offset for larger positive aligned offsets + if (stack_offset >= 0 && (stack_offset % size == 0) && stack_offset < 4096 * size) { + int scaled_offset = stack_offset / size; + encode_ldr_str_imm(ctx, size_enc, 0, 0x01, scaled_offset, RFP, dst); + return; + } + + // Fallback: Compute address in register for large/unaligned offsets + load_immediate(ctx, stack_offset, RTMP, true); + encode_add_sub_reg(ctx, 1, 0, 0, 0, RTMP, 0, RFP, RTMP); + encode_ldr_str_imm(ctx, size_enc, 0, 0x01, 0, RTMP, dst); +} + +/** + * Load from stack to FP register + * Format: LDR/LDUR Dt/St, [FP, #offset] + */ +static void ldr_stack_fp(jit_ctx *ctx, Arm64FpReg dst, int stack_offset, int size) { + int size_enc = (size == 8) ? 3 : ((size == 4) ? 2 : 1); + + // Priority 1: Use LDUR for small signed offsets (-256 to +255) + if (stack_offset >= -256 && stack_offset <= 255) { + encode_ldur_stur(ctx, size_enc, 1, 0x01, stack_offset, RFP, dst); + return; + } + + // Priority 2: Use LDR with scaled unsigned offset for larger positive aligned offsets + if (stack_offset >= 0 && (stack_offset % size == 0) && stack_offset < 4096 * size) { + int scaled_offset = stack_offset / size; + encode_ldr_str_imm(ctx, size_enc, 1, 0x01, scaled_offset, RFP, dst); + return; + } + + // Fallback: Compute address in register + load_immediate(ctx, stack_offset, RTMP, true); + encode_add_sub_reg(ctx, 1, 0, 0, 0, RTMP, 0, RFP, RTMP); + encode_ldr_str_imm(ctx, size_enc, 1, 0x01, 0, RTMP, dst); +} + +/** + * Store register to stack + * Format: STR/STUR Xt, [FP, #offset] + * + * Uses STUR for signed offsets in range [-256, +255] (single instruction) + * Uses STR with scaled unsigned offset for aligned positive offsets + * Falls back to computing address in register for large offsets + */ +static void str_stack(jit_ctx *ctx, Arm64Reg src, int stack_offset, int size) { + int size_enc = (size == 8) ? 3 : ((size == 4) ? 2 : ((size == 2) ? 1 : 0)); + + // Priority 1: Use STUR for small signed offsets (-256 to +255) + if (stack_offset >= -256 && stack_offset <= 255) { + encode_ldur_stur(ctx, size_enc, 0, 0x00, stack_offset, RFP, src); + return; + } + + // Priority 2: Use STR with scaled unsigned offset for larger positive aligned offsets + if (stack_offset >= 0 && (stack_offset % size == 0) && stack_offset < 4096 * size) { + int scaled_offset = stack_offset / size; + encode_ldr_str_imm(ctx, size_enc, 0, 0x00, scaled_offset, RFP, src); + return; + } + + // Fallback: Compute address in register + load_immediate(ctx, stack_offset, RTMP, true); + encode_add_sub_reg(ctx, 1, 0, 0, 0, RTMP, 0, RFP, RTMP); + encode_ldr_str_imm(ctx, size_enc, 0, 0x00, 0, RTMP, src); +} + +/** + * Store FP register to stack + * Format: STR/STUR Dt/St, [FP, #offset] + */ +static void str_stack_fp(jit_ctx *ctx, Arm64FpReg src, int stack_offset, int size) { + int size_enc = (size == 8) ? 3 : ((size == 4) ? 
2 : 1); + + // Priority 1: Use STUR for small signed offsets (-256 to +255) + if (stack_offset >= -256 && stack_offset <= 255) { + encode_ldur_stur(ctx, size_enc, 1, 0x00, stack_offset, RFP, src); + return; + } + + // Priority 2: Use STR with scaled unsigned offset for larger positive aligned offsets + if (stack_offset >= 0 && (stack_offset % size == 0) && stack_offset < 4096 * size) { + int scaled_offset = stack_offset / size; + encode_ldr_str_imm(ctx, size_enc, 1, 0x00, scaled_offset, RFP, src); + return; + } + + // Fallback: Compute address in register + load_immediate(ctx, stack_offset, RTMP, true); + encode_add_sub_reg(ctx, 1, 0, 0, 0, RTMP, 0, RFP, RTMP); + encode_ldr_str_imm(ctx, size_enc, 1, 0x00, 0, RTMP, src); +} + +/** + * STP with signed offset (no writeback) - for NOPpable callee-saved saves. + * Format: STP Xt1, Xt2, [Xn, #imm] + * This allows individual STPs to be patched to NOPs without affecting SP. + */ +static void stp_offset(jit_ctx *ctx, Arm64Reg rt, Arm64Reg rt2, Arm64Reg rn, int offset) { + int imm7 = offset / 8; + // opc=10 (64-bit), 101, addr_mode=10 (signed offset), L=0 (store), imm7, Rt2, Rn, Rt + unsigned int insn = (2u << 30) | (5u << 27) | (2u << 23) | (0u << 22) | + ((imm7 & 0x7F) << 15) | (rt2 << 10) | (rn << 5) | rt; + EMIT32(ctx, insn); +} + +/** + * LDP with signed offset (no writeback) - for NOPpable callee-saved restores. + * Format: LDP Xt1, Xt2, [Xn, #imm] + */ +static void ldp_offset(jit_ctx *ctx, Arm64Reg rt, Arm64Reg rt2, Arm64Reg rn, int offset) { + int imm7 = offset / 8; + // opc=10 (64-bit), 101, addr_mode=10 (signed offset), L=1 (load), imm7, Rt2, Rn, Rt + unsigned int insn = (2u << 30) | (5u << 27) | (2u << 23) | (1u << 22) | + ((imm7 & 0x7F) << 15) | (rt2 << 10) | (rn << 5) | rt; + EMIT32(ctx, insn); +} + +// ============================================================================ +// Data Movement Operations +// ============================================================================ + +/** + * Store a virtual register to its stack location + */ +static void store(jit_ctx *ctx, vreg *r, preg *p) { + if (r == NULL || p == NULL || r->size == 0) + return; + + int size = r->size; + int offset = r->stackPos; + + if (p->kind == RCPU) { + str_stack(ctx, p->id, offset, size); + } else if (p->kind == RFPU) { + str_stack_fp(ctx, p->id, offset, size); + } + + r->dirty = 0; // Stack is now up-to-date +} + +/** + * Mark a virtual register as dirty (register value differs from stack). + * The value will be spilled at the next basic block boundary (jump, call, label). + * This defers stores to reduce instruction count within basic blocks. + */ +static void mark_dirty(jit_ctx *ctx, vreg *r) { + (void)ctx; // unused, kept for consistency with other functions + if (r != NULL && r->current != NULL && r->size > 0) { + r->dirty = 1; + } +} + +/** + * Store to a vreg's stack slot and clear any stale register binding. + * Use this when storing directly (e.g., from X0 after a call) without + * going through the normal register allocation path. + * + * This prevents spill_regs from later overwriting the correct stack + * value with a stale register value. 
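+ *
+ * Hypothetical sequence showing the hazard (register and offset names are
+ * illustrative only):
+ *
+ *   BLR x9                  ; call; result arrives in X0
+ *   STR x0, [fp, #-16]      ; store result straight into dst's slot
+ *   ; if dst->current still pointed at a register holding dst's OLD value,
+ *   ; a later spill_regs() would write that stale value back over
+ *   ; [fp, #-16] - store_result() clears the binding to prevent this.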
+ */ +static void store_result(jit_ctx *ctx, vreg *dst) { + // Clear any stale binding - the correct value is now on stack + if (dst->current != NULL) { + dst->current->holds = NULL; + dst->current = NULL; + } +} + +/** + * Load a virtual register from stack to a physical register + */ +static preg *fetch(jit_ctx *ctx, vreg *r) { + preg *p; + + // HVOID registers have size 0 and no value to load + if (r->size == 0) + return UNUSED; + + // Check if already in a register + if (r->current != NULL && r->current->kind != RSTACK) { + // Lock the register to prevent eviction during subsequent allocations + RLOCK(r->current); + return r->current; + } + + // Allocate a register + p = alloc_reg(ctx, r, RCPU); + + // If the register we got already holds something, evict it + if (p->holds != NULL) + free_reg(ctx, p); + + // Load from stack + int size = r->size; + int offset = r->stackPos; + + if (IS_FLOAT(r)) { + ldr_stack_fp(ctx, p->id, offset, size); + } else { + ldr_stack(ctx, p->id, offset, size); + } + + // Bind vreg to register and lock it to prevent eviction by subsequent allocs + reg_bind(ctx, r, p); + RLOCK(p); + + return p; +} + +/** + * Copy data between locations (register, stack, immediate) + * This is the main data movement workhorse function + */ +static void copy(jit_ctx *ctx, vreg *dst, preg *dst_p, vreg *src, preg *src_p) { + if (src_p->kind == RCONST) { + // Load immediate into destination + int64_t val = src_p->id; + + if (IS_FLOAT(dst)) { + // Load float constant: load bits as integer, then move to FP register + preg *d = (dst_p && dst_p->kind == RFPU) ? dst_p : alloc_fpu(ctx); + + if (val == 0) { + // FMOV Dd, XZR - zero the FP register + EMIT32(ctx, (1 << 31) | (0 << 29) | (0x1E << 24) | (1 << 22) | (1 << 21) | (7 << 16) | (31 << 5) | d->id); + } else { + // Load bits to GPR, then FMOV to FPR + load_immediate(ctx, val, RTMP, true); + // FMOV Dd, Xn: sf=1, S=0, type=01, rmode=00, opcode=00111, Rn, Rd + EMIT32(ctx, (0x9E670000) | (RTMP << 5) | d->id); + } + + if (dst_p == NULL || dst_p != d) { + reg_bind(ctx, dst, d); + } + } else { + // Load integer immediate + preg *d = (dst_p && dst_p->kind == RCPU) ? dst_p : fetch(ctx, dst); + load_immediate(ctx, val, d->id, dst->size == 8); + if (dst_p == NULL || dst_p != d) { + reg_bind(ctx, dst, d); + } + } + } else if (src_p->kind == RCPU && dst_p && dst_p->kind == RCPU) { + // Register to register + mov_reg_reg(ctx, dst_p->id, src_p->id, dst->size == 8); + } else if (src_p->kind == RFPU && dst_p && dst_p->kind == RFPU) { + // FP register to FP register + fmov_reg_reg(ctx, dst_p->id, src_p->id, dst->size == 8); + } else { + // Generic case: fetch src, store to dst + preg *s = (src_p && (src_p->kind == RCPU || src_p->kind == RFPU)) ? src_p : fetch(ctx, src); + preg *d = (dst_p && (dst_p->kind == RCPU || dst_p->kind == RFPU)) ? 
dst_p : fetch(ctx, dst); + + if (IS_FLOAT(dst)) { + fmov_reg_reg(ctx, d->id, s->id, dst->size == 8); + } else { + mov_reg_reg(ctx, d->id, s->id, dst->size == 8); + } + + reg_bind(ctx, dst, d); + } +} + +// ============================================================================ +// Opcode Handlers +// ============================================================================ + +/** + * OMov - Move/copy a value from one register to another + */ +static void op_mov(jit_ctx *ctx, vreg *dst, vreg *src) { + preg *r = fetch(ctx, src); + + // Handle special case for HF32 (32-bit float) + // Ensure it's in an FP register + if (src->t->kind == HF32 && r->kind != RFPU) { + r = alloc_fpu(ctx); + // Load from stack to FP register + ldr_stack_fp(ctx, r->id, src->stackPos, src->size); + reg_bind(ctx, src, r); + } + + // Store to destination stack slot + store(ctx, dst, r); + + // Clear dst's old register binding to prevent stale value from being spilled + // The correct value is now on the stack from store() above + if (dst->current != NULL) { + dst->current->holds = NULL; + dst->current = NULL; + } +} + +/** + * Store a constant value to a virtual register + */ +static void store_const(jit_ctx *ctx, vreg *dst, int64_t val) { + preg *p; + + if (IS_FLOAT(dst)) { + // Allocate FPU register for float constants + p = alloc_fpu(ctx); + if (p->holds != NULL) + free_reg(ctx, p); + + if (val == 0) { + // FMOV Dd, XZR - zero the FP register + EMIT32(ctx, (1 << 31) | (0 << 29) | (0x1E << 24) | (1 << 22) | (1 << 21) | (7 << 16) | (31 << 5) | p->id); + } else { + // Load bits to GPR, then FMOV to FPR + load_immediate(ctx, val, RTMP, true); + // FMOV Dd, Xn: sf=1, S=0, type=01, rmode=00, opcode=00111, Rn, Rd + EMIT32(ctx, (0x9E670000) | (RTMP << 5) | p->id); + } + } else { + p = alloc_reg(ctx, dst, RCPU); + if (p->holds != NULL) + free_reg(ctx, p); + load_immediate(ctx, val, p->id, dst->size == 8); + } + + reg_bind(ctx, dst, p); + store(ctx, dst, p); // Constants must be stored immediately for correct loop initialization +} + +// ============================================================================ +// Arithmetic Operations +// ============================================================================ + +// Forward declaration for op_call_native (used by floating-point modulo) +static void op_call_native(jit_ctx *ctx, vreg *dst, hl_type *ftype, void *func_ptr, vreg **args, int nargs); +// Forward declaration for prepare_call_args (used by op_jump for dynamic comparisons) +static int prepare_call_args(jit_ctx *ctx, hl_type **arg_types, vreg **args, int nargs, bool is_native); + +/** + * Binary arithmetic/logic operations handler + * Handles: OAdd, OSub, OMul, OSDiv, OUDiv, OSMod, OUMod, OAnd, OOr, OXor, shifts + */ +static void op_binop(jit_ctx *ctx, vreg *dst, vreg *a, vreg *b, hl_op op) { + bool is_64bit = dst->size == 8; + int sf = is_64bit ? 1 : 0; + + // Handle floating-point operations + if (IS_FLOAT(dst)) { + preg *pa = fetch(ctx, a); + preg *pb = fetch(ctx, b); + preg *pd; + + // If dst == a, reuse pa as destination to avoid clobbering issues + // when reg_bind tries to store the old (now stale) value + if (dst == a) { + pd = pa; + } else { + pd = alloc_fpu(ctx); + if (pd->holds != NULL) + free_reg(ctx, pd); + } + + int type = (dst->t->kind == HF64) ? 
0x01 : 0x00; // 01=double, 00=single + + switch (op) { + case OAdd: + // FADD Vd, Vn, Vm + encode_fp_arith(ctx, 0, 0, type, pb->id, 0x02, pa->id, pd->id); + break; + case OSub: + // FSUB Vd, Vn, Vm + encode_fp_arith(ctx, 0, 0, type, pb->id, 0x03, pa->id, pd->id); + break; + case OMul: + // FMUL Vd, Vn, Vm + encode_fp_arith(ctx, 0, 0, type, pb->id, 0x00, pa->id, pd->id); + break; + case OSDiv: + case OUDiv: // Same as OSDiv for floats + // FDIV Vd, Vn, Vm + encode_fp_arith(ctx, 0, 0, type, pb->id, 0x01, pa->id, pd->id); + break; + case OSMod: + case OUMod: { + // Floating-point modulo: call fmod/fmodf from C library + // Need to discard pa/pb since op_call_native will spill + discard(ctx, pa); + discard(ctx, pb); + void *mod_func = (dst->t->kind == HF64) ? (void*)fmod : (void*)fmodf; + vreg *args[2] = { a, b }; + op_call_native(ctx, dst, NULL, mod_func, args, 2); + return; // op_call_native handles result storage + } + default: + JIT_ASSERT(0); // Invalid FP operation + } + + reg_bind(ctx, dst, pd); + mark_dirty(ctx, dst); + return; + } + + // Integer operations + preg *pa = fetch(ctx, a); + preg *pb = fetch(ctx, b); + preg *pd; + + // If dst == a, reuse pa as destination to avoid clobbering issues + // when reg_bind tries to store the old (now stale) value + if (dst == a) { + pd = pa; + } else { + pd = alloc_cpu(ctx, RCPU); + if (pd->holds != NULL) + free_reg(ctx, pd); + } + + switch (op) { + case OAdd: + // ADD Xd, Xn, Xm + encode_add_sub_reg(ctx, sf, 0, 0, 0, pb->id, 0, pa->id, pd->id); + break; + + case OSub: + // SUB Xd, Xn, Xm + encode_add_sub_reg(ctx, sf, 1, 0, 0, pb->id, 0, pa->id, pd->id); + break; + + case OMul: + // MUL Xd, Xn, Xm (using MADD with XZR as addend) + encode_madd_msub(ctx, sf, 0, pb->id, XZR, pa->id, pd->id); + break; + + case OSDiv: + // SDIV Xd, Xn, Xm (signed division) + // Note: encode_div U=1 means SDIV, U=0 means UDIV (per ARM ISA) + encode_div(ctx, sf, 1, pb->id, pa->id, pd->id); + break; + + case OUDiv: + // UDIV Xd, Xn, Xm (unsigned division) + encode_div(ctx, sf, 0, pb->id, pa->id, pd->id); + break; + + case OSMod: { + // Signed modulo with special case handling: + // - divisor == 0: return 0 (avoid returning dividend) + // - divisor == -1: return 0 (avoid MIN % -1 overflow) + // CBZ divisor, zero_case + int jz = BUF_POS(); + encode_cbz_cbnz(ctx, sf, 0, 0, pb->id); // CBZ + + // CMP divisor, #-1; B.EQ zero_case + // CMN is ADD setting flags, so CMN Xn, #1 checks if Xn == -1 + encode_add_sub_imm(ctx, sf, 1, 1, 0, 1, pb->id, XZR); // CMN divisor, #1 + int jneg1 = BUF_POS(); + encode_branch_cond(ctx, 0, COND_EQ); // B.EQ + + // Normal path: remainder = dividend - (quotient * divisor) + encode_div(ctx, sf, 1, pb->id, pa->id, RTMP); // RTMP = a / b (signed) + encode_madd_msub(ctx, sf, 1, pb->id, pa->id, RTMP, pd->id); // pd = a - (RTMP * b) + int jend = BUF_POS(); + encode_branch_uncond(ctx, 0); // B end + + // Zero case: return 0 + int zero_pos = BUF_POS(); + // MOV pd, #0 (using ORR with XZR) + encode_logical_reg(ctx, sf, 0x01, 0, 0, XZR, 0, XZR, pd->id); // ORR pd, XZR, XZR + + patch_jump(ctx, jz, zero_pos); + patch_jump(ctx, jneg1, zero_pos); + patch_jump(ctx, jend, BUF_POS()); + break; + } + + case OUMod: { + // Unsigned modulo with special case: + // - divisor == 0: return 0 + // CBZ divisor, zero_case + int jz = BUF_POS(); + encode_cbz_cbnz(ctx, sf, 0, 0, pb->id); // CBZ + + // Normal path + encode_div(ctx, sf, 0, pb->id, pa->id, RTMP); // RTMP = a / b (unsigned) + encode_madd_msub(ctx, sf, 1, pb->id, pa->id, RTMP, pd->id); // pd = a - (RTMP * b) + int 
jend = BUF_POS(); + encode_branch_uncond(ctx, 0); // B end + + // Zero case: return 0 + int zero_pos = BUF_POS(); + encode_logical_reg(ctx, sf, 0x01, 0, 0, XZR, 0, XZR, pd->id); // ORR pd, XZR, XZR + + patch_jump(ctx, jz, zero_pos); + patch_jump(ctx, jend, BUF_POS()); + break; + } + + case OAnd: + // AND Xd, Xn, Xm + encode_logical_reg(ctx, sf, 0x00, 0, 0, pb->id, 0, pa->id, pd->id); + break; + + case OOr: + // ORR Xd, Xn, Xm + encode_logical_reg(ctx, sf, 0x01, 0, 0, pb->id, 0, pa->id, pd->id); + break; + + case OXor: + // EOR Xd, Xn, Xm + encode_logical_reg(ctx, sf, 0x02, 0, 0, pb->id, 0, pa->id, pd->id); + break; + + case OShl: + // LSL Xd, Xn, Xm (logical shift left) + encode_shift_reg(ctx, sf, 0x00, pb->id, pa->id, pd->id); + break; + + case OUShr: + // LSR Xd, Xn, Xm (logical shift right - unsigned) + encode_shift_reg(ctx, sf, 0x01, pb->id, pa->id, pd->id); + break; + + case OSShr: + // ASR Xd, Xn, Xm (arithmetic shift right - signed) + encode_shift_reg(ctx, sf, 0x02, pb->id, pa->id, pd->id); + break; + + default: + JIT_ASSERT(0); // Unknown operation + } + + // Mask result for sub-32-bit integer types (UI8, UI16) + // AArch64 doesn't have 8/16-bit registers like x86, so we need explicit masking + if (dst->t->kind == HUI8 || dst->t->kind == HBOOL) { + // AND Wd, Wd, #0xFF (sf=0, opc=0, N=0, immr=0, imms=7) + encode_logical_imm(ctx, 0, 0x00, 0, 0, 7, pd->id, pd->id); + } else if (dst->t->kind == HUI16) { + // AND Wd, Wd, #0xFFFF (sf=0, opc=0, N=0, immr=0, imms=15) + encode_logical_imm(ctx, 0, 0x00, 0, 0, 15, pd->id, pd->id); + } + + reg_bind(ctx, dst, pd); + mark_dirty(ctx, dst); +} + +/** + * Unary negation (ONeg) + */ +static void op_neg(jit_ctx *ctx, vreg *dst, vreg *a) { + if (IS_FLOAT(a)) { + // FNEG Vd, Vn + preg *pa = fetch(ctx, a); + preg *pd = alloc_fpu(ctx); + + if (pd->holds != NULL) + free_reg(ctx, pd); + + int type = (dst->t->kind == HF64) ? 0x01 : 0x00; + encode_fp_1src(ctx, 0, 0, type, 0x02, pa->id, pd->id); + + reg_bind(ctx, dst, pd); + mark_dirty(ctx, dst); + } else { + // NEG Xd, Xn (implemented as SUB Xd, XZR, Xn) + preg *pa = fetch(ctx, a); + preg *pd = alloc_cpu(ctx, RCPU); + + if (pd->holds != NULL) + free_reg(ctx, pd); + + int sf = (dst->size == 8) ? 1 : 0; + encode_add_sub_reg(ctx, sf, 1, 0, 0, pa->id, 0, XZR, pd->id); + + // Mask result for sub-32-bit integer types + if (dst->t->kind == HUI8 || dst->t->kind == HBOOL) { + encode_logical_imm(ctx, 0, 0x00, 0, 0, 7, pd->id, pd->id); + } else if (dst->t->kind == HUI16) { + encode_logical_imm(ctx, 0, 0x00, 0, 0, 15, pd->id, pd->id); + } + + reg_bind(ctx, dst, pd); + mark_dirty(ctx, dst); + } +} + +/** + * Logical NOT (ONot) - boolean negation + */ +static void op_not(jit_ctx *ctx, vreg *dst, vreg *a) { + // XOR with 1 (boolean NOT) + preg *pa = fetch(ctx, a); + preg *pd = alloc_cpu(ctx, RCPU); + + if (pd->holds != NULL) + free_reg(ctx, pd); + + // Load immediate 1 + load_immediate(ctx, 1, RTMP, false); + + // EOR Wd, Wn, Wtmp (32-bit XOR with 1) + encode_logical_reg(ctx, 0, 0x02, 0, 0, RTMP, 0, pa->id, pd->id); + + reg_bind(ctx, dst, pd); + mark_dirty(ctx, dst); +} + +/** + * Increment (OIncr) + */ +static void op_incr(jit_ctx *ctx, vreg *dst) { + // ADD Xd, Xd, #1; the store back to the stack slot is deferred via mark_dirty + preg *pd = fetch(ctx, dst); + int sf = (dst->size == 8) ? 
1 : 0; + + // ADD Xd, Xn, #1 + encode_add_sub_imm(ctx, sf, 0, 0, 0, 1, pd->id, pd->id); + + // Mask result for sub-32-bit integer types + if (dst->t->kind == HUI8 || dst->t->kind == HBOOL) { + encode_logical_imm(ctx, 0, 0x00, 0, 0, 7, pd->id, pd->id); + } else if (dst->t->kind == HUI16) { + encode_logical_imm(ctx, 0, 0x00, 0, 0, 15, pd->id, pd->id); + } + + mark_dirty(ctx, dst); +} + +/** + * Decrement (ODecr) + */ +static void op_decr(jit_ctx *ctx, vreg *dst) { + // SUB Xd, Xd, #1; the store back to the stack slot is deferred via mark_dirty + preg *pd = fetch(ctx, dst); + int sf = (dst->size == 8) ? 1 : 0; + + // SUB Xd, Xn, #1 + encode_add_sub_imm(ctx, sf, 1, 0, 0, 1, pd->id, pd->id); + + // Mask result for sub-32-bit integer types + if (dst->t->kind == HUI8 || dst->t->kind == HBOOL) { + encode_logical_imm(ctx, 0, 0x00, 0, 0, 7, pd->id, pd->id); + } else if (dst->t->kind == HUI16) { + encode_logical_imm(ctx, 0, 0x00, 0, 0, 15, pd->id, pd->id); + } + + mark_dirty(ctx, dst); +} + +// ============================================================================ +// Type Conversion Operations +// ============================================================================ + +/** + * Convert to integer (OToInt) + * Handles: float->int, i32->i64 sign extension, and int->int copy + */ +static void op_toint(jit_ctx *ctx, vreg *dst, vreg *src) { + // Same register optimization + if (dst == src) return; + + // Case 1: Float to integer conversion + if (IS_FLOAT(src)) { + preg *ps = fetch(ctx, src); + preg *pd = alloc_cpu(ctx, RCPU); + + if (pd->holds != NULL) + free_reg(ctx, pd); + + int sf = (dst->size == 8) ? 1 : 0; + int type = (src->t->kind == HF64) ? 0x01 : 0x00; + + // FCVTZS Xd, Vn (float to signed int, round toward zero) + encode_fcvt_int(ctx, sf, 0, type, 0x03, 0x00, ps->id, pd->id); + + reg_bind(ctx, dst, pd); + mark_dirty(ctx, dst); + return; + } + + // Case 2: i32 to i64 sign extension + if (dst->size == 8 && src->size == 4) { + preg *ps = fetch(ctx, src); + preg *pd = alloc_cpu(ctx, RCPU); + + if (pd->holds != NULL) + free_reg(ctx, pd); + + Arm64Reg src_r = (ps->kind == RCPU) ? (Arm64Reg)ps->id : RTMP; + if (ps->kind == RCONST) { + load_immediate(ctx, ps->id, src_r, false); + } else if (ps->kind != RCPU) { + ldr_stack(ctx, src_r, src->stackPos, src->size); + } + + // SXTW Xd, Wn (sign extend word to doubleword) + // Encoding: 0x93407c00 | (Rn << 5) | Rd + EMIT32(ctx, 0x93407c00 | (src_r << 5) | pd->id); + + reg_bind(ctx, dst, pd); + mark_dirty(ctx, dst); + return; + } + + // Case 3: Integer to integer copy (same size or truncation) + preg *ps = fetch(ctx, src); + preg *pd = alloc_cpu(ctx, RCPU); + + if (pd->holds != NULL) + free_reg(ctx, pd); + + Arm64Reg src_r = (ps->kind == RCPU) ? (Arm64Reg)ps->id : RTMP; + if (ps->kind == RCONST) { + load_immediate(ctx, ps->id, src_r, src->size == 8); + } else if (ps->kind != RCPU) { + ldr_stack(ctx, src_r, src->stackPos, src->size); + } + + // MOV Xd, Xn (or MOV Wd, Wn for 32-bit) + int sf = (dst->size == 8) ? 
1 : 0; + mov_reg_reg(ctx, pd->id, src_r, sf); + + reg_bind(ctx, dst, pd); + mark_dirty(ctx, dst); +} + +/** + * Convert signed integer to float, or convert between float precisions (OToSFloat) + * Handles: integer -> float (SCVTF), F64 -> F32, F32 -> F64 (FCVT) + */ +static void op_tosfloat(jit_ctx *ctx, vreg *dst, vreg *src) { + // Handle float-to-float precision conversions + if (src->t->kind == HF64 && dst->t->kind == HF32) { + // F64 -> F32: FCVT Sd, Dn + preg *ps = fetch(ctx, src); + preg *pd = alloc_fpu(ctx); + if (pd->holds != NULL) + free_reg(ctx, pd); + + Arm64FpReg src_r = (ps->kind == RFPU) ? (Arm64FpReg)ps->id : V16; + if (ps->kind != RFPU) { + ldr_stack_fp(ctx, src_r, src->stackPos, src->size); + } + + // FCVT Sd, Dn: type=1 (double source), opcode=4 (convert to single) + encode_fp_1src(ctx, 0, 0, 1, 4, src_r, pd->id); + + reg_bind(ctx, dst, pd); + mark_dirty(ctx, dst); + return; + } + + if (src->t->kind == HF32 && dst->t->kind == HF64) { + // F32 -> F64: FCVT Dd, Sn + preg *ps = fetch(ctx, src); + preg *pd = alloc_fpu(ctx); + if (pd->holds != NULL) + free_reg(ctx, pd); + + Arm64FpReg src_r = (ps->kind == RFPU) ? (Arm64FpReg)ps->id : V16; + if (ps->kind != RFPU) { + ldr_stack_fp(ctx, src_r, src->stackPos, src->size); + } + + // FCVT Dd, Sn: type=0 (single source), opcode=5 (convert to double) + encode_fp_1src(ctx, 0, 0, 0, 5, src_r, pd->id); + + reg_bind(ctx, dst, pd); + mark_dirty(ctx, dst); + return; + } + + // Integer to float conversion (original behavior) + preg *ps = fetch(ctx, src); + preg *pd = alloc_fpu(ctx); + + if (pd->holds != NULL) + free_reg(ctx, pd); + + int sf = (src->size == 8) ? 1 : 0; + int type = (dst->t->kind == HF64) ? 0x01 : 0x00; + + // SCVTF Vd, Xn (signed int to float) + encode_int_fcvt(ctx, sf, 0, type, 0x00, 0x02, ps->id, pd->id); + + reg_bind(ctx, dst, pd); + mark_dirty(ctx, dst); +} + +/** + * Convert unsigned integer to float (OToUFloat) + */ +static void op_toufloat(jit_ctx *ctx, vreg *dst, vreg *src) { + preg *ps = fetch(ctx, src); + preg *pd = alloc_fpu(ctx); + + if (pd->holds != NULL) + free_reg(ctx, pd); + + int sf = (src->size == 8) ? 1 : 0; + int type = (dst->t->kind == HF64) ? 
0x01 : 0x00; + + // UCVTF Vd, Xn (unsigned int to float) + encode_int_fcvt(ctx, sf, 0, type, 0x00, 0x03, ps->id, pd->id); + + reg_bind(ctx, dst, pd); + mark_dirty(ctx, dst); +} + +// ============================================================================ +// Jump Patching +// ============================================================================ + +/** + * Add a jump to the patch list + * Also mark the target opcode so we know to discard registers when we reach it + */ +static void register_jump(jit_ctx *ctx, int pos, int target) { + jlist *j = (jlist*)malloc(sizeof(jlist)); + j->pos = pos; + j->target = target; + j->next = ctx->jumps; + ctx->jumps = j; + + // Mark target as a jump destination (like x86 does) + // This tells us to discard register bindings when we reach this opcode + if (target > 0 && target < ctx->maxOps && ctx->opsPos[target] == 0) + ctx->opsPos[target] = -1; +} + +/** + * Patch a jump instruction with the correct offset + * AArch64 branches use instruction offsets (divide byte offset by 4) + */ +static void patch_jump(jit_ctx *ctx, int pos, int target_pos) { + unsigned int *code = (unsigned int*)(ctx->startBuf + pos); + int offset = target_pos - pos; // Byte offset + int insn_offset = offset / 4; // Instruction offset + + // Check if this is a conditional branch (B.cond) or unconditional (B) + unsigned int insn = *code; + unsigned int opcode = (insn >> 24) & 0xFF; + + if (opcode == 0x54) { + // B.cond - 19-bit signed offset + // Range: ±1MB (±0x40000 instructions, ±0x100000 bytes) + if (insn_offset < -0x40000 || insn_offset >= 0x40000) { + printf("JIT Error: Conditional branch offset too large: %d\n", insn_offset); + JIT_ASSERT(0); + } + // Clear old offset, set new offset (bits 5-23) + *code = (insn & 0xFF00001F) | ((insn_offset & 0x7FFFF) << 5); + } else if ((opcode & 0xFC) == 0x14) { + // B or BL - 26-bit signed offset + // Range: ±128MB (±0x2000000 instructions, ±0x8000000 bytes) + if (insn_offset < -0x2000000 || insn_offset >= 0x2000000) { + printf("JIT Error: Branch offset too large: %d\n", insn_offset); + JIT_ASSERT(0); + } + // Clear old offset, set new offset (bits 0-25) + *code = (insn & 0xFC000000) | (insn_offset & 0x3FFFFFF); + } else if ((opcode & 0x7E) == 0x34) { + // CBZ/CBNZ - 19-bit signed offset + if (insn_offset < -0x40000 || insn_offset >= 0x40000) { + printf("JIT Error: CBZ/CBNZ offset too large: %d\n", insn_offset); + JIT_ASSERT(0); + } + *code = (insn & 0xFF00001F) | ((insn_offset & 0x7FFFF) << 5); + } else { + printf("JIT Error: Unknown branch instruction at %d: 0x%08X\n", pos, insn); + JIT_ASSERT(0); + } +} + +// ============================================================================ +// Control Flow & Comparisons +// ============================================================================ + +/** + * Map HashLink condition to AArch64 condition code + */ +static ArmCondition hl_cond_to_arm(hl_op op, bool is_float) { + switch (op) { + case OJEq: return COND_EQ; // Equal + case OJNotEq: return COND_NE; // Not equal + case OJSLt: return is_float ? COND_MI : COND_LT; // Signed less than + case OJSGte: return is_float ? 
COND_PL : COND_GE; // Signed greater or equal + case OJSGt: return COND_GT; // Signed greater than + case OJSLte: return COND_LE; // Signed less or equal + case OJULt: return COND_LO; // Unsigned less than (carry clear) + case OJUGte: return COND_HS; // Unsigned greater or equal (carry set) + // Float NaN-aware comparisons (includes unordered case) + case OJNotLt: return COND_HS; // Not less than (C=1: >=, or unordered) + case OJNotGte: return COND_LT; // Not greater/equal (N!=V: <, or unordered) + default: + JIT_ASSERT(0); + return COND_AL; + } +} + +/** + * Conditional and comparison jumps + * + * Handles special cases for dynamic types: + * - HDYN/HFUN: Call hl_dyn_compare() to compare dynamic values + * - HTYPE: Call hl_same_type() to compare type objects + * - HNULL: Compare boxed values (Null) + * - HVIRTUAL: Compare virtual objects with underlying values + */ +static void op_jump(jit_ctx *ctx, vreg *a, vreg *b, hl_op op, int target_opcode) { + // Spill all registers to stack BEFORE the branch. + // Target label will use discard_regs() and expect values on stack. + spill_regs(ctx); + spill_callee_saved(ctx); // Callee-saved must also be spilled at control flow merge + + // Handle dynamic and function type comparisons + if (a->t->kind == HDYN || b->t->kind == HDYN || a->t->kind == HFUN || b->t->kind == HFUN) { + // Call hl_dyn_compare(a, b) which returns: + // 0 if equal + // negative if a < b + // positive if a > b + // hl_invalid_comparison (0xAABBCCDD) for incomparable types + vreg *args[2] = { a, b }; + int stack_space = prepare_call_args(ctx, NULL, args, 2, true); + + // Load function pointer and call + load_immediate(ctx, (int64_t)hl_dyn_compare, RTMP, true); + EMIT32(ctx, 0xD63F0000 | (RTMP << 5)); // BLR RTMP + + // Clean up stack + if (stack_space > 0) { + encode_add_sub_imm(ctx, 1, 0, 0, 0, stack_space, SP_REG, SP_REG); + } + + // Handle ordered comparisons (OJSLt/OJSGt/OJSLte/OJSGte) - need to check for hl_invalid_comparison + if (op == OJSLt || op == OJSGt || op == OJSLte || op == OJSGte) { + // Compare result with hl_invalid_comparison (0xAABBCCDD) + // If equal, don't take the branch (skip the jump) + load_immediate(ctx, hl_invalid_comparison, RTMP, false); + encode_add_sub_reg(ctx, 0, 1, 1, 0, RTMP, 0, X0, XZR); // CMP W0, WTMP + int skip_pos = BUF_POS(); + encode_branch_cond(ctx, 0, COND_EQ); // B.EQ skip (if invalid comparison) + + // Valid comparison - compare result with 0 for sign flags + encode_add_sub_imm(ctx, 0, 1, 1, 0, 0, X0, XZR); // CMP W0, #0 + ArmCondition cond = hl_cond_to_arm(op, false); + int jump_pos = BUF_POS(); + encode_branch_cond(ctx, 0, cond); + register_jump(ctx, jump_pos, target_opcode); + + // Patch the skip branch to here + int skip_offset = (BUF_POS() - skip_pos) / 4; + *(int*)(ctx->startBuf + skip_pos) = (*(int*)(ctx->startBuf + skip_pos) & 0xFF00001F) | ((skip_offset & 0x7FFFF) << 5); + return; + } + + // For OJEq/OJNotEq: result == 0 means equal + // TST W0, W0 (sets flags based on W0 & W0) + encode_logical_reg(ctx, 0, 0x3, 0, 0, X0, 0, X0, XZR); // ANDS WZR, W0, W0 + + // Branch based on zero flag (only equality ops should reach here) + ArmCondition cond = (op == OJEq) ? 
COND_EQ : COND_NE; + int jump_pos = BUF_POS(); + encode_branch_cond(ctx, 0, cond); + register_jump(ctx, jump_pos, target_opcode); + return; + } + + // Handle type comparisons + if (a->t->kind == HTYPE) { + // Call hl_same_type(a, b) which returns bool + vreg *args[2] = { a, b }; + int stack_space = prepare_call_args(ctx, NULL, args, 2, true); + + load_immediate(ctx, (int64_t)hl_same_type, RTMP, true); + EMIT32(ctx, 0xD63F0000 | (RTMP << 5)); // BLR RTMP + + if (stack_space > 0) { + encode_add_sub_imm(ctx, 1, 0, 0, 0, stack_space, SP_REG, SP_REG); + } + + // Compare result with 1 (true): CMP W0, #1 = SUBS WZR, W0, #1 + // Note: S=1 is required both to set flags AND to make Rd=31 mean XZR (not SP) + encode_add_sub_imm(ctx, 0, 1, 1, 0, 1, X0, XZR); // CMP W0, #1 + + ArmCondition cond = (op == OJEq) ? COND_EQ : COND_NE; + int jump_pos = BUF_POS(); + encode_branch_cond(ctx, 0, cond); + register_jump(ctx, jump_pos, target_opcode); + return; + } + + // Handle HNULL (Null) comparisons + // HNULL values have their inner value at offset HDYN_VALUE (8) + if (a->t->kind == HNULL) { + preg *pa = fetch(ctx, a); + preg *pb = fetch(ctx, b); + Arm64Reg ra = (pa->kind == RCPU) ? (Arm64Reg)pa->id : RTMP; + Arm64Reg rb = (pb->kind == RCPU) ? (Arm64Reg)pb->id : RTMP2; + if (pa->kind != RCPU) ldr_stack(ctx, ra, a->stackPos, 8); + if (pb->kind != RCPU) ldr_stack(ctx, rb, b->stackPos, 8); + + if (op == OJEq) { + // if (a == b || (a && b && a->v == b->v)) goto target + // First: CMP a, b - if equal, jump to target + encode_add_sub_reg(ctx, 1, 1, 1, 0, rb, 0, ra, XZR); + int jump_pos1 = BUF_POS(); + encode_branch_cond(ctx, 0, COND_EQ); + register_jump(ctx, jump_pos1, target_opcode); + + // If a == NULL, skip (don't jump) + int skip_a = BUF_POS(); + encode_cbz_cbnz(ctx, 1, 0, 0, ra); // CBZ ra, skip + + // If b == NULL, skip (don't jump) + int skip_b = BUF_POS(); + encode_cbz_cbnz(ctx, 1, 0, 0, rb); // CBZ rb, skip + + // Load inner values: a->v and b->v (at offset HDYN_VALUE) + encode_ldr_str_imm(ctx, 0x03, 0, 0x01, HDYN_VALUE / 8, ra, ra); // LDR ra, [ra, #8] + encode_ldr_str_imm(ctx, 0x03, 0, 0x01, HDYN_VALUE / 8, rb, rb); // LDR rb, [rb, #8] + + // Compare inner values + encode_add_sub_reg(ctx, 1, 1, 1, 0, rb, 0, ra, XZR); + int jump_pos2 = BUF_POS(); + encode_branch_cond(ctx, 0, COND_EQ); + register_jump(ctx, jump_pos2, target_opcode); + + // Patch skip branches to here + int here = BUF_POS(); + int off_a = (here - skip_a) / 4; + int off_b = (here - skip_b) / 4; + *(int*)(ctx->startBuf + skip_a) = (*(int*)(ctx->startBuf + skip_a) & 0xFF00001F) | ((off_a & 0x7FFFF) << 5); + *(int*)(ctx->startBuf + skip_b) = (*(int*)(ctx->startBuf + skip_b) & 0xFF00001F) | ((off_b & 0x7FFFF) << 5); + } else if (op == OJNotEq) { + // if (a != b && (!a || !b || a->v != b->v)) goto target + // First: CMP a, b - if equal, skip entirely + encode_add_sub_reg(ctx, 1, 1, 1, 0, rb, 0, ra, XZR); + int skip_eq = BUF_POS(); + encode_branch_cond(ctx, 0, COND_EQ); // B.EQ skip (a == b means not-not-equal) + + // If a == NULL, goto target (NULL != non-NULL) + int jump_a = BUF_POS(); + encode_cbz_cbnz(ctx, 1, 0, 0, ra); // CBZ ra, target + register_jump(ctx, jump_a, target_opcode); + + // If b == NULL, goto target (non-NULL != NULL) + int jump_b = BUF_POS(); + encode_cbz_cbnz(ctx, 1, 0, 0, rb); // CBZ rb, target + register_jump(ctx, jump_b, target_opcode); + + // Load inner values + encode_ldr_str_imm(ctx, 0x03, 0, 0x01, HDYN_VALUE / 8, ra, ra); + encode_ldr_str_imm(ctx, 0x03, 0, 0x01, HDYN_VALUE / 8, rb, rb); + + // Compare inner values - if 
not equal, goto target + encode_add_sub_reg(ctx, 1, 1, 1, 0, rb, 0, ra, XZR); + int skip_cmp = BUF_POS(); + encode_branch_cond(ctx, 0, COND_EQ); // B.EQ skip (values equal, don't jump) + + // Values not equal - jump to target + int jump_ne = BUF_POS(); + encode_branch_uncond(ctx, 0); + register_jump(ctx, jump_ne, target_opcode); + + // Patch skip branches + int here = BUF_POS(); + int off_eq = (here - skip_eq) / 4; + int off_cmp = (here - skip_cmp) / 4; + *(int*)(ctx->startBuf + skip_eq) = (*(int*)(ctx->startBuf + skip_eq) & 0xFF00001F) | ((off_eq & 0x7FFFF) << 5); + *(int*)(ctx->startBuf + skip_cmp) = (*(int*)(ctx->startBuf + skip_cmp) & 0xFF00001F) | ((off_cmp & 0x7FFFF) << 5); + } else { + jit_error("Unsupported comparison op for HNULL"); + } + return; + } + + // Handle HVIRTUAL comparisons + // Virtual objects have a 'value' pointer at offset HL_WSIZE (8) + if (a->t->kind == HVIRTUAL) { + preg *pa = fetch(ctx, a); + preg *pb = fetch(ctx, b); + Arm64Reg ra = (pa->kind == RCPU) ? (Arm64Reg)pa->id : RTMP; + Arm64Reg rb = (pb->kind == RCPU) ? (Arm64Reg)pb->id : RTMP2; + if (pa->kind != RCPU) ldr_stack(ctx, ra, a->stackPos, 8); + if (pb->kind != RCPU) ldr_stack(ctx, rb, b->stackPos, 8); + + if (b->t->kind == HOBJ) { + // Comparing virtual to object: compare a->value with b + if (op == OJEq) { + // if (a ? (b && a->value == b) : (b == NULL)) goto target + int ja = BUF_POS(); + encode_cbz_cbnz(ctx, 1, 0, 0, ra); // CBZ ra, check_b_null + + // a != NULL: check if b != NULL and a->value == b + int jb = BUF_POS(); + encode_cbz_cbnz(ctx, 1, 0, 0, rb); // CBZ rb, skip (a!=NULL, b==NULL: not equal) + + // Load a->value and compare with b + encode_ldr_str_imm(ctx, 0x03, 0, 0x01, HL_WSIZE / 8, ra, ra); // LDR ra, [ra, #8] + encode_add_sub_reg(ctx, 1, 1, 1, 0, rb, 0, ra, XZR); + int jvalue = BUF_POS(); + encode_branch_uncond(ctx, 0); // B to_cmp + + // a == NULL: check if b == NULL + int here_ja = BUF_POS(); + int off_ja = (here_ja - ja) / 4; + *(int*)(ctx->startBuf + ja) = (*(int*)(ctx->startBuf + ja) & 0xFF00001F) | ((off_ja & 0x7FFFF) << 5); + encode_add_sub_reg(ctx, 1, 1, 1, 0, rb, 0, XZR, XZR); // CMP rb, #0 (TST rb) + + // Patch jvalue to here (to_cmp) + int here_jv = BUF_POS(); + int off_jv = (here_jv - jvalue) / 4; + *(int*)(ctx->startBuf + jvalue) = 0x14000000 | (off_jv & 0x3FFFFFF); + + // Now flags are set - branch if equal + int jump_pos = BUF_POS(); + encode_branch_cond(ctx, 0, COND_EQ); + register_jump(ctx, jump_pos, target_opcode); + + // Patch jb to skip + int here_jb = BUF_POS(); + int off_jb = (here_jb - jb) / 4; + *(int*)(ctx->startBuf + jb) = (*(int*)(ctx->startBuf + jb) & 0xFF00001F) | ((off_jb & 0x7FFFF) << 5); + } else if (op == OJNotEq) { + // if (a ? 
(b == NULL || a->value != b) : (b != NULL)) goto target + int ja = BUF_POS(); + encode_cbz_cbnz(ctx, 1, 0, 0, ra); // CBZ ra, check_b_notnull + + // a != NULL: jump if b == NULL + int jump_b_null = BUF_POS(); + encode_cbz_cbnz(ctx, 1, 0, 0, rb); // CBZ rb, target + register_jump(ctx, jump_b_null, target_opcode); + + // Load a->value and compare with b + encode_ldr_str_imm(ctx, 0x03, 0, 0x01, HL_WSIZE / 8, ra, ra); + encode_add_sub_reg(ctx, 1, 1, 1, 0, rb, 0, ra, XZR); + int jvalue = BUF_POS(); + encode_branch_uncond(ctx, 0); // B to_cmp + + // a == NULL: check if b != NULL + int here_ja = BUF_POS(); + int off_ja = (here_ja - ja) / 4; + *(int*)(ctx->startBuf + ja) = (*(int*)(ctx->startBuf + ja) & 0xFF00001F) | ((off_ja & 0x7FFFF) << 5); + encode_add_sub_reg(ctx, 1, 1, 1, 0, rb, 0, XZR, XZR); // CMP rb, #0 + + // Patch jvalue + int here_jv = BUF_POS(); + int off_jv = (here_jv - jvalue) / 4; + *(int*)(ctx->startBuf + jvalue) = 0x14000000 | (off_jv & 0x3FFFFFF); + + // Branch if not equal + int jump_pos = BUF_POS(); + encode_branch_cond(ctx, 0, COND_NE); + register_jump(ctx, jump_pos, target_opcode); + } else { + jit_error("Unsupported comparison op for HVIRTUAL vs HOBJ"); + } + return; + } + + // Both are HVIRTUAL - compare underlying values + if (op == OJEq) { + // if (a == b || (a && b && a->value && b->value && a->value == b->value)) goto + encode_add_sub_reg(ctx, 1, 1, 1, 0, rb, 0, ra, XZR); + int jump_eq = BUF_POS(); + encode_branch_cond(ctx, 0, COND_EQ); + register_jump(ctx, jump_eq, target_opcode); + + // Check a != NULL + int skip_a = BUF_POS(); + encode_cbz_cbnz(ctx, 1, 0, 0, ra); + // Check b != NULL + int skip_b = BUF_POS(); + encode_cbz_cbnz(ctx, 1, 0, 0, rb); + + // Load a->value + encode_ldr_str_imm(ctx, 0x03, 0, 0x01, HL_WSIZE / 8, ra, ra); + int skip_av = BUF_POS(); + encode_cbz_cbnz(ctx, 1, 0, 0, ra); // CBZ if a->value == NULL + + // Load b->value + encode_ldr_str_imm(ctx, 0x03, 0, 0x01, HL_WSIZE / 8, rb, rb); + int skip_bv = BUF_POS(); + encode_cbz_cbnz(ctx, 1, 0, 0, rb); // CBZ if b->value == NULL + + // Compare values + encode_add_sub_reg(ctx, 1, 1, 1, 0, rb, 0, ra, XZR); + int jump_val = BUF_POS(); + encode_branch_cond(ctx, 0, COND_EQ); + register_jump(ctx, jump_val, target_opcode); + + // Patch all skips to here + int here = BUF_POS(); + int patches[] = { skip_a, skip_b, skip_av, skip_bv }; + for (int i = 0; i < 4; i++) { + int off = (here - patches[i]) / 4; + *(int*)(ctx->startBuf + patches[i]) = (*(int*)(ctx->startBuf + patches[i]) & 0xFF00001F) | ((off & 0x7FFFF) << 5); + } + } else if (op == OJNotEq) { + // if (a != b && (!a || !b || !a->value || !b->value || a->value != b->value)) goto + encode_add_sub_reg(ctx, 1, 1, 1, 0, rb, 0, ra, XZR); + int skip_eq = BUF_POS(); + encode_branch_cond(ctx, 0, COND_EQ); // Skip if a == b + + // If a == NULL, jump + int jump_a = BUF_POS(); + encode_cbz_cbnz(ctx, 1, 0, 0, ra); + register_jump(ctx, jump_a, target_opcode); + + // If b == NULL, jump + int jump_b = BUF_POS(); + encode_cbz_cbnz(ctx, 1, 0, 0, rb); + register_jump(ctx, jump_b, target_opcode); + + // Load a->value + encode_ldr_str_imm(ctx, 0x03, 0, 0x01, HL_WSIZE / 8, ra, ra); + int jump_av = BUF_POS(); + encode_cbz_cbnz(ctx, 1, 0, 0, ra); + register_jump(ctx, jump_av, target_opcode); + + // Load b->value + encode_ldr_str_imm(ctx, 0x03, 0, 0x01, HL_WSIZE / 8, rb, rb); + int jump_bv = BUF_POS(); + encode_cbz_cbnz(ctx, 1, 0, 0, rb); + register_jump(ctx, jump_bv, target_opcode); + + // Compare - if not equal, jump + encode_add_sub_reg(ctx, 1, 1, 1, 0, rb, 0, ra, XZR); + int 
skip_val = BUF_POS(); + encode_branch_cond(ctx, 0, COND_EQ); + + // Not equal - jump to target + int jump_ne = BUF_POS(); + encode_branch_uncond(ctx, 0); + register_jump(ctx, jump_ne, target_opcode); + + // Patch skips + int here = BUF_POS(); + int off_eq = (here - skip_eq) / 4; + int off_val = (here - skip_val) / 4; + *(int*)(ctx->startBuf + skip_eq) = (*(int*)(ctx->startBuf + skip_eq) & 0xFF00001F) | ((off_eq & 0x7FFFF) << 5); + *(int*)(ctx->startBuf + skip_val) = (*(int*)(ctx->startBuf + skip_val) & 0xFF00001F) | ((off_val & 0x7FFFF) << 5); + } else { + jit_error("Unsupported comparison op for HVIRTUAL"); + } + return; + } + + // Handle HOBJ/HSTRUCT vs HVIRTUAL (swap operands) + if ((a->t->kind == HOBJ || a->t->kind == HSTRUCT) && b->t->kind == HVIRTUAL) { + // Swap and recurse - the HVIRTUAL case handles HOBJ on the right + op_jump(ctx, b, a, op, target_opcode); + return; + } + + // Handle String EQUALITY comparison (value-based per Haxe spec) + // hl_str_cmp only returns 0 (equal) or 1 (not equal), so it can only be used + // for OJEq/OJNotEq. For ordered comparisons, fall through to compareFun path. + if ((op == OJEq || op == OJNotEq) && is_string_type(a->t) && is_string_type(b->t)) { + // Spill before call + spill_regs(ctx); + spill_callee_saved(ctx); + + // Call hl_str_cmp(a, b) - returns 0 if equal, non-zero if not equal + vreg *args[2] = { a, b }; + int stack_space = prepare_call_args(ctx, NULL, args, 2, true); + load_immediate(ctx, (int64_t)hl_str_cmp, RTMP, true); + EMIT32(ctx, 0xD63F0000 | (RTMP << 5)); // BLR RTMP + if (stack_space > 0) { + encode_add_sub_imm(ctx, 1, 0, 0, 0, stack_space, SP_REG, SP_REG); + } + + // Result in X0: 0 = equal, non-zero = not equal + // TST X0, X0 sets Z flag (Z=1 if X0==0) + encode_logical_reg(ctx, 1, 0x3, 0, 0, X0, 0, X0, XZR); // TST X0, X0 + + // Branch based on op (only EQ or NE) + ArmCondition cond = (op == OJEq) ? COND_EQ : COND_NE; + int jump_pos = BUF_POS(); + encode_branch_cond(ctx, 0, cond); + register_jump(ctx, jump_pos, target_opcode); + return; + } + + // Handle HOBJ/HSTRUCT with compareFun (e.g., String) + // Use hl_get_obj_rt() to ensure runtime object is initialized (like x86 does) + // NOTE: compareFun is a FUNCTION INDEX, not a function pointer! + if ((a->t->kind == HOBJ || a->t->kind == HSTRUCT) && hl_get_obj_rt(a->t)->compareFun) { + int compareFunIndex = (int)(int_val)hl_get_obj_rt(a->t)->compareFun; + preg *pa = fetch(ctx, a); + preg *pb = fetch(ctx, b); + Arm64Reg ra = (pa->kind == RCPU) ? (Arm64Reg)pa->id : RTMP; + Arm64Reg rb = (pb->kind == RCPU) ? (Arm64Reg)pb->id : RTMP2; + if (pa->kind != RCPU) ldr_stack(ctx, ra, a->stackPos, 8); + if (pb->kind != RCPU) ldr_stack(ctx, rb, b->stackPos, 8); + + if (op == OJEq) { + // if (a == b || (a && b && cmp(a,b) == 0)) goto target + // First check pointer equality + encode_add_sub_reg(ctx, 1, 1, 1, 0, rb, 0, ra, XZR); // CMP ra, rb + int jump_eq = BUF_POS(); + encode_branch_cond(ctx, 0, COND_EQ); + register_jump(ctx, jump_eq, target_opcode); + + // If a == NULL, skip + int skip_a = BUF_POS(); + encode_cbz_cbnz(ctx, 1, 0, 0, ra); + + // If b == NULL, skip + int skip_b = BUF_POS(); + encode_cbz_cbnz(ctx, 1, 0, 0, rb); + + // Call compareFun(a, b) - compareFunIndex is a function index, not a pointer! 
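+ // (Assumed behavior of emit_call_findex, for illustration only: it resolves + // the index through the module's function table - m->functions_ptrs[findex] - + // and BLRs the entry, so the target is looked up at call time rather than + // baked in as a raw code pointer at JIT time.)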
+ vreg *args[2] = { a, b }; + int stack_space = prepare_call_args(ctx, NULL, args, 2, true); + emit_call_findex(ctx, compareFunIndex, stack_space); + + // If result == 0, goto target + encode_logical_reg(ctx, 0, 0x3, 0, 0, X0, 0, X0, XZR); // TST W0, W0 + int skip_cmp = BUF_POS(); + encode_branch_cond(ctx, 0, COND_NE); // Skip if result != 0 + + // Jump to target + int jump_target = BUF_POS(); + encode_branch_uncond(ctx, 0); + register_jump(ctx, jump_target, target_opcode); + + // Patch all skips to here + int here = BUF_POS(); + int patches[] = { skip_a, skip_b, skip_cmp }; + for (int i = 0; i < 3; i++) { + int off = (here - patches[i]) / 4; + *(int*)(ctx->startBuf + patches[i]) = (*(int*)(ctx->startBuf + patches[i]) & 0xFF00001F) | ((off & 0x7FFFF) << 5); + } + } else if (op == OJNotEq) { + // if (a != b && (!a || !b || cmp(a,b) != 0)) goto target + // First check pointer equality - if equal, skip entirely + encode_add_sub_reg(ctx, 1, 1, 1, 0, rb, 0, ra, XZR); // CMP ra, rb + int skip_eq = BUF_POS(); + encode_branch_cond(ctx, 0, COND_EQ); + + // If a == NULL, goto target + int jump_a = BUF_POS(); + encode_cbz_cbnz(ctx, 1, 0, 0, ra); + register_jump(ctx, jump_a, target_opcode); + + // If b == NULL, goto target + int jump_b = BUF_POS(); + encode_cbz_cbnz(ctx, 1, 0, 0, rb); + register_jump(ctx, jump_b, target_opcode); + + // Call compareFun(a, b) - compareFunIndex is a function index, not a pointer! + vreg *args[2] = { a, b }; + int stack_space = prepare_call_args(ctx, NULL, args, 2, true); + emit_call_findex(ctx, compareFunIndex, stack_space); + + // If result != 0, goto target + encode_logical_reg(ctx, 0, 0x3, 0, 0, X0, 0, X0, XZR); // TST W0, W0 + int skip_cmp = BUF_POS(); + encode_branch_cond(ctx, 0, COND_EQ); // Skip if result == 0 + + // Jump to target + int jump_target = BUF_POS(); + encode_branch_uncond(ctx, 0); + register_jump(ctx, jump_target, target_opcode); + + // Patch skips to here + int here = BUF_POS(); + int off_eq = (here - skip_eq) / 4; + int off_cmp = (here - skip_cmp) / 4; + *(int*)(ctx->startBuf + skip_eq) = (*(int*)(ctx->startBuf + skip_eq) & 0xFF00001F) | ((off_eq & 0x7FFFF) << 5); + *(int*)(ctx->startBuf + skip_cmp) = (*(int*)(ctx->startBuf + skip_cmp) & 0xFF00001F) | ((off_cmp & 0x7FFFF) << 5); + } else { + // For OJSGt, OJSGte, OJSLt, OJSLte: if (a && b && cmp(a,b) <op> 0) goto + int skip_a = BUF_POS(); + encode_cbz_cbnz(ctx, 1, 0, 0, ra); + + int skip_b = BUF_POS(); + encode_cbz_cbnz(ctx, 1, 0, 0, rb); + + // Call compareFun(a, b) - compareFunIndex is a function index, not a pointer! 
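+ // (In this ordered path the comparison result is treated like a plain signed + // integer: CMP W0, #0 below sets the flags and hl_cond_to_arm picks the same + // condition code as for ordinary integer compares.)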
+ vreg *args[2] = { a, b }; + int stack_space = prepare_call_args(ctx, NULL, args, 2, true); + emit_call_findex(ctx, compareFunIndex, stack_space); + + // Compare result with 0: CMP W0, #0 + encode_add_sub_imm(ctx, 0, 1, 1, 0, 0, X0, XZR); // CMP W0, #0 + + // Branch based on condition + ArmCondition cond = hl_cond_to_arm(op, false); + int jump_pos = BUF_POS(); + encode_branch_cond(ctx, 0, cond); + register_jump(ctx, jump_pos, target_opcode); + + // Patch skips to here + int here = BUF_POS(); + int off_a = (here - skip_a) / 4; + int off_b = (here - skip_b) / 4; + *(int*)(ctx->startBuf + skip_a) = (*(int*)(ctx->startBuf + skip_a) & 0xFF00001F) | ((off_a & 0x7FFFF) << 5); + *(int*)(ctx->startBuf + skip_b) = (*(int*)(ctx->startBuf + skip_b) & 0xFF00001F) | ((off_b & 0x7FFFF) << 5); + } + return; + } + + // Standard comparison for other types + bool is_float = IS_FLOAT(a); + preg *pa = fetch(ctx, a); + preg *pb = fetch(ctx, b); + + if (is_float) { + // Floating-point comparison: FCMP Vn, Vm + int type = (a->t->kind == HF64) ? 0x01 : 0x00; + encode_fp_compare(ctx, 0, 0, type, pb->id, 0, pa->id); + } else { + // Integer comparison: CMP Xn, Xm (implemented as SUBS XZR, Xn, Xm) + int sf = (a->size == 8) ? 1 : 0; + encode_add_sub_reg(ctx, sf, 1, 1, 0, pb->id, 0, pa->id, XZR); + } + + // Emit conditional branch + ArmCondition cond = hl_cond_to_arm(op, is_float); + int jump_pos = BUF_POS(); + encode_branch_cond(ctx, 0, cond); // Offset will be patched later + + // Register for patching + register_jump(ctx, jump_pos, target_opcode); +} + +/** + * Simple conditional jumps (OJTrue, OJFalse, OJNull, OJNotNull) + */ +static void op_jcond(jit_ctx *ctx, vreg *a, hl_op op, int target_opcode) { + // Spill all registers to stack BEFORE the branch. + // Target label will use discard_regs() and expect values on stack. + spill_regs(ctx); + spill_callee_saved(ctx); // Callee-saved must also be spilled at control flow merge + + preg *pa = fetch(ctx, a); + int jump_pos = BUF_POS(); + + // Determine which condition to test + bool test_zero = (op == OJFalse || op == OJNull); + + // Use CBZ (compare and branch if zero) or CBNZ (compare and branch if non-zero) + int sf = (a->size == 8) ? 1 : 0; + int op_bit = test_zero ? 0 : 1; // 0=CBZ, 1=CBNZ + + encode_cbz_cbnz(ctx, sf, op_bit, 0, pa->id); // Offset will be patched + + // Register for patching + register_jump(ctx, jump_pos, target_opcode); +} + +/** + * Unconditional jump (OJAlways) + */ +static void op_jalways(jit_ctx *ctx, int target_opcode) { + // Spill all registers to stack BEFORE the branch. + // Target label will use discard_regs() and expect values on stack. + spill_regs(ctx); + spill_callee_saved(ctx); // Callee-saved must also be spilled at control flow merge + + int jump_pos = BUF_POS(); + encode_branch_uncond(ctx, 0); // Offset will be patched + + // Register for patching + register_jump(ctx, jump_pos, target_opcode); +} + +/** + * Discard all register bindings at merge points (labels). + * + * Used at labels where control flow can come from multiple paths. + * Clears register↔vreg bindings so subsequent operations load from stack. + * + * With dirty tracking: If reached via fallthrough (not a jump), registers + * might still be dirty and need to be spilled first. Registers reached via + * jump are already clean because spill_regs() is called before all jumps. + */ +static void discard_regs(jit_ctx *ctx) { + int i; + // Handle CPU scratch registers (X0-X17) + // NOTE: This function must NOT emit any code! 
+ // At labels, spill_regs() + spill_callee_saved() is called BEFORE this (for fallthrough). + // We just clear bindings here - values are already on stack. + for (i = 0; i < 18; i++) { + preg *r = &ctx->pregs[i]; + if (r->holds) { + r->holds->dirty = 0; + r->holds->current = NULL; + r->holds = NULL; + } + } + // Handle callee-saved CPU registers (X19-X26) + // At merge points, callee-saved must also be discarded for consistent state + for (i = 0; i < RCPU_CALLEE_ALLOC_COUNT; i++) { + preg *r = REG_AT(RCPU_CALLEE_ALLOC[i]); + if (r->holds) { + r->holds->dirty = 0; + r->holds->current = NULL; + r->holds = NULL; + } + } + // Handle ALL FPU registers (V0-V31) at merge points + // At labels, control flow may come from different paths with different allocations + for (i = 0; i < RFPU_COUNT; i++) { + preg *r = &ctx->pregs[RCPU_COUNT + i]; + if (r->holds) { + r->holds->dirty = 0; + r->holds->current = NULL; + r->holds = NULL; + } + } +} + +/** + * Label marker (OLabel) - just records position for jump targets + * At a label, control flow could come from multiple places, + * so we must invalidate all register associations. + * + * IMPORTANT: No code is emitted here! The main loop calls spill_regs() + * BEFORE this function for the fallthrough path. Jump paths have already + * spilled before jumping. We just clear bindings so subsequent ops + * load from stack. + */ +static void op_label(jit_ctx *ctx) { + // Just clear bindings - spill_regs() was already called in main loop + discard_regs(ctx); +} + +// ============================================================================ +// Memory Operations +// ============================================================================ + +/* + * Load byte/halfword/word from memory + * OGetI8/OGetI16/OGetI32: dst = *(type*)(base + offset) + */ +static void op_get_mem(jit_ctx *ctx, vreg *dst, vreg *base, int offset, int size) { + preg *base_reg = fetch(ctx, base); + preg *dst_reg = alloc_dst(ctx, dst); + + Arm64Reg base_r = (base_reg->kind == RCPU) ? (Arm64Reg)base_reg->id : RTMP; + if (base_reg->kind != RCPU) { + ldr_stack(ctx, base_r, base->stackPos, base->size); + } + + // Handle float and integer cases separately + if (IS_FLOAT(dst)) { + // Float: load into FPU register + Arm64FpReg dst_r = (dst_reg->kind == RFPU) ? (Arm64FpReg)dst_reg->id : V16; + int size_bits = (size == 8) ? 0x03 : 0x02; // D or S + + if (offset >= 0 && offset < (1 << 12) * size) { + int imm12 = offset / size; + encode_ldr_str_imm(ctx, size_bits, 1, 0x01, imm12, base_r, dst_r); // V=1 for FP + } else { + load_immediate(ctx, offset, RTMP2, false); + encode_add_sub_reg(ctx, 1, 0, 0, 0, RTMP2, 0, base_r, RTMP); + encode_ldr_str_imm(ctx, size_bits, 1, 0x01, 0, RTMP, dst_r); // V=1 for FP + } + + str_stack_fp(ctx, dst_r, dst->stackPos, dst->size); + } else { + // Integer/pointer: load into CPU register + // Use RTMP2 as temp (not RTMP) because str_stack's fallback uses RTMP internally. + // If we loaded into RTMP, str_stack would clobber the value. + Arm64Reg dst_r = (dst_reg->kind == RCPU) ? (Arm64Reg)dst_reg->id : RTMP2; + + // Load with offset + // LDR Xd, [Xn, #offset] or LDRB/LDRH for smaller sizes + if (offset >= 0 && offset < (1 << 12) * size) { + // Fits in immediate offset + int imm12 = offset / size; + // size: 1=LDRB, 2=LDRH, 4=LDR(W), 8=LDR(X) + int size_bits = (size == 1) ? 0x00 : (size == 2) ? 0x01 : (size == 4) ? 
0x02 : 0x03; + encode_ldr_str_imm(ctx, size_bits, 0, 0x01, imm12, base_r, dst_r); + } else { + // Offset too large - compute effective address in RTMP, then load into dst_r + load_immediate(ctx, offset, RTMP2, false); + encode_add_sub_reg(ctx, 1, 0, 0, 0, RTMP2, 0, base_r, RTMP); + // LDR dst_r, [RTMP] + int size_bits = (size == 1) ? 0x00 : (size == 2) ? 0x01 : (size == 4) ? 0x02 : 0x03; + encode_ldr_str_imm(ctx, size_bits, 0, 0x01, 0, RTMP, dst_r); + } + + // Always store to stack - it's the source of truth for later loads + // (registers may be clobbered by subsequent calls) + str_stack(ctx, dst_r, dst->stackPos, dst->size); + } + + // Release the base register - discard() will store if dirty + discard(ctx, base_reg); +} + +/* + * Store byte/halfword/word to memory + * OSetI8/OSetI16/OSetI32: *(type*)(base + offset) = value + */ +static void op_set_mem(jit_ctx *ctx, vreg *base, int offset, vreg *value, int size) { + preg *base_reg = fetch(ctx, base); + preg *value_reg = fetch(ctx, value); + + /* + * IMPORTANT: Load value FIRST, then base. + * ldr_stack's fallback path uses RTMP internally, so if we load base into RTMP + * first, then load value from stack, RTMP would get clobbered. + * By loading value first (into RTMP2 or FPU reg), any RTMP usage is harmless. + * Then we load base into RTMP, which is safe since value is already loaded. + */ + + // Handle float and integer cases separately + if (IS_FLOAT(value)) { + // Float: load value first into FPU register + Arm64FpReg value_r = (value_reg->kind == RFPU) ? (Arm64FpReg)value_reg->id : V16; + if (value_reg->kind != RFPU) { + ldr_stack_fp(ctx, value_r, value->stackPos, value->size); + } + + // Now load base (safe - value is already in FPU reg) + Arm64Reg base_r = (base_reg->kind == RCPU) ? (Arm64Reg)base_reg->id : RTMP; + if (base_reg->kind != RCPU) { + ldr_stack(ctx, base_r, base->stackPos, base->size); + } + + int size_bits = (size == 8) ? 0x03 : 0x02; // D or S + + if (offset >= 0 && offset < (1 << 12) * size) { + int imm12 = offset / size; + encode_ldr_str_imm(ctx, size_bits, 1, 0x00, imm12, base_r, value_r); // V=1 for FP + } else { + load_immediate(ctx, offset, RTMP2, false); + encode_add_sub_reg(ctx, 1, 0, 0, 0, RTMP2, 0, base_r, RTMP); + encode_ldr_str_imm(ctx, size_bits, 1, 0x00, 0, RTMP, value_r); // V=1 for FP + } + } else { + // Integer/pointer: load value first into CPU register + Arm64Reg value_r = (value_reg->kind == RCPU) ? (Arm64Reg)value_reg->id : RTMP2; + if (value_reg->kind == RCONST) { + load_immediate(ctx, value_reg->id, value_r, value->size == 8); + } else if (value_reg->kind != RCPU) { + ldr_stack(ctx, value_r, value->stackPos, value->size); + } + + // Now load base (safe - value is already in RTMP2 or CPU reg) + Arm64Reg base_r = (base_reg->kind == RCPU) ? (Arm64Reg)base_reg->id : RTMP; + if (base_reg->kind != RCPU) { + ldr_stack(ctx, base_r, base->stackPos, base->size); + } + + // Store with offset + // STR Xd, [Xn, #offset] or STRB/STRH for smaller sizes + if (offset >= 0 && offset < (1 << 12) * size) { + // Fits in immediate offset + int imm12 = offset / size; + int size_bits = (size == 1) ? 0x00 : (size == 2) ? 0x01 : (size == 4) ? 
0x02 : 0x03; + encode_ldr_str_imm(ctx, size_bits, 0, 0x00, imm12, base_r, value_r); + } else { + // Offset too large - load offset to temp register + if (value_r == RTMP2) { + // Value is already in RTMP2, use a different temp + load_immediate(ctx, offset, X9, false); + encode_add_sub_reg(ctx, 1, 0, 0, 0, X9, 0, base_r, RTMP); + } else { + load_immediate(ctx, offset, RTMP2, false); + encode_add_sub_reg(ctx, 1, 0, 0, 0, RTMP2, 0, base_r, RTMP); + } + // STR value_r, [RTMP] + int size_bits = (size == 1) ? 0x00 : (size == 2) ? 0x01 : (size == 4) ? 0x02 : 0x03; + encode_ldr_str_imm(ctx, size_bits, 0, 0x00, 0, RTMP, value_r); + } + } + + discard(ctx, base_reg); + discard(ctx, value_reg); +} + +/* + * Load byte/halfword/word from memory with register offset + * OGetI8/OGetI16/OGetMem: dst = *(type*)(base + offset_reg) + * Unlike op_get_mem which takes an immediate offset, this takes an offset vreg + */ +/* + * IMPORTANT: We must load offset BEFORE base when base uses RTMP, + * because ldr_stack's fallback path uses RTMP as a temporary. + * Order: offset -> base -> compute address -> load + */ +static void op_get_mem_reg(jit_ctx *ctx, vreg *dst, vreg *base, vreg *offset, int size) { + preg *base_reg = fetch(ctx, base); + preg *offset_reg = fetch(ctx, offset); + preg *dst_reg = alloc_dst(ctx, dst); + + // Step 1: Load offset FIRST (may clobber RTMP in fallback, but we haven't used it yet) + Arm64Reg offset_r = (offset_reg->kind == RCPU) ? (Arm64Reg)offset_reg->id : RTMP2; + if (offset_reg->kind == RCONST) { + load_immediate(ctx, offset_reg->id, offset_r, false); + } else if (offset_reg->kind != RCPU) { + ldr_stack(ctx, offset_r, offset->stackPos, offset->size); + } + + // Step 2: Load base (if it needs RTMP, the value will stay in RTMP) + Arm64Reg base_r = (base_reg->kind == RCPU) ? (Arm64Reg)base_reg->id : RTMP; + if (base_reg->kind != RCPU) { + ldr_stack(ctx, base_r, base->stackPos, base->size); + } + + // Step 3: Compute effective address: RTMP = base + offset + encode_add_sub_reg(ctx, 1, 0, 0, 0, offset_r, 0, base_r, RTMP); + + // Load from [RTMP] - handle float vs integer types + int size_bits = (size == 1) ? 0x00 : (size == 2) ? 0x01 : (size == 4) ? 0x02 : 0x03; + + if (IS_FLOAT(dst)) { + // Float load: use FPU register and V=1 + Arm64FpReg dst_fp = (dst_reg->kind == RFPU) ? (Arm64FpReg)dst_reg->id : V16; + if (dst_fp == V16) { + preg *pv16 = PVFPR(16); + if (pv16->holds != NULL) free_reg(ctx, pv16); + } + encode_ldr_str_imm(ctx, size_bits, 1, 0x01, 0, RTMP, dst_fp); // V=1 for FP + str_stack_fp(ctx, dst_fp, dst->stackPos, dst->size); + } else { + // Integer load + Arm64Reg dst_r = (dst_reg->kind == RCPU) ? (Arm64Reg)dst_reg->id : X9; + if (dst_r == X9) { + preg *px9 = &ctx->pregs[X9]; + if (px9->holds != NULL) free_reg(ctx, px9); + } + encode_ldr_str_imm(ctx, size_bits, 0, 0x01, 0, RTMP, dst_r); + // For byte/halfword loads, the result is zero-extended automatically by LDRB/LDRH + str_stack(ctx, dst_r, dst->stackPos, dst->size); + } + + discard(ctx, base_reg); + discard(ctx, offset_reg); +} + +/* + * Store byte/halfword/word to memory with register offset + * OSetI8/OSetI16/OSetMem: *(type*)(base + offset_reg) = value + * Unlike op_set_mem which takes an immediate offset, this takes an offset vreg + * + * IMPORTANT: We must load the value BEFORE computing the address in RTMP, + * because ldr_stack's fallback path for large/unaligned offsets uses RTMP + * as a temporary register. 
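+ * Concrete hazard (sketch): once RTMP holds the computed address, a fallback + * expansion of a stack load - 'MOV RTMP, #off; ADD RTMP, FP, RTMP; + * LDR Xv, [RTMP]' - would silently destroy that address before the store.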
+ */ +static void op_set_mem_reg(jit_ctx *ctx, vreg *base, vreg *offset, vreg *value, int size) { + preg *base_reg = fetch(ctx, base); + preg *offset_reg = fetch(ctx, offset); + preg *value_reg = fetch(ctx, value); + + int size_bits = (size == 1) ? 0x00 : (size == 2) ? 0x01 : (size == 4) ? 0x02 : 0x03; + + // Step 1: Load value FIRST (before using RTMP for address computation) + // ldr_stack's fallback path uses RTMP, so we must do this before RTMP holds the address + Arm64FpReg value_fp = V16; + Arm64Reg value_r = X9; + + if (IS_FLOAT(value)) { + value_fp = (value_reg->kind == RFPU) ? (Arm64FpReg)value_reg->id : V16; + if (value_reg->kind != RFPU) { + // Ensure V16 is free before using it + if (value_fp == V16) { + preg *pv16 = PVFPR(16); + if (pv16->holds != NULL) free_reg(ctx, pv16); + } + ldr_stack_fp(ctx, value_fp, value->stackPos, value->size); + } + } else { + value_r = (value_reg->kind == RCPU) ? (Arm64Reg)value_reg->id : X9; + if (value_reg->kind == RCONST) { + // Ensure X9 is free if we are using it + if (value_r == X9) { + preg *px9 = &ctx->pregs[X9]; + if (px9->holds != NULL) free_reg(ctx, px9); + } + load_immediate(ctx, value_reg->id, value_r, value->size == 8); + } else if (value_reg->kind != RCPU) { + // Ensure X9 is free if we are using it + if (value_r == X9) { + preg *px9 = &ctx->pregs[X9]; + if (px9->holds != NULL) free_reg(ctx, px9); + } + ldr_stack(ctx, value_r, value->stackPos, value->size); + } + } + + // Step 2: Load base and offset (these may also use RTMP in fallback, but that's ok + // since we compute the final address in RTMP at the end) + Arm64Reg base_r = (base_reg->kind == RCPU) ? (Arm64Reg)base_reg->id : RTMP; + if (base_reg->kind != RCPU) { + ldr_stack(ctx, base_r, base->stackPos, base->size); + } + + Arm64Reg offset_r = (offset_reg->kind == RCPU) ? (Arm64Reg)offset_reg->id : RTMP2; + if (offset_reg->kind == RCONST) { + load_immediate(ctx, offset_reg->id, offset_r, false); + } else if (offset_reg->kind != RCPU) { + ldr_stack(ctx, offset_r, offset->stackPos, offset->size); + } + + // Step 3: Compute effective address: RTMP = base + offset + encode_add_sub_reg(ctx, 1, 0, 0, 0, offset_r, 0, base_r, RTMP); + + // Step 4: Store to [RTMP] + if (IS_FLOAT(value)) { + encode_ldr_str_imm(ctx, size_bits, 1, 0x00, 0, RTMP, value_fp); // V=1 for FP + } else { + encode_ldr_str_imm(ctx, size_bits, 0, 0x00, 0, RTMP, value_r); + } + + discard(ctx, base_reg); + discard(ctx, offset_reg); + discard(ctx, value_reg); +} + +/* + * Field access: dst = obj->field + * OField: dst = *(obj + field_offset) + * + * Special handling for HPACKED -> HSTRUCT: return address of inline storage + * instead of loading a value (LEA semantics). + */ +static void op_field(jit_ctx *ctx, vreg *dst, vreg *obj, int field_index) { + hl_runtime_obj *rt = hl_get_obj_rt(obj->t); + int offset = rt->fields_indexes[field_index]; + + // Check for packed field -> struct destination (LEA semantics) + if (dst->t->kind == HSTRUCT) { + hl_type *ft = hl_obj_field_fetch(obj->t, field_index)->t; + if (ft->kind == HPACKED) { + // Return address of inline storage: dst = &obj->field + preg *p_obj = fetch(ctx, obj); + preg *p_dst = alloc_dst(ctx, dst); // Allocates register, binds to dst, marks dirty + + Arm64Reg obj_r = (p_obj->kind == RCPU) ? 
(Arm64Reg)p_obj->id : RTMP; + if (p_obj->kind != RCPU) { + ldr_stack(ctx, obj_r, obj->stackPos, obj->size); + } + + Arm64Reg dst_r = (Arm64Reg)p_dst->id; // alloc_dst always returns RCPU for non-float + + // ADD dst, obj, #offset (equivalent to LEA) + if (offset >= 0 && offset < 4096) { + encode_add_sub_imm(ctx, 1, 0, 0, 0, offset, obj_r, dst_r); + } else { + load_immediate(ctx, offset, RTMP2, false); + encode_add_sub_reg(ctx, 1, 0, 0, 0, RTMP2, 0, obj_r, dst_r); + } + + // Don't call store_result - alloc_dst already set up the binding + // The value will be spilled when needed + discard(ctx, p_obj); + return; + } + } + + op_get_mem(ctx, dst, obj, offset, dst->size); +} + +/* + * Field assignment: obj->field = value + * OSetField: *(obj + field_offset) = value + * + * Special handling for HSTRUCT -> HPACKED: must copy struct byte-by-byte + * because HPACKED means the struct is stored inline, not as a pointer. + */ +static void op_set_field(jit_ctx *ctx, vreg *obj, int field_index, vreg *value) { + hl_runtime_obj *rt = hl_get_obj_rt(obj->t); + int field_offset = rt->fields_indexes[field_index]; + + // Check for struct-to-packed-field assignment + if (value->t->kind == HSTRUCT) { + hl_type *ft = hl_obj_field_fetch(obj->t, field_index)->t; + if (ft->kind == HPACKED) { + // Copy struct byte-by-byte + hl_runtime_obj *frt = hl_get_obj_rt(ft->tparam); + + // Load obj pointer into RTMP and value pointer into RTMP2. + // This is simpler than trying to manage register allocation for the copy. + preg *p_obj = fetch(ctx, obj); + preg *p_val = fetch(ctx, value); + + // Always load to scratch registers to avoid conflicts with copy temp + Arm64Reg obj_r = RTMP; + Arm64Reg val_r = RTMP2; + + if (p_obj->kind == RCPU) { + // Move from allocated register to RTMP: ORR RTMP, XZR, Rm + encode_logical_reg(ctx, 1, 0x01, 0, 0, (Arm64Reg)p_obj->id, 0, XZR, obj_r); + } else { + ldr_stack(ctx, obj_r, obj->stackPos, obj->size); + } + + if (p_val->kind == RCPU) { + // Move from allocated register to RTMP2: ORR RTMP2, XZR, Rm + encode_logical_reg(ctx, 1, 0x01, 0, 0, (Arm64Reg)p_val->id, 0, XZR, val_r); + } else { + ldr_stack(ctx, val_r, value->stackPos, value->size); + } + + // Use X9 for data copy, X10 for large offset computation + // Evict both if they're holding values + preg *p_x9 = &ctx->pregs[X9]; + preg *p_x10 = &ctx->pregs[X10]; + if (p_x9->holds != NULL) { + free_reg(ctx, p_x9); + } + if (p_x10->holds != NULL) { + free_reg(ctx, p_x10); + } + + Arm64Reg tmp = X9; + int offset = 0; + while (offset < frt->size) { + int remain = frt->size - offset; + int copy_size = remain >= HL_WSIZE ? HL_WSIZE : (remain >= 4 ? 4 : (remain >= 2 ? 2 : 1)); + int size_bits = (copy_size == 8) ? 0x03 : (copy_size == 4) ? 0x02 : (copy_size == 2) ? 
0x01 : 0x00; + + // Load from source: LDR tmp, [val_r, #offset] + // Source offset starts at 0 and increments by copy_size, so always aligned + encode_ldr_str_imm(ctx, size_bits, 0, 0x01, offset / copy_size, val_r, tmp); + + // Store to dest: STR tmp, [obj_r + field_offset + offset] + // Dest offset may not be aligned to copy_size, so compute address explicitly + int dest_offset = field_offset + offset; + if ((dest_offset % copy_size) == 0 && dest_offset >= 0 && dest_offset < (1 << 12) * copy_size) { + // Aligned and fits in immediate - use scaled offset + encode_ldr_str_imm(ctx, size_bits, 0, 0x00, dest_offset / copy_size, obj_r, tmp); + } else { + // Misaligned or large offset - compute address in X10 + load_immediate(ctx, dest_offset, X10, false); + encode_add_sub_reg(ctx, 1, 0, 0, 0, X10, 0, obj_r, X10); + encode_ldr_str_imm(ctx, size_bits, 0, 0x00, 0, X10, tmp); + } + + offset += copy_size; + } + + discard(ctx, p_obj); + discard(ctx, p_val); + return; + } + } + + op_set_mem(ctx, obj, field_offset, value, value->size); +} + +/* + * Array element access: dst = array[index] + * OGetArray: dst = hl_aptr(array)[index] + * + * varray layout: { hl_type *t, hl_type *at, int size, int __pad } = 24 bytes + * Data is INLINE immediately after the header (not via a pointer!) + * hl_aptr(a,t) = (t*)(((varray*)(a))+1) = array + sizeof(varray) + * + * CArray (HABSTRACT) layout: raw memory, no header + * For HOBJ/HSTRUCT: return address of element (LEA) + * For other types: load value (LDR) + */ +/* + * IMPORTANT: We must load index BEFORE array when array uses RTMP, + * because ldr_stack's fallback path uses RTMP as a temporary. + * Order: index -> array -> compute address -> load + */ +static void op_get_array(jit_ctx *ctx, vreg *dst, vreg *array, vreg *index) { + preg *array_reg = fetch(ctx, array); + preg *index_reg = fetch(ctx, index); + preg *dst_reg = alloc_dst(ctx, dst); + + // CArrays (HABSTRACT) have different layout - no header, and for HOBJ/HSTRUCT + // we return the address (LEA) rather than loading the value + bool is_carray = (array->t->kind == HABSTRACT); + bool is_lea = is_carray && (dst->t->kind == HOBJ || dst->t->kind == HSTRUCT); + + int elem_size; + if (is_carray) { + if (is_lea) { + // For HOBJ/HSTRUCT in CArray, element size is the runtime object size + hl_runtime_obj *rt = hl_get_obj_rt(dst->t); + elem_size = rt->size; + } else { + // For other types in CArray, element size is pointer size + elem_size = sizeof(void*); + } + } else { + elem_size = hl_type_size(dst->t); + } + + // Step 1: Load index FIRST (may clobber RTMP in fallback, but we haven't used it yet) + Arm64Reg index_r = (index_reg->kind == RCPU) ? (Arm64Reg)index_reg->id : RTMP2; + if (index_reg->kind == RCONST) { + load_immediate(ctx, index_reg->id, index_r, false); + } else if (index_reg->kind != RCPU) { + ldr_stack(ctx, index_r, index->stackPos, index->size); + } + + // Step 2: Load array (if it needs RTMP, the value will stay in RTMP) + Arm64Reg array_r = (array_reg->kind == RCPU) ? (Arm64Reg)array_reg->id : RTMP; + if (array_reg->kind != RCPU) { + ldr_stack(ctx, array_r, array->stackPos, array->size); + } + + Arm64Reg dst_r = (dst_reg->kind == RCPU) ? 
(Arm64Reg)dst_reg->id : X9; + if (dst_r == X9) { + preg *px9 = &ctx->pregs[X9]; + if (px9->holds != NULL) free_reg(ctx, px9); + } + + // Step 3: Calculate element address + // For varray: array + sizeof(varray) + index * elem_size + // For CArray: array + index * elem_size (no header) + + if (is_carray) { + // CArray: no header offset, start from array_r directly + // Scale index by elem_size + if (elem_size == 1) { + encode_add_sub_reg(ctx, 1, 0, 0, SHIFT_LSL, index_r, 0, array_r, RTMP); + } else if (elem_size == 2 || elem_size == 4 || elem_size == 8) { + int shift = (elem_size == 2) ? 1 : (elem_size == 4) ? 2 : 3; + encode_add_sub_reg(ctx, 1, 0, 0, SHIFT_LSL, index_r, shift, array_r, RTMP); + } else { + // Non-power-of-2: compute index * elem_size in RTMP2, then add + load_immediate(ctx, elem_size, RTMP2, false); + encode_madd_msub(ctx, 1, 0, RTMP2, XZR, index_r, RTMP2); + encode_add_sub_reg(ctx, 1, 0, 0, 0, RTMP2, 0, array_r, RTMP); + } + } else { + // varray: add sizeof(varray) header offset first + encode_add_sub_imm(ctx, 1, 0, 0, 0, sizeof(varray), array_r, RTMP); + + // Add scaled index offset: RTMP = RTMP + (index_r << shift) + if (elem_size == 1) { + encode_add_sub_reg(ctx, 1, 0, 0, SHIFT_LSL, index_r, 0, RTMP, RTMP); + } else if (elem_size == 2 || elem_size == 4 || elem_size == 8) { + int shift = (elem_size == 2) ? 1 : (elem_size == 4) ? 2 : 3; + encode_add_sub_reg(ctx, 1, 0, 0, SHIFT_LSL, index_r, shift, RTMP, RTMP); + } else { + // Non-power-of-2: scale index into RTMP2, then add + load_immediate(ctx, elem_size, RTMP2, false); + encode_madd_msub(ctx, 1, 0, RTMP2, XZR, index_r, RTMP2); + encode_add_sub_reg(ctx, 1, 0, 0, 0, RTMP2, 0, RTMP, RTMP); + } + } + + if (is_lea) { + // LEA: just move the computed address to dst + mov_reg_reg(ctx, dst_r, RTMP, true); + str_stack(ctx, dst_r, dst->stackPos, dst->size); + } else if (IS_FLOAT(dst)) { + // Float load: use FP register with V=1 + preg *pv0 = PVFPR(0); + if (pv0->holds != NULL && pv0->holds != dst) { + free_reg(ctx, pv0); + } + int size_bits = (dst->size == 8) ? 0x03 : 0x02; // F64 or F32 + encode_ldr_str_imm(ctx, size_bits, 1, 0x01, 0, RTMP, V0); // V=1 for FP + str_stack_fp(ctx, V0, dst->stackPos, dst->size); + // Clear dst's old binding - value is now on stack, not in a register + if (dst->current != NULL) { + dst->current->holds = NULL; + dst->current = NULL; + } + } else { + // Integer load + int size_bits = (elem_size == 1) ? 0x00 : (elem_size == 2) ? 0x01 : (elem_size == 4) ? 0x02 : 0x03; + encode_ldr_str_imm(ctx, size_bits, 0, 0x01, 0, RTMP, dst_r); + str_stack(ctx, dst_r, dst->stackPos, dst->size); + } + + discard(ctx, array_reg); + discard(ctx, index_reg); +} + +/* + * Array element assignment: array[index] = value + * OSetArray: hl_aptr(array)[index] = value + * + * varray layout: { hl_type *t, hl_type *at, int size, int __pad } = 24 bytes + * Data is INLINE immediately after the header (not via a pointer!) + * + * CArray (HABSTRACT) layout: raw memory, no header + * For HOBJ/HSTRUCT: copy entire struct from value (which is address from LEA) + * For other types: store value directly + */ +/* + * IMPORTANT: We must load value and index BEFORE array when array uses RTMP, + * because ldr_stack's fallback path uses RTMP as a temporary. 
+ * Order: value -> index -> array -> compute address -> store + */ +static void op_set_array(jit_ctx *ctx, vreg *array, vreg *index, vreg *value) { + preg *array_reg = fetch(ctx, array); + preg *index_reg = fetch(ctx, index); + preg *value_reg = fetch(ctx, value); + + // CArrays (HABSTRACT) have different semantics + bool is_carray = (array->t->kind == HABSTRACT); + bool is_struct_copy = is_carray && (value->t->kind == HOBJ || value->t->kind == HSTRUCT); + + int elem_size; + if (is_carray) { + if (is_struct_copy) { + // For HOBJ/HSTRUCT in CArray, element size is the runtime object size + hl_runtime_obj *rt = hl_get_obj_rt(value->t); + elem_size = rt->size; + } else { + // For other types in CArray, element size is pointer size + elem_size = sizeof(void*); + } + } else { + elem_size = hl_type_size(value->t); + } + + // Step 1: Load value FIRST (before using RTMP for address computation) + // For struct copy, value is a pointer to the source struct + // For floats, use FP register; for integers, use CPU register + Arm64Reg value_r = X9; + Arm64FpReg value_fp = V16; + bool is_float_value = IS_FLOAT(value); + + if (is_float_value) { + if (value_reg->kind == RFPU) { + value_fp = (Arm64FpReg)value_reg->id; + } else { + // Ensure V16 is free before using it + preg *pv16 = PVFPR(16); + if (pv16->holds != NULL) free_reg(ctx, pv16); + ldr_stack_fp(ctx, value_fp, value->stackPos, value->size); + } + } else { + value_r = (value_reg->kind == RCPU) ? (Arm64Reg)value_reg->id : X9; + if (value_reg->kind == RCONST) { + // Ensure X9 is free if we are using it + if (value_r == X9) { + preg *px9 = &ctx->pregs[X9]; + if (px9->holds != NULL) free_reg(ctx, px9); + } + load_immediate(ctx, value_reg->id, value_r, value->size == 8); + } else if (value_reg->kind != RCPU) { + // Ensure X9 is free if we are using it + if (value_r == X9) { + preg *px9 = &ctx->pregs[X9]; + if (px9->holds != NULL) free_reg(ctx, px9); + } + ldr_stack(ctx, value_r, value->stackPos, value->size); + } + } + + // Step 2: Load index (may clobber RTMP in fallback, but we haven't used it yet) + Arm64Reg index_r = (index_reg->kind == RCPU) ? (Arm64Reg)index_reg->id : RTMP2; + if (index_reg->kind == RCONST) { + load_immediate(ctx, index_reg->id, index_r, false); + } else if (index_reg->kind != RCPU) { + ldr_stack(ctx, index_r, index->stackPos, index->size); + } + + // Step 3: Load array (if it needs RTMP, the value will stay in RTMP) + Arm64Reg array_r = (array_reg->kind == RCPU) ? (Arm64Reg)array_reg->id : RTMP; + if (array_reg->kind != RCPU) { + ldr_stack(ctx, array_r, array->stackPos, array->size); + } + + // Step 4: Calculate element address + // For varray: array + sizeof(varray) + index * elem_size + // For CArray: array + index * elem_size (no header) + + if (is_carray) { + // CArray: no header offset + if (elem_size == 1) { + encode_add_sub_reg(ctx, 1, 0, 0, SHIFT_LSL, index_r, 0, array_r, RTMP); + } else if (elem_size == 2 || elem_size == 4 || elem_size == 8) { + int shift = (elem_size == 2) ? 1 : (elem_size == 4) ? 
2 : 3;
+			encode_add_sub_reg(ctx, 1, 0, 0, SHIFT_LSL, index_r, shift, array_r, RTMP);
+		} else {
+			// Non-power-of-2: compute index * elem_size
+			load_immediate(ctx, elem_size, RTMP2, false);
+			encode_madd_msub(ctx, 1, 0, RTMP2, XZR, index_r, RTMP2);
+			encode_add_sub_reg(ctx, 1, 0, 0, 0, RTMP2, 0, array_r, RTMP);
+		}
+	} else {
+		// varray: add sizeof(varray) header offset first
+		encode_add_sub_imm(ctx, 1, 0, 0, 0, sizeof(varray), array_r, RTMP);
+
+		// Add scaled index offset
+		if (elem_size == 1) {
+			encode_add_sub_reg(ctx, 1, 0, 0, SHIFT_LSL, index_r, 0, RTMP, RTMP);
+		} else if (elem_size == 2 || elem_size == 4 || elem_size == 8) {
+			int shift = (elem_size == 2) ? 1 : (elem_size == 4) ? 2 : 3;
+			encode_add_sub_reg(ctx, 1, 0, 0, SHIFT_LSL, index_r, shift, RTMP, RTMP);
+		} else {
+			load_immediate(ctx, elem_size, RTMP2, false);
+			encode_madd_msub(ctx, 1, 0, RTMP2, XZR, index_r, RTMP2);
+			encode_add_sub_reg(ctx, 1, 0, 0, 0, RTMP2, 0, RTMP, RTMP);
+		}
+	}
+
+	if (is_struct_copy) {
+		// Copy struct from value (pointer) to RTMP (destination)
+		// value_r points to source struct, RTMP points to destination
+		// Use RTMP2 as the copy temporary: the index is dead once the address
+		// is computed, and unlike X9/X10, RTMP2 is never bound to a vreg,
+		// so no eviction is needed before clobbering it
+		int offset = 0;
+		while (offset < elem_size) {
+			int remain = elem_size - offset;
+			int copy_size, size_bits;
+			if (remain >= 8) {
+				copy_size = 8;
+				size_bits = 0x03;
+			} else if (remain >= 4) {
+				copy_size = 4;
+				size_bits = 0x02;
+			} else if (remain >= 2) {
+				copy_size = 2;
+				size_bits = 0x01;
+			} else {
+				copy_size = 1;
+				size_bits = 0x00;
+			}
+			// Load from source: RTMP2 = [value_r + offset]
+			encode_ldur_stur(ctx, size_bits, 0, 0x01, offset, value_r, RTMP2);
+			// Store to dest: [RTMP + offset] = RTMP2
+			encode_ldur_stur(ctx, size_bits, 0, 0x00, offset, RTMP, RTMP2);
+			offset += copy_size;
+		}
+	} else if (is_float_value) {
+		// Float store: STR Vn, [RTMP] with V=1
+		int size_bits = (value->size == 8) ? 0x03 : 0x02; // F64 or F32
+		encode_ldr_str_imm(ctx, size_bits, 1, 0x00, 0, RTMP, value_fp); // V=1 for FP
+	} else {
+		// Integer store: STR Xn, [RTMP]
+		int size_bits = (elem_size == 1) ? 0x00 : (elem_size == 2) ? 0x01 : (elem_size == 4) ? 0x02 : 0x03;
+		encode_ldr_str_imm(ctx, size_bits, 0, 0x00, 0, RTMP, value_r);
+	}
+
+	discard(ctx, array_reg);
+	discard(ctx, index_reg);
+	discard(ctx, value_reg);
+}
+
+/*
+ * Global variable access: dst = globals[index]
+ * OGetGlobal: materialize the global slot's absolute address with
+ * load_immediate, then load through it
+ */
+static void op_get_global(jit_ctx *ctx, vreg *dst, int global_index) {
+	preg *dst_reg = alloc_dst(ctx, dst);
+
+	// Get global address from module
+	void **globals = (void**)ctx->m->globals_data;
+	void *global_addr = &globals[global_index];
+
+	// Load global address to RTMP2
+	load_immediate(ctx, (int64_t)global_addr, RTMP2, true);
+
+	if (IS_FLOAT(dst)) {
+		// Float: load into FPU register
+		Arm64FpReg dst_r = (dst_reg->kind == RFPU) ? (Arm64FpReg)dst_reg->id : V16;
+		// LDR Vn, [RTMP2] - floating point load
+		// size: 0x02=32-bit (S), 0x03=64-bit (D)
+		encode_ldr_str_imm(ctx, dst->size == 8 ? 0x03 : 0x02, 1, 0x01, 0, RTMP2, dst_r);
+		// Store to stack
+		str_stack_fp(ctx, dst_r, dst->stackPos, dst->size);
+	} else {
+		// Integer/pointer: load into CPU register
+		Arm64Reg dst_r = (dst_reg->kind == RCPU) ? (Arm64Reg)dst_reg->id : RTMP;
+		// LDR Xn, [RTMP2]
+		encode_ldr_str_imm(ctx, dst->size == 8 ?
0x03 : 0x02, 0, 0x01, 0, RTMP2, dst_r); + // Store to stack + str_stack(ctx, dst_r, dst->stackPos, dst->size); + } +} + +/* + * Global variable assignment: globals[index] = value + * OSetGlobal + */ +static void op_set_global(jit_ctx *ctx, int global_index, vreg *value) { + preg *value_reg = fetch(ctx, value); + + // Get global address from module + void **globals = (void**)ctx->m->globals_data; + void *global_addr = &globals[global_index]; + + // Load global address to RTMP2 + load_immediate(ctx, (int64_t)global_addr, RTMP2, true); + + if (IS_FLOAT(value)) { + // Float: store from FPU register + Arm64FpReg value_r = (value_reg->kind == RFPU) ? (Arm64FpReg)value_reg->id : V16; + if (value_reg->kind != RFPU) { + // Load from stack into temp FPU register + ldr_stack_fp(ctx, value_r, value->stackPos, value->size); + } + // STR Vn, [RTMP2] - floating point store + encode_ldr_str_imm(ctx, value->size == 8 ? 0x03 : 0x02, 1, 0x00, 0, RTMP2, value_r); + } else { + // Integer/pointer: store from CPU register + Arm64Reg value_r = (value_reg->kind == RCPU) ? (Arm64Reg)value_reg->id : RTMP; + if (value_reg->kind == RCONST) { + load_immediate(ctx, value_reg->id, value_r, value->size == 8); + } else if (value_reg->kind != RCPU) { + ldr_stack(ctx, value_r, value->stackPos, value->size); + } + // STR Xn, [RTMP2] + encode_ldr_str_imm(ctx, value->size == 8 ? 0x03 : 0x02, 0, 0x00, 0, RTMP2, value_r); + } + + discard(ctx, value_reg); +} + +// ============================================================================ +// Reference Operations +// ============================================================================ + +/* + * Create reference: dst = &src + * ORef: dst = address of vreg + * + * IMPORTANT: After taking a reference to a vreg, that vreg may be modified + * through the reference (via OSetref). We must: + * 1. Ensure src is spilled to stack (in case it's only in a register) + * 2. Invalidate src's register binding so future reads go to stack + */ +static void op_ref(jit_ctx *ctx, vreg *dst, vreg *src) { + // First, ensure src is on stack and invalidate its register binding + // (like x86's scratch(ra->current)) + if (src->current != NULL) { + // Spill to stack if in a register + store(ctx, src, src->current); + // Invalidate the binding so future reads go to stack + src->current->holds = NULL; + src->current = NULL; + } + + preg *dst_reg = alloc_dst(ctx, dst); + Arm64Reg dst_r = (dst_reg->kind == RCPU) ? (Arm64Reg)dst_reg->id : RTMP; + + // Calculate stack address: FP + src->stackPos + if (src->stackPos >= 0) { + // ADD dst_r, FP, #stackPos + encode_add_sub_imm(ctx, 1, 0, 0, 0, src->stackPos, FP, dst_r); + } else { + // SUB dst_r, FP, #(-stackPos) + encode_add_sub_imm(ctx, 1, 1, 0, 0, -src->stackPos, FP, dst_r); + } + + // Always store to stack - source of truth for later loads + str_stack(ctx, dst_r, dst->stackPos, dst->size); +} + +/* + * Dereference: dst = *src + * OUnref: Load value from pointer + */ +static void op_unref(jit_ctx *ctx, vreg *dst, vreg *src) { + preg *src_reg = fetch(ctx, src); + + // Load the pointer (always integer register since it's an address) + Arm64Reg src_r = (src_reg->kind == RCPU) ? (Arm64Reg)src_reg->id : RTMP; + if (src_reg->kind != RCPU) { + ldr_stack(ctx, src_r, src->stackPos, src->size); + } + + int size_bits = (dst->size == 1) ? 0x00 : (dst->size == 2) ? 0x01 : (dst->size == 4) ? 0x02 : 0x03; + + if (IS_FLOAT(dst)) { + // Float dereference: LDR Vd, [src_r] + preg *dst_reg = alloc_dst(ctx, dst); + Arm64FpReg dst_r = (dst_reg->kind == RFPU) ? 
(Arm64FpReg)dst_reg->id : V16; + encode_ldr_str_imm(ctx, size_bits, 1, 0x01, 0, src_r, dst_r); + str_stack_fp(ctx, dst_r, dst->stackPos, dst->size); + } else { + // Integer dereference: LDR Xd, [src_r] + preg *dst_reg = alloc_dst(ctx, dst); + Arm64Reg dst_r = (dst_reg->kind == RCPU) ? (Arm64Reg)dst_reg->id : RTMP2; + encode_ldr_str_imm(ctx, size_bits, 0, 0x01, 0, src_r, dst_r); + str_stack(ctx, dst_r, dst->stackPos, dst->size); + } + + discard(ctx, src_reg); +} + +/* + * Set reference: *dst = src + * OSetref: Store value to pointer + */ +static void op_setref(jit_ctx *ctx, vreg *dst, vreg *src) { + preg *dst_reg = fetch(ctx, dst); + preg *src_reg = fetch(ctx, src); + + Arm64Reg dst_r = (dst_reg->kind == RCPU) ? (Arm64Reg)dst_reg->id : RTMP; + if (dst_reg->kind != RCPU) { + ldr_stack(ctx, dst_r, dst->stackPos, dst->size); + } + + Arm64Reg src_r = (src_reg->kind == RCPU) ? (Arm64Reg)src_reg->id : RTMP2; + if (src_reg->kind == RCONST) { + load_immediate(ctx, src_reg->id, src_r, src->size == 8); + } else if (src_reg->kind != RCPU) { + ldr_stack(ctx, src_r, src->stackPos, src->size); + } + + // Store to pointer: STR src_r, [dst_r] + int size_bits = (src->size == 1) ? 0x00 : (src->size == 2) ? 0x01 : (src->size == 4) ? 0x02 : 0x03; + encode_ldr_str_imm(ctx, size_bits, 0, 0x00, 0, dst_r, src_r); + + discard(ctx, dst_reg); + discard(ctx, src_reg); +} + +// ============================================================================ +// Comparison Operations (result stored, not branching) +// ============================================================================ + +/* + * Equality comparison: dst = (a == b) + * OEq/ONeq/OLt/OGte/etc: Store comparison result as boolean + */ +static void op_compare(jit_ctx *ctx, vreg *dst, vreg *a, vreg *b, hl_op op) { + preg *a_reg = fetch(ctx, a); + preg *b_reg = fetch(ctx, b); + preg *dst_reg = alloc_dst(ctx, dst); + + Arm64Reg a_r = (a_reg->kind == RCPU) ? (Arm64Reg)a_reg->id : RTMP; + if (a_reg->kind == RCONST) { + load_immediate(ctx, a_reg->id, a_r, a->size == 8); + } else if (a_reg->kind != RCPU) { + ldr_stack(ctx, a_r, a->stackPos, a->size); + } + + Arm64Reg b_r = (b_reg->kind == RCPU) ? (Arm64Reg)b_reg->id : RTMP2; + if (b_reg->kind == RCONST) { + load_immediate(ctx, b_reg->id, b_r, b->size == 8); + } else if (b_reg->kind != RCPU) { + ldr_stack(ctx, b_r, b->stackPos, b->size); + } + + Arm64Reg dst_r = (dst_reg->kind == RCPU) ? (Arm64Reg)dst_reg->id : X9; + if (dst_r == X9) { + preg *px9 = &ctx->pregs[X9]; + if (px9->holds != NULL) free_reg(ctx, px9); + } + + bool is_float = IS_FLOAT(a); + + if (is_float) { + // Floating-point comparison + Arm64FpReg fa_r = (a_reg->kind == RFPU) ? (Arm64FpReg)a_reg->id : V16; + Arm64FpReg fb_r = (b_reg->kind == RFPU) ? (Arm64FpReg)b_reg->id : V17; + + if (fa_r == V16) { + preg *pv16 = PVFPR(16); + if (pv16->holds != NULL) free_reg(ctx, pv16); + } + if (fb_r == V17) { + preg *pv17 = PVFPR(17); + if (pv17->holds != NULL) free_reg(ctx, pv17); + } + + if (a_reg->kind != RFPU) { + // Load from stack to FP register + ldr_stack_fp(ctx, fa_r, a->stackPos, a->size); + } + if (b_reg->kind != RFPU) { + ldr_stack_fp(ctx, fb_r, b->stackPos, b->size); + } + + // FCMP fa_r, fb_r + int is_double = a->size == 8 ? 1 : 0; + encode_fp_compare(ctx, 0, is_double, is_double, fb_r, 0, fa_r); + } else { + // Integer comparison: CMP a_r, b_r + encode_add_sub_reg(ctx, a->size == 8 ? 
1 : 0, 1, 1, 0, b_r, 0, a_r, XZR); + } + + // Get condition code for this operation + ArmCondition cond = hl_cond_to_arm(op, is_float); + + // CSET dst_r, cond (Set register to 1 if condition true, 0 otherwise) + // Encoding: CSINC dst, XZR, XZR, !cond + // This sets dst = (cond) ? 1 : 0 + int inv_cond = cond ^ 1; // Invert condition + // CSINC: sf=0, op=0, S=0, Rm=XZR, cond=inv_cond, o2=1, Rn=XZR, Rd=dst_r + EMIT32(ctx,(0 << 31) | (0 << 30) | (0xD4 << 21) | (XZR << 16) | (inv_cond << 12) | (1 << 10) | (XZR << 5) | dst_r); + + // Always store to stack - source of truth for later loads + str_stack(ctx, dst_r, dst->stackPos, dst->size); + + discard(ctx, a_reg); + discard(ctx, b_reg); +} + +// ============================================================================ +// Type and Object Operations +// ============================================================================ + +/* + * Get object type: dst = obj->type + * OType: Load type pointer from object + */ +static void op_type(jit_ctx *ctx, vreg *dst, vreg *obj) { + preg *obj_reg = fetch(ctx, obj); + preg *dst_reg = alloc_dst(ctx, dst); + + Arm64Reg obj_r = (obj_reg->kind == RCPU) ? (Arm64Reg)obj_reg->id : RTMP; + if (obj_reg->kind != RCPU) { + ldr_stack(ctx, obj_r, obj->stackPos, obj->size); + } + + Arm64Reg dst_r = (dst_reg->kind == RCPU) ? (Arm64Reg)dst_reg->id : RTMP2; + + // Load type pointer from object header (first field at offset 0) + // LDR dst_r, [obj_r] + encode_ldr_str_imm(ctx, 0x03, 0, 0x01, 0, obj_r, dst_r); + + // Always store to stack - source of truth for later loads + str_stack(ctx, dst_r, dst->stackPos, dst->size); + + discard(ctx, obj_reg); +} + +/* + * OGetThis: Load a field from the "this" object (R(0)) + * Equivalent to OField but implicitly uses R(0) as the object + */ +static void op_get_this(jit_ctx *ctx, vreg *dst, int field_idx) { + vreg *this_vreg = R(0); + op_field(ctx, dst, this_vreg, field_idx); +} + +/* + * Get the dynamic cast function for a given type + */ +static void *get_dyncast(hl_type *t) { + switch (t->kind) { + case HF32: + return hl_dyn_castf; + case HF64: + return hl_dyn_castd; + case HI64: + case HGUID: + return hl_dyn_casti64; + case HI32: + case HUI16: + case HUI8: + case HBOOL: + return hl_dyn_casti; + default: + return hl_dyn_castp; + } +} + +/* + * Cast operation (safe cast with runtime check) + * OSafeCast: dst = (target_type)obj or NULL if cast fails + */ +static void op_safe_cast(jit_ctx *ctx, vreg *dst, vreg *obj, hl_type *target_type) { + // Special case: Null to T - unbox with null check + if (obj->t->kind == HNULL && obj->t->tparam->kind == dst->t->kind) { + int jnull, jend; + + switch (dst->t->kind) { + case HUI8: + case HUI16: + case HI32: + case HBOOL: + case HI64: + case HGUID: + { + preg *tmp = fetch(ctx, obj); + Arm64Reg r = (tmp->kind == RCPU) ? 
tmp->id : RTMP; + if (tmp->kind != RCPU) { + ldr_stack(ctx, r, obj->stackPos, obj->size); + } + // Test for null + encode_add_sub_imm(ctx, 1, 1, 1, 0, 0, r, XZR); // CMP r, #0 + jnull = BUF_POS(); + encode_branch_cond(ctx, 0, COND_EQ); // B.EQ null_path + + // Non-null: load value from offset 8 with correct size + // Size determines scale: 0x00=1, 0x01=2, 0x02=4, 0x03=8 + // So offset = 8 / scale to get byte offset 8 + int size_code; + int scaled_offset; + switch (dst->size) { + case 1: size_code = 0x00; scaled_offset = 8; break; // LDRB [r, #8] + case 2: size_code = 0x01; scaled_offset = 4; break; // LDRH [r, #8] + case 4: size_code = 0x02; scaled_offset = 2; break; // LDR W [r, #8] + default: size_code = 0x03; scaled_offset = 1; break; // LDR X [r, #8] + } + // The LDR below clobbers r. If obj is dirty in r, save it to stack first. + // This preserves obj's value (the dynamic pointer) for later use. + if (obj->dirty && obj->current == tmp) { + str_stack(ctx, r, obj->stackPos, obj->size); + obj->dirty = 0; + } + encode_ldr_str_imm(ctx, size_code, 0, 0x01, scaled_offset, r, r); + jend = BUF_POS(); + encode_branch_uncond(ctx, 0); // B end + + // Null path: set to zero + patch_jump(ctx, jnull, BUF_POS()); + load_immediate(ctx, 0, r, dst->size == 8); + + // End + patch_jump(ctx, jend, BUF_POS()); + str_stack(ctx, r, dst->stackPos, dst->size); + // Clear binding - register no longer holds obj's original value + discard(ctx, tmp); + // Invalidate dst's old binding since we wrote directly to stack + if (dst->current) { + dst->current->holds = NULL; + dst->current = NULL; + } + } + return; + + case HF32: + case HF64: + { + preg *tmp = fetch(ctx, obj); + Arm64Reg r = (tmp->kind == RCPU) ? tmp->id : RTMP; + if (tmp->kind != RCPU) { + ldr_stack(ctx, r, obj->stackPos, obj->size); + } + // Evict any vreg currently bound to V0 before using it + preg *pv0 = PVFPR(0); + if (pv0->holds != NULL && pv0->holds != dst) { + free_reg(ctx, pv0); + } + // Test for null + encode_add_sub_imm(ctx, 1, 1, 1, 0, 0, r, XZR); // CMP r, #0 + jnull = BUF_POS(); + encode_branch_cond(ctx, 0, COND_EQ); // B.EQ null_path + + // Non-null: load float from offset 8 + encode_ldr_str_imm(ctx, (dst->size == 8) ? 
0x03 : 0x02, 1, 0x01, 8 / dst->size, r, V0);
+			jend = BUF_POS();
+			encode_branch_uncond(ctx, 0); // B end
+
+			// Null path: set to zero
+			patch_jump(ctx, jnull, BUF_POS());
+			// FMOV Vd, XZR
+			EMIT32(ctx, (1 << 31) | (0 << 29) | (0x1E << 24) | (1 << 22) | (1 << 21) | (7 << 16) | (31 << 5) | V0);
+
+			// End
+			patch_jump(ctx, jend, BUF_POS());
+			str_stack_fp(ctx, V0, dst->stackPos, dst->size);
+			// Clear binding - register no longer holds obj's original value
+			discard(ctx, tmp);
+			// Invalidate dst's old binding since we wrote directly to stack
+			if (dst->current) {
+				dst->current->holds = NULL;
+				dst->current = NULL;
+			}
+		}
+		return;
+
+		default:
+			break;
+		}
+	}
+
+	// General case: call runtime cast function
+	spill_regs(ctx);
+
+	// Get stack address of obj
+	// LEA X0, [FP, #obj->stackPos] or similar
+	if (obj->stackPos >= 0) {
+		encode_add_sub_imm(ctx, 1, 0, 0, 0, obj->stackPos, FP, X0);
+	} else {
+		encode_add_sub_imm(ctx, 1, 1, 0, 0, -obj->stackPos, FP, X0);
+	}
+
+	// Set up arguments based on destination type
+	void *cast_func = get_dyncast(dst->t);
+	switch (dst->t->kind) {
+	case HF32:
+	case HF64:
+	case HI64:
+		// 2 args: ptr, src_type
+		load_immediate(ctx, (int64_t)obj->t, X1, true);
+		break;
+	default:
+		// 3 args: ptr, src_type, dst_type
+		load_immediate(ctx, (int64_t)obj->t, X1, true);
+		load_immediate(ctx, (int64_t)dst->t, X2, true);
+		break;
+	}
+
+	// Call cast function
+	load_immediate(ctx, (int64_t)cast_func, RTMP, true);
+	EMIT32(ctx, 0xD63F0000 | (RTMP << 5)); // BLR RTMP
+
+	// Store result and clear stale binding
+	if (IS_FLOAT(dst)) {
+		str_stack_fp(ctx, V0, dst->stackPos, dst->size);
+	} else {
+		str_stack(ctx, X0, dst->stackPos, dst->size);
+	}
+	store_result(ctx, dst);
+}
+
+/*
+ * Null check: dst must not be null
+ * ONullCheck: if dst == null, call the runtime null-access handler (which throws)
+ */
+static void op_null_check(jit_ctx *ctx, vreg *dst, int hashed_name) {
+	// Check if dst is null and call hl_null_access if so
+	preg *dst_reg = fetch(ctx, dst);
+
+	Arm64Reg dst_r = (dst_reg->kind == RCPU) ? (Arm64Reg)dst_reg->id : RTMP;
+	if (dst_reg->kind != RCPU) {
+		ldr_stack(ctx, dst_r, dst->stackPos, dst->size);
+	}
+
+	// Compare with zero: CMP dst_r, #0 (actually SUBS XZR, dst_r, #0)
+	encode_add_sub_imm(ctx, 1, 1, 1, 0, 0, dst_r, XZR);
+
+	// If not zero (not null), skip error handling: B.NE skip
+	int bne_pos = BUF_POS();
+	encode_branch_cond(ctx, 0, COND_NE); // B.NE (will patch offset)
+
+	// Null path: call hl_null_access or jit_null_fail
+	// NOTE: Do NOT call spill_regs() here! hl_null_access never returns (it throws),
+	// and spill_regs() would corrupt compile-time register bindings for the non-null path.
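+	// Emitted sequence (illustrative sketch of the code generated below):
+	//   CMP   Xd, #0
+	//   B.NE  skip               ; offset patched once `skip` is known
+	//   MOV   X0, #hashed_name   ; only for the jit_null_fail variant
+	//   LDR   RTMP, =handler     ; absolute address via load_immediate
+	//   BLR   RTMP               ; the handler throws and never returns
+	// skip: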
+ if (hashed_name) { + load_immediate(ctx, hashed_name, X0, true); + load_immediate(ctx, (int64_t)jit_null_fail, RTMP, true); + } else { + load_immediate(ctx, (int64_t)hl_null_access, RTMP, true); + } + EMIT32(ctx, 0xD63F0000 | (RTMP << 5)); // BLR RTMP + // hl_null_access doesn't return (it throws), but we don't emit anything after + + // Patch the B.NE to skip here + int skip_pos = BUF_POS(); + int bne_offset = (skip_pos - bne_pos) / 4; + ctx->buf.b = ctx->startBuf + bne_pos; + encode_branch_cond(ctx, bne_offset, COND_NE); + ctx->buf.b = ctx->startBuf + skip_pos; + + discard(ctx, dst_reg); +} + +/* + * Object/memory allocation operations + * These typically call into the runtime allocator + */ +static void op_new(jit_ctx *ctx, vreg *dst, hl_type *type) { + // Call runtime allocator based on type kind + // Different type kinds require different allocation functions: + // - HOBJ/HSTRUCT: hl_alloc_obj(type) + // - HDYNOBJ: hl_alloc_dynobj() - no arguments! + // - HVIRTUAL: hl_alloc_virtual(type) + + // Spill all caller-saved registers BEFORE the call + spill_regs(ctx); + + void *alloc_func; + int has_type_arg = 1; + + switch (type->kind) { + case HOBJ: + case HSTRUCT: + alloc_func = (void*)hl_alloc_obj; + break; + case HDYNOBJ: + alloc_func = (void*)hl_alloc_dynobj; + has_type_arg = 0; // hl_alloc_dynobj takes no arguments + break; + case HVIRTUAL: + alloc_func = (void*)hl_alloc_virtual; + break; + default: + // Unsupported type for ONew + printf("op_new: unsupported type kind %d\n", type->kind); + return; + } + + // Load type address to X0 (first argument) if needed + if (has_type_arg) { + load_immediate(ctx, (int64_t)type, X0, true); + } + + // Load function pointer and call + load_immediate(ctx, (int64_t)alloc_func, RTMP, true); + + // Call allocator: BLR RTMP + EMIT32(ctx, (0xD63F0000) | (RTMP << 5)); + + // Result is in X0 - always store to stack first (source of truth for later loads) + str_stack(ctx, X0, dst->stackPos, dst->size); + + // Also keep in a register if allocated + preg *dst_reg = alloc_dst(ctx, dst); + if (dst_reg->kind == RCPU && (Arm64Reg)dst_reg->id != X0) { + mov_reg_reg(ctx, (Arm64Reg)dst_reg->id, X0, 8); + } +} + +/* + * String/bytes operations + */ +static void op_string(jit_ctx *ctx, vreg *dst, int string_index) { + // Load UTF-16 string from module string table + preg *dst_reg = alloc_dst(ctx, dst); + Arm64Reg dst_r = (dst_reg->kind == RCPU) ? (Arm64Reg)dst_reg->id : RTMP; + + // Get UTF-16 string pointer (hl_get_ustring converts from UTF-8 and caches) + const uchar *string_ptr = hl_get_ustring(ctx->m->code, string_index); + + // Load string address + load_immediate(ctx, (int64_t)string_ptr, dst_r, true); + + // Always store to stack - source of truth for later loads + str_stack(ctx, dst_r, dst->stackPos, dst->size); +} + +static void op_bytes(jit_ctx *ctx, vreg *dst, int bytes_index) { + // Load bytes from module bytes table + preg *dst_reg = alloc_dst(ctx, dst); + Arm64Reg dst_r = (dst_reg->kind == RCPU) ? 
(Arm64Reg)dst_reg->id : RTMP;
+
+	// Get bytes pointer from module - use bytes_pos lookup for version >= 5
+	char *bytes_ptr;
+	if (ctx->m->code->version >= 5)
+		bytes_ptr = ctx->m->code->bytes + ctx->m->code->bytes_pos[bytes_index];
+	else
+		bytes_ptr = ctx->m->code->strings[bytes_index];
+
+	// Load bytes address
+	load_immediate(ctx, (int64_t)bytes_ptr, dst_r, true);
+
+	// Always store to stack - source of truth for later loads
+	str_stack(ctx, dst_r, dst->stackPos, dst->size);
+}
+
+// Forward declaration for prepare_call_args (defined later)
+static int prepare_call_args(jit_ctx *ctx, hl_type **arg_types, vreg **args, int nargs, bool is_native);
+
+// ============================================================================
+// Dynamic Object Helpers
+// ============================================================================
+
+/**
+ * Get the appropriate dynamic set function for a type
+ */
+static void *get_dynset(hl_type *t) {
+	switch (t->kind) {
+	case HF32:
+		return hl_dyn_setf;
+	case HF64:
+		return hl_dyn_setd;
+	case HI64:
+	case HGUID:
+		return hl_dyn_seti64;
+	case HI32:
+	case HUI16:
+	case HUI8:
+	case HBOOL:
+		return hl_dyn_seti;
+	default:
+		return hl_dyn_setp;
+	}
+}
+
+/**
+ * Get the appropriate dynamic get function for a type
+ */
+static void *get_dynget(hl_type *t) {
+	switch (t->kind) {
+	case HF32:
+		return hl_dyn_getf;
+	case HF64:
+		return hl_dyn_getd;
+	case HI64:
+	case HGUID:
+		return hl_dyn_geti64;
+	case HI32:
+	case HUI16:
+	case HUI8:
+	case HBOOL:
+		return hl_dyn_geti;
+	default:
+		return hl_dyn_getp;
+	}
+}
+
+// ============================================================================
+// Method and Function Calls
+// ============================================================================
+
+static void op_call_method_obj(jit_ctx *ctx, vreg *dst, vreg *obj, int method_index, vreg **args, int nargs) {
+	// HOBJ method call: obj->type->vobj_proto[method_index](obj, args...)
+
+	// Spill all caller-saved registers BEFORE the call
+	spill_regs(ctx);
+
+	// Now fetch obj (will load from stack since we just spilled)
+	preg *obj_reg = fetch(ctx, obj);
+
+	Arm64Reg obj_r = (obj_reg->kind == RCPU) ?
(Arm64Reg)obj_reg->id : RTMP; + if (obj_reg->kind != RCPU) { + ldr_stack(ctx, obj_r, obj->stackPos, obj->size); + } + + // Load type from obj[0] + encode_ldr_str_imm(ctx, 0x03, 0, 0x01, 0, obj_r, RTMP); // RTMP = obj->type + // Load vobj_proto from type[16] (HL_WSIZE*2 = offset index 2) + encode_ldr_str_imm(ctx, 0x03, 0, 0x01, 2, RTMP, RTMP); // RTMP = type->vobj_proto + // Load method pointer from proto[method_index] into RTMP2 + // NOTE: We use RTMP2 here because prepare_call_args uses RTMP for stack calculations + encode_ldr_str_imm(ctx, 0x03, 0, 0x01, method_index, RTMP, RTMP2); + + discard(ctx, obj_reg); + + // Prepare call with obj as first argument + vreg **full_args = (vreg**)malloc(sizeof(vreg*) * (nargs + 1)); + full_args[0] = obj; + for (int i = 0; i < nargs; i++) { + full_args[i + 1] = args[i]; + } + + // Prepare arguments (this uses RTMP, but method pointer is safe in RTMP2) + int stack_space = prepare_call_args(ctx, NULL, full_args, nargs + 1, false); + free(full_args); + + // Call method: BLR RTMP2 + EMIT32(ctx,(0xD63F0000) | (RTMP2 << 5)); + + // Clean up stack + if (stack_space > 0) { + encode_add_sub_imm(ctx, 1, 0, 0, 0, stack_space, SP_REG, SP_REG); + } + + // Store return value + if (dst && dst->t->kind != HVOID) { + preg *p = alloc_dst(ctx, dst); + if (IS_FLOAT(dst)) { + if (p->kind == RFPU && (Arm64FpReg)p->id != V0) { + fmov_reg_reg(ctx, (Arm64FpReg)p->id, V0, dst->size); + } else if (p->kind == RSTACK) { + str_stack_fp(ctx, V0, dst->stackPos, dst->size); + } + } else { + if (p->kind == RCPU && (Arm64Reg)p->id != X0) { + mov_reg_reg(ctx, (Arm64Reg)p->id, X0, dst->size); + } else if (p->kind == RSTACK) { + str_stack(ctx, X0, dst->stackPos, dst->size); + } + } + } +} + +// ============================================================================ +// Function Calls +// ============================================================================ + +/* + * Prepare arguments for a function call according to AAPCS64: + * - First 8 integer/pointer args in X0-X7 + * - First 8 floating-point args in V0-V7 + * - Additional args on stack (16-byte aligned) + * - Returns the total stack space needed for overflow args + */ +static int prepare_call_args(jit_ctx *ctx, hl_type **arg_types, vreg **args, int nargs, bool is_native) { + int int_reg_count = 0; + int fp_reg_count = 0; + int stack_offset = 0; + + // First pass: count args and calculate stack space needed + for (int i = 0; i < nargs; i++) { + bool is_fp = IS_FLOAT(args[i]); + int *reg_count = is_fp ? &fp_reg_count : &int_reg_count; + + if (*reg_count >= CALL_NREGS) { + // Arg goes on stack + stack_offset += 8; // Each stack arg takes 8 bytes (aligned) + } + (*reg_count)++; + } + + // Align stack to 16 bytes + if (stack_offset & 15) + stack_offset = (stack_offset + 15) & ~15; + + // Allocate stack space for overflow args if needed + if (stack_offset > 0) { + // SUB SP, SP, #stack_offset + encode_add_sub_imm(ctx, 1, 1, 0, 0, stack_offset, SP_REG, SP_REG); + } + + // Second pass: move arguments to their locations + int_reg_count = 0; + fp_reg_count = 0; + int current_stack_offset = 0; + + // After spill_regs(), all values are on stack. + // Load arguments directly to their destination registers to avoid + // the register allocation problem where fetch() reuses registers. 
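+	// Example (sketch): for a call f(i32 a, f64 b, i32 c) this loop emits
+	//   LDR W0, [FP, #a.stackPos]   ; a -> X0 (1st integer slot)
+	//   LDR D0, [FP, #b.stackPos]   ; b -> V0 (1st FP slot)
+	//   LDR W1, [FP, #c.stackPos]   ; c -> X1 (2nd integer slot)
+	// Integer and FP counters advance independently, as AAPCS64 requires.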
+ for (int i = 0; i < nargs; i++) { + vreg *arg = args[i]; + bool is_fp = IS_FLOAT(arg); + + if (is_fp) { + if (fp_reg_count < CALL_NREGS) { + // Load directly to FP argument register + Arm64FpReg dest_reg = FP_CALL_REGS[fp_reg_count]; + ldr_stack_fp(ctx, dest_reg, arg->stackPos, arg->size); + fp_reg_count++; + } else { + // Overflow: load to temp, then store to stack + ldr_stack_fp(ctx, V16, arg->stackPos, arg->size); + encode_ldr_str_imm(ctx, arg->size == 4 ? 0x02 : 0x03, 1, 0x00, + current_stack_offset / (arg->size == 4 ? 4 : 8), + SP_REG, V16); + current_stack_offset += 8; + } + } else { + // Integer/pointer argument + if (int_reg_count < CALL_NREGS) { + // Load directly to integer argument register + Arm64Reg dest_reg = CALL_REGS[int_reg_count]; + ldr_stack(ctx, dest_reg, arg->stackPos, arg->size); + int_reg_count++; + } else { + // Overflow: load to temp, then store to stack + ldr_stack(ctx, RTMP, arg->stackPos, arg->size); + encode_ldr_str_imm(ctx, arg->size == 8 ? 0x03 : 0x02, 0, 0x00, + current_stack_offset / (arg->size == 8 ? 8 : 4), + SP_REG, RTMP); + current_stack_offset += 8; + } + } + } + + return stack_offset; +} + +/* + * Call a native C function + */ +static void op_call_native(jit_ctx *ctx, vreg *dst, hl_type *ftype, void *func_ptr, vreg **args, int nargs) { + // Spill all caller-saved registers BEFORE the call + spill_regs(ctx); + + // Prepare arguments (arg_types not actually used by prepare_call_args) + int stack_space = prepare_call_args(ctx, NULL, args, nargs, true); + + // Load function pointer to RTMP + load_immediate(ctx, (int64_t)func_ptr, RTMP, true); + + // BLR RTMP (Branch with Link to Register) + // Encoding: 1101 0110 0011 1111 0000 00rr rrr0 0000 + // where rrrrr = RTMP register number + EMIT32(ctx,(0xD63F0000) | (RTMP << 5)); + + // Clean up stack if we allocated space for args + if (stack_space > 0) { + // ADD SP, SP, #stack_space + encode_add_sub_imm(ctx, 1, 0, 0, 0, stack_space, SP_REG, SP_REG); + } + + // Store return value if needed + if (dst && dst->t->kind != HVOID) { + // Always store to stack first (source of truth for later loads) + if (IS_FLOAT(dst)) { + str_stack_fp(ctx, V0, dst->stackPos, dst->size); + } else { + str_stack(ctx, X0, dst->stackPos, dst->size); + } + + // Also keep in a register if allocated to a different one + preg *p = alloc_dst(ctx, dst); + if (IS_FLOAT(dst)) { + if (p->kind == RFPU && (Arm64FpReg)p->id != V0) { + fmov_reg_reg(ctx, (Arm64FpReg)p->id, V0, dst->size); + } + } else { + if (p->kind == RCPU && (Arm64Reg)p->id != X0) { + mov_reg_reg(ctx, (Arm64Reg)p->id, X0, dst->size); + } + } + } +} + +/* + * Call a native function with a known absolute address + * The address is embedded directly in the instruction stream (no patching needed) + */ +static void call_native(jit_ctx *ctx, void *nativeFun, int stack_space) { + // Emit indirect call sequence with the address embedded inline: + // LDR X17, #12 ; load target address from PC+12 + // BLR X17 ; call + // B #12 ; skip over the literal + // .quad addr ; 8-byte absolute address (embedded now, not patched later) + + EMIT32(ctx, 0x58000071); // LDR X17, #12 + EMIT32(ctx, 0xD63F0220); // BLR X17 + EMIT32(ctx, 0x14000003); // B #12 (skip 3 instructions = 12 bytes) + + // Embed the native function address directly + uint64_t addr = (uint64_t)nativeFun; + EMIT32(ctx, (uint32_t)(addr & 0xFFFFFFFF)); // Low 32 bits + EMIT32(ctx, (uint32_t)((addr >> 32) & 0xFFFFFFFF)); // High 32 bits + + // Clean up stack if we allocated space for args + if (stack_space > 0) { + 
encode_add_sub_imm(ctx, 1, 0, 0, 0, stack_space, SP_REG, SP_REG); + } +} + +/* + * Emit a call to a function by its index (without spill/prepare - for use when those are already done) + * Used by compareFun and other places that set up args manually + */ +static void emit_call_findex(jit_ctx *ctx, int findex, int stack_space) { + int fid = findex < 0 ? -1 : ctx->m->functions_indexes[findex]; + bool isNative = fid >= ctx->m->code->nfunctions; + + if (fid < 0) { + jit_error("Invalid function index"); + } else if (isNative) { + // Native function - address is already resolved + call_native(ctx, ctx->m->functions_ptrs[findex], stack_space); + } else { + // JIT function - use indirect call via literal pool (patched later) + EMIT32(ctx, 0x58000071); // LDR X17, #12 + EMIT32(ctx, 0xD63F0220); // BLR X17 + + // Register literal position for patching + jlist *j = (jlist*)hl_malloc(&ctx->galloc, sizeof(jlist)); + j->pos = BUF_POS() + 4; // Position of the 8-byte literal (after B instruction) + j->target = findex; + j->next = ctx->calls; + ctx->calls = j; + + EMIT32(ctx, 0x14000003); // B #12 (skip 3 instructions = 12 bytes) + EMIT32(ctx, 0); // Low 32 bits placeholder + EMIT32(ctx, 0); // High 32 bits placeholder + + // Clean up stack if we allocated space for args + if (stack_space > 0) { + encode_add_sub_imm(ctx, 1, 0, 0, 0, stack_space, SP_REG, SP_REG); + } + } +} + +/* + * Call a HashLink function (native or JIT-compiled) + * For OCall0-OCall4, OCallN + */ +static void op_call_hl(jit_ctx *ctx, vreg *dst, int findex, vreg **args, int nargs) { + // Spill all caller-saved registers BEFORE the call + // This must happen before prepare_call_args to save values that might be clobbered + spill_regs(ctx); + + // Prepare arguments + int stack_space = prepare_call_args(ctx, NULL, args, nargs, false); + + // Check if this is a native function or JIT function + int fid = findex < 0 ? -1 : ctx->m->functions_indexes[findex]; + bool isNative = fid >= ctx->m->code->nfunctions; + + if (fid < 0) { + // Invalid function index + jit_error("Invalid function index"); + } else if (isNative) { + // Native function - address is already resolved, call directly + call_native(ctx, ctx->m->functions_ptrs[findex], stack_space); + } else { + // JIT function - use indirect call via literal pool (patched later) + // During JIT compilation, functions_ptrs contains CODE OFFSETS. + // The conversion to absolute addresses happens in hl_jit_code. 
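+		// (A direct BL is not used here: its signed 26-bit immediate only
+		// reaches +/-128MB, and the callee's final address is unknown anyway
+		// until hl_jit_code resolves it.)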
+ // + // Sequence: + // LDR X17, #12 ; load target address from PC+12 + // BLR X17 ; call + // B #12 ; skip over the literal + // .quad addr ; 8-byte address placeholder (patched later) + + EMIT32(ctx, 0x58000071); // LDR X17, #12 + EMIT32(ctx, 0xD63F0220); // BLR X17 + + // Register literal position for patching + jlist *j = (jlist*)hl_malloc(&ctx->galloc, sizeof(jlist)); + j->pos = BUF_POS() + 4; // Position of the 8-byte literal (after B instruction) + j->target = findex; + j->next = ctx->calls; + ctx->calls = j; + + EMIT32(ctx, 0x14000003); // B #12 (skip 3 instructions = 12 bytes) + EMIT32(ctx, 0); // Low 32 bits placeholder + EMIT32(ctx, 0); // High 32 bits placeholder + + // Clean up stack if we allocated space for args + if (stack_space > 0) { + encode_add_sub_imm(ctx, 1, 0, 0, 0, stack_space, SP_REG, SP_REG); + } + } + + // Note: spill_regs was already called before prepare_call_args + + // Store return value if needed + if (dst && dst->t->kind != HVOID) { + // Always store to stack first (source of truth for later loads) + if (IS_FLOAT(dst)) { + str_stack_fp(ctx, V0, dst->stackPos, dst->size); + } else { + str_stack(ctx, X0, dst->stackPos, dst->size); + } + + // Also keep in a register if allocated to a different one + preg *p = alloc_dst(ctx, dst); + if (IS_FLOAT(dst)) { + if (p->kind == RFPU && (Arm64FpReg)p->id != V0) { + fmov_reg_reg(ctx, (Arm64FpReg)p->id, V0, dst->size); + } + } else { + if (p->kind == RCPU && (Arm64Reg)p->id != X0) { + mov_reg_reg(ctx, (Arm64Reg)p->id, X0, dst->size); + } + } + } +} + +// ============================================================================ +// C↔HL Trampolines +// ============================================================================ + +static void *call_jit_c2hl = NULL; +static void *call_jit_hl2c = NULL; + +// Maximum args for dynamic calls +#define MAX_ARGS 64 + +/** + * Wrapper function for HL->C calls - unpacks arguments and calls the wrapped function. + * Called from jit_hl2c trampoline. 
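+ * Register image layout assumed here (as written by the jit_hl2c prologue):
+ *   regs[0..7]  = X0-X7 in order (regs[0] is the closure pointer itself),
+ *   regs[8..15] = V0-V7 stored as 64-bit doubles.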
+ */ +static vdynamic *jit_wrapper_call(vclosure_wrapper *c, char *stack_args, void **regs) { + vdynamic *args[MAX_ARGS]; + int i; + int nargs = c->cl.t->fun->nargs; + int nextCpu = 1; // Skip X0 which holds the closure pointer + int nextFpu = 0; + + if (nargs > MAX_ARGS) + hl_error("Too many arguments for wrapped call"); + + for (i = 0; i < nargs; i++) { + hl_type *t = c->cl.t->fun->args[i]; + + if (t->kind == HF32 || t->kind == HF64) { + // Float argument + if (nextFpu < CALL_NREGS) { + // In FP register - regs[CALL_NREGS + fpu_index] + args[i] = hl_make_dyn(regs + CALL_NREGS + nextFpu, &hlt_f64); + nextFpu++; + } else { + // On stack + args[i] = hl_make_dyn(stack_args, &hlt_f64); + stack_args += 8; + } + } else { + // Integer/pointer argument + if (nextCpu < CALL_NREGS) { + // In CPU register + if (hl_is_dynamic(t)) { + args[i] = *(vdynamic**)(regs + nextCpu); + } else { + args[i] = hl_make_dyn(regs + nextCpu, t); + } + nextCpu++; + } else { + // On stack + if (hl_is_dynamic(t)) { + args[i] = *(vdynamic**)stack_args; + } else { + args[i] = hl_make_dyn(stack_args, t); + } + stack_args += 8; + } + } + } + return hl_dyn_call(c->wrappedFun, args, nargs); +} + +/** + * Wrapper for pointer-returning HL->C calls + */ +static void *jit_wrapper_ptr(vclosure_wrapper *c, char *stack_args, void **regs) { + vdynamic *ret = jit_wrapper_call(c, stack_args, regs); + hl_type *tret = c->cl.t->fun->ret; + switch (tret->kind) { + case HVOID: + return NULL; + case HUI8: + case HUI16: + case HI32: + case HBOOL: + return (void*)(int_val)hl_dyn_casti(&ret, &hlt_dyn, tret); + case HI64: + case HGUID: + return (void*)(int_val)hl_dyn_casti64(&ret, &hlt_dyn); + default: + return hl_dyn_castp(&ret, &hlt_dyn, tret); + } +} + +/** + * Wrapper for float-returning HL->C calls + */ +static double jit_wrapper_d(vclosure_wrapper *c, char *stack_args, void **regs) { + vdynamic *ret = jit_wrapper_call(c, stack_args, regs); + return hl_dyn_castd(&ret, &hlt_dyn); +} + +/** + * Select which register to use for an argument based on type and position. + * Returns register ID or -1 if should go on stack. + */ +static int select_call_reg_c2hl(int *nextCpu, int *nextFpu, hl_type *t) { + if (t->kind == HF32 || t->kind == HF64) { + if (*nextFpu < CALL_NREGS) + return RCPU_COUNT + (*nextFpu)++; // FPU register + return -1; // Stack + } else { + if (*nextCpu < CALL_NREGS) + return (*nextCpu)++; // CPU register + return -1; // Stack + } +} + +/** + * Get the stack size for a type + */ +static int stack_size_c2hl(hl_type *t) { + switch (t->kind) { + case HUI8: + case HBOOL: + return 1; + case HUI16: + return 2; + case HI32: + case HF32: + return 4; + default: + return 8; + } +} + +/** + * Callback function that prepares arguments and calls the JIT trampoline. + * Called from C code to invoke JIT-compiled functions. 
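+ * The argument image handed to call_jit_c2hl mirrors what the jit_c2hl
+ * trampoline expects: overflow stack words first, then the X0-X7 image,
+ * then the V0-V7 image (see the stack layout comment in the body below).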
+ */ +static void *callback_c2hl(void *_f, hl_type *t, void **args, vdynamic *ret) { + void **f = (void**)_f; + // Stack layout: + // [0..size) = stack args (pushed in reverse) + // [size..size+CALL_NREGS*8) = integer register args (X0-X7) + // [size+CALL_NREGS*8..size+CALL_NREGS*16) = FP register args (V0-V7) + unsigned char stack[MAX_ARGS * 16]; + int nextCpu = 0, nextFpu = 0; + int mappedRegs[MAX_ARGS]; + + // Zero-initialize the stack to avoid passing garbage to unused registers + // The jit_c2hl trampoline loads ALL 8 int + 8 FP registers unconditionally + memset(stack, 0, sizeof(stack)); + + if (t->fun->nargs > MAX_ARGS) + hl_error("Too many arguments for dynamic call"); + + // First pass: determine register assignments and stack size + int i, size = 0; + for (i = 0; i < t->fun->nargs; i++) { + hl_type *at = t->fun->args[i]; + int creg = select_call_reg_c2hl(&nextCpu, &nextFpu, at); + mappedRegs[i] = creg; + if (creg < 0) { + int tsize = stack_size_c2hl(at); + if (tsize < 8) tsize = 8; // Align to 8 bytes on stack + size += tsize; + } + } + + // Align stack size to 16 bytes + int pad = (-size) & 15; + size += pad; + + // Second pass: copy arguments to appropriate locations + int pos = 0; + for (i = 0; i < t->fun->nargs; i++) { + hl_type *at = t->fun->args[i]; + void *v = args[i]; + int creg = mappedRegs[i]; + void *store; + + if (creg >= 0) { + if (creg >= RCPU_COUNT) { + // FP register - stored after integer registers + store = stack + size + CALL_NREGS * 8 + (creg - RCPU_COUNT) * 8; + } else { + // Integer register + store = stack + size + creg * 8; + } + switch (at->kind) { + case HBOOL: + case HUI8: + *(int64*)store = *(unsigned char*)v; + break; + case HUI16: + *(int64*)store = *(unsigned short*)v; + break; + case HI32: + *(int64*)store = *(int*)v; + break; + case HF32: + *(double*)store = *(float*)v; + break; + case HF64: + *(double*)store = *(double*)v; + break; + case HI64: + case HGUID: + *(int64*)store = *(int64*)v; + break; + default: + *(void**)store = v; + break; + } + } else { + // Stack argument + store = stack + pos; + int tsize = 8; + switch (at->kind) { + case HBOOL: + case HUI8: + *(int64*)store = *(unsigned char*)v; + break; + case HUI16: + *(int64*)store = *(unsigned short*)v; + break; + case HI32: + case HF32: + *(int64*)store = *(int*)v; + break; + case HF64: + *(double*)store = *(double*)v; + break; + case HI64: + case HGUID: + *(int64*)store = *(int64*)v; + break; + default: + *(void**)store = v; + break; + } + pos += tsize; + } + } + + pos += pad; + pos >>= 3; // Convert to 64-bit units + + // Call the trampoline with: function pointer, reg args pointer, stack args end + switch (t->fun->ret->kind) { + case HUI8: + case HUI16: + case HI32: + case HBOOL: + ret->v.i = ((int (*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)stack + pos, stack); + return &ret->v.i; + case HI64: + case HGUID: + ret->v.i64 = ((int64 (*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)stack + pos, stack); + return &ret->v.i64; + case HF32: + ret->v.f = ((float (*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)stack + pos, stack); + return &ret->v.f; + case HF64: + ret->v.d = ((double (*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)stack + pos, stack); + return &ret->v.d; + default: + return ((void *(*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)stack + pos, stack); + } +} + +/** + * Generate the HL-to-C trampoline. + * Called from C code with a vclosure_wrapper* in X0 and native args in X1-X7, V0-V7. 
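+ * (The immediate caller is JIT-compiled HashLink code invoking a wrapper
+ * closure; control reaches plain C through jit_wrapper_ptr / jit_wrapper_d.)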
+ * Saves registers and calls jit_wrapper_ptr or jit_wrapper_d based on return type. + */ +static void jit_hl2c(jit_ctx *ctx) { + hl_type_fun *ft = NULL; + + // Function prologue - save frame + // STP X29, X30, [SP, #-16]! + encode_ldp_stp(ctx, 0x02, 0, 0x03, -2, LR, SP_REG, FP); + // MOV X29, SP + mov_reg_reg(ctx, FP, SP_REG, true); + + // Allocate space for saved registers: 8 CPU regs + 8 FP regs = 16 * 8 = 128 bytes + // SUB SP, SP, #128 + encode_add_sub_imm(ctx, 1, 1, 0, 0, 128, SP_REG, SP_REG); + + // Trampoline marker: MOV W17, #0xE001 (HL2C trampoline) + EMIT32(ctx, 0x52800011 | (0xE001 << 5)); + + // Save integer argument registers X0-X7 at [SP, #0..63] + for (int i = 0; i < CALL_NREGS; i++) { + encode_ldr_str_imm(ctx, 0x03, 0, 0x00, i, SP_REG, i); // STR Xi, [SP, #i*8] + } + + // Save FP argument registers V0-V7 at [SP, #64..127] + for (int i = 0; i < CALL_NREGS; i++) { + encode_ldr_str_imm(ctx, 0x03, 1, 0x00, 8 + i, SP_REG, i); // STR Di, [SP, #(8+i)*8] + } + + // X0 = closure pointer (vclosure_wrapper*) + // Check return type: closure->t->fun->ret->kind + // X9 = X0->t + encode_ldr_str_imm(ctx, 0x03, 0, 0x01, 0, X0, X9); + // X9 = X9->fun (hl_type->fun is at offset 8 on 64-bit) + encode_ldr_str_imm(ctx, 0x03, 0, 0x01, 1, X9, X9); + // X9 = X9->ret (hl_type_fun->ret offset) + int ret_offset = (int)(int_val)&ft->ret; + if (ret_offset < 4096 && (ret_offset % 8) == 0) { + encode_ldr_str_imm(ctx, 0x03, 0, 0x01, ret_offset / 8, X9, X9); + } else { + load_immediate(ctx, ret_offset, RTMP, true); + encode_ldr_str_reg(ctx, 0x03, 0, 0x01, RTMP, 0x03, 0, X9, X9); + } + // W9 = X9->kind (hl_type->kind is at offset 0, 32-bit) + encode_ldr_str_imm(ctx, 0x02, 0, 0x01, 0, X9, X9); + + // Compare with HF64 and HF32 + // CMP W9, #HF64 + encode_add_sub_imm(ctx, 0, 1, 1, 0, HF64, X9, XZR); + int jfloat1 = BUF_POS(); + encode_branch_cond(ctx, 0, COND_EQ); // B.EQ float_path + + // CMP W9, #HF32 + encode_add_sub_imm(ctx, 0, 1, 1, 0, HF32, X9, XZR); + int jfloat2 = BUF_POS(); + encode_branch_cond(ctx, 0, COND_EQ); // B.EQ float_path + + // Integer/pointer path: call jit_wrapper_ptr(closure, stack_args, regs) + // X0 = closure (reload from saved regs) + encode_ldr_str_imm(ctx, 0x03, 0, 0x01, 0, SP_REG, X0); + // X1 = stack_args (FP + 16 is return address area, args start after saved frame) + encode_add_sub_imm(ctx, 1, 0, 0, 0, 16, FP, X1); + // X2 = regs pointer (SP) + mov_reg_reg(ctx, X2, SP_REG, true); + + load_immediate(ctx, (int64_t)jit_wrapper_ptr, RTMP, true); + EMIT32(ctx, 0xD63F0000 | (RTMP << 5)); // BLR RTMP + + // Result in X0, jump to exit + int jexit = BUF_POS(); + encode_branch_uncond(ctx, 0); // B exit + + // Float path + int float_pos = BUF_POS(); + patch_jump(ctx, jfloat1, float_pos); + patch_jump(ctx, jfloat2, float_pos); + + // X0 = closure (reload) + encode_ldr_str_imm(ctx, 0x03, 0, 0x01, 0, SP_REG, X0); + // X1 = stack_args + encode_add_sub_imm(ctx, 1, 0, 0, 0, 16, FP, X1); + // X2 = regs pointer + mov_reg_reg(ctx, X2, SP_REG, true); + + load_immediate(ctx, (int64_t)jit_wrapper_d, RTMP, true); + EMIT32(ctx, 0xD63F0000 | (RTMP << 5)); // BLR RTMP + // Result in V0 + + // Exit path + int exit_pos = BUF_POS(); + patch_jump(ctx, jexit, exit_pos); + + // Restore frame and return + // MOV SP, X29 + mov_reg_reg(ctx, SP_REG, FP, true); + // LDP X29, X30, [SP], #16 + encode_ldp_stp(ctx, 0x02, 0, 0x01, 2, LR, SP_REG, FP); + // RET + encode_branch_reg(ctx, 0x02, LR); +} + +/** + * Generate the C-to-HL trampoline. 
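+ * Counterpart of callback_c2hl: portable C cannot place values directly
+ * into X0-X7/V0-V7, so it builds an argument image in memory and this
+ * stub loads it into the real registers before branching.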
+ * Input: X0 = function pointer, X1 = reg args pointer, X2 = stack args end + * The trampoline loads arguments from the prepared stack and calls the function. + */ +static void jit_c2hl(jit_ctx *ctx) { + // Save callee-saved registers and set up frame + // STP X29, X30, [SP, #-16]! + encode_ldp_stp(ctx, 0x02, 0, 0x03, -2, LR, SP_REG, FP); + // MOV X29, SP + mov_reg_reg(ctx, FP, SP_REG, true); + + // Trampoline marker: MOV W17, #0xE002 (C2HL trampoline) + EMIT32(ctx, 0x52800011 | (0xE002 << 5)); + + // Save function pointer to X9 (caller-saved, will survive loads) + // MOV X9, X0 + mov_reg_reg(ctx, X9, X0, true); + + // Save stack args pointers to X10, X11 + // MOV X10, X1 (reg args pointer) + // MOV X11, X2 (stack args end) + mov_reg_reg(ctx, X10, X1, true); + mov_reg_reg(ctx, X11, X2, true); + + // Load integer register arguments X0-X7 from [X10] + for (int i = 0; i < CALL_NREGS; i++) { + // LDR Xi, [X10, #i*8] + encode_ldr_str_imm(ctx, 0x03, 0, 0x01, i, X10, CALL_REGS[i]); + } + + // Load FP register arguments V0-V7 from [X10 + CALL_NREGS*8] + for (int i = 0; i < CALL_NREGS; i++) { + // LDR Di, [X10, #(CALL_NREGS + i)*8] + // Using 64-bit FP load: size=11, opc=01 + EMIT32(ctx,0xFD400000 | (((CALL_NREGS + i) & 0x1FF) << 10) | (X10 << 5) | FP_CALL_REGS[i]); + } + + // Push stack args: loop from X11 to X10, pushing each 8-byte value + // Calculate how many stack args: (X10 - X11) / 8 + // Compare X10 and X11 + int loop_start = BUF_POS(); + // CMP X10, X11 + encode_add_sub_reg(ctx, 1, 1, 1, 0, X11, 0, X10, XZR); + + // B.EQ done (if X10 == X11, no more stack args) + int beq_pos = BUF_POS(); + EMIT32(ctx,0x54000000 | (COND_EQ & 0xF)); // B.EQ (will patch) + + // SUB X10, X10, #8 + encode_add_sub_imm(ctx, 1, 1, 0, 0, 8, X10, X10); + + // LDR X12, [X10] + encode_ldr_str_imm(ctx, 0x03, 0, 0x01, 0, X10, X12); + + // STR X12, [SP, #-16]! (push with pre-decrement, keeping 16-byte alignment) + // We'll push pairs to maintain alignment - but for simplicity, push 16 at a time + // SUB SP, SP, #16 + encode_add_sub_imm(ctx, 1, 1, 0, 0, 16, SP_REG, SP_REG); + // STR X12, [SP] + encode_ldr_str_imm(ctx, 0x03, 0, 0x00, 0, SP_REG, X12); + + // B loop_start + int b_offset = (loop_start - BUF_POS()) / 4; + EMIT32(ctx,0x14000000 | (b_offset & 0x3FFFFFF)); + + // Patch the B.EQ to jump here + int done_pos = BUF_POS(); + int beq_offset = (done_pos - beq_pos) / 4; + ctx->buf.w = (unsigned int*)(ctx->startBuf + beq_pos); + EMIT32(ctx,0x54000000 | ((beq_offset & 0x7FFFF) << 5) | (COND_EQ & 0xF)); + ctx->buf.w = (unsigned int*)(ctx->startBuf + done_pos); + + // Call the function: BLR X9 + EMIT32(ctx,0xD63F0000 | (X9 << 5)); + + // Restore frame and return + // MOV SP, X29 + mov_reg_reg(ctx, SP_REG, FP, true); + // LDP X29, X30, [SP], #16 + encode_ldp_stp(ctx, 0x02, 0, 0x01, 2, LR, SP_REG, FP); + // RET + encode_branch_reg(ctx, 0x02, LR); +} + +/** + * Get wrapper function for HL-to-C calls. + * This is used for callbacks from C code back into HashLink. + * Returns the jit_hl2c trampoline address. 
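+ * The type parameter is currently unused: a single generic trampoline
+ * serves every signature, dispatching on the closure's return kind at
+ * run time.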
+ */ +static void *get_wrapper(hl_type *t) { + return call_jit_hl2c; +} + +// ============================================================================ +// JIT API Implementation +// ============================================================================ + +// Forward declaration +static void hl_jit_init_module(jit_ctx *ctx, hl_module *m); + +jit_ctx *hl_jit_alloc() { + jit_ctx *ctx = (jit_ctx*)malloc(sizeof(jit_ctx)); + if (ctx == NULL) + return NULL; + memset(ctx, 0, sizeof(jit_ctx)); + return ctx; +} + +void hl_jit_free(jit_ctx *ctx, h_bool can_reset) { + if (ctx == NULL || ctx->freed) + return; + + // Mark as freed immediately to prevent double-free + ctx->freed = true; + + // Free and NULL each pointer atomically to prevent use-after-free window + if (ctx->startBuf) { + void *tmp = ctx->startBuf; + ctx->startBuf = NULL; + free(tmp); + } + if (ctx->vregs) { + void *tmp = ctx->vregs; + ctx->vregs = NULL; + free(tmp); + } + if (ctx->opsPos) { + void *tmp = ctx->opsPos; + ctx->opsPos = NULL; + free(tmp); + } + if (ctx->debug) { + void *tmp = ctx->debug; + ctx->debug = NULL; + free(tmp); + } + + // Clear remaining fields + ctx->buf.b = NULL; + ctx->bufSize = 0; + ctx->maxRegs = 0; + ctx->maxOps = 0; + ctx->calls = NULL; + // closure_list is managed by GC (allocated in falloc/galloc) + + // Free allocators before freeing ctx + hl_free(&ctx->falloc); + hl_free(&ctx->galloc); + + if (!can_reset) { +#ifdef GC_DEBUG + // Poison memory to catch use-after-free in debug builds + memset(ctx, 0xDD, sizeof(jit_ctx)); +#endif + free(ctx); + } +} + +void hl_jit_reset(jit_ctx *ctx, hl_module *m) { + ctx->freed = false; // Allow reuse after reset + ctx->debug = NULL; + hl_jit_init_module(ctx, m); +} + +/** + * Build a JIT helper function, ensuring buffer is allocated. + * Returns the position in the buffer where the function starts. + */ +static int jit_build(jit_ctx *ctx, void (*fbuild)(jit_ctx *)) { + int pos; + jit_buf(ctx); // Ensure buffer is allocated + pos = BUF_POS(); + fbuild(ctx); + return pos; +} + +/** + * Initialize module-specific data in JIT context. + */ +static void hl_jit_init_module(jit_ctx *ctx, hl_module *m) { + int i; + ctx->m = m; + ctx->closure_list = NULL; + + // Allocate debug info array if bytecode has debug info + if (m->code->hasdebug && m->code->nfunctions > 0) { + ctx->debug = (hl_debug_infos*)malloc(sizeof(hl_debug_infos) * m->code->nfunctions); + if (ctx->debug) + memset(ctx->debug, 0, sizeof(hl_debug_infos) * m->code->nfunctions); + } + + // Store float constants in the code buffer (like x86 does) + for (i = 0; i < m->code->nfloats; i++) { + jit_buf(ctx); + *ctx->buf.d++ = m->code->floats[i]; + } +} + +void hl_jit_init(jit_ctx *ctx, hl_module *m) { + hl_jit_init_module(ctx, m); + + // Generate C↔HL trampolines + ctx->c2hl = jit_build(ctx, jit_c2hl); + ctx->hl2c = jit_build(ctx, jit_hl2c); +} + +/** + * Allocate a static closure object. + * For native functions, the function pointer is set immediately. + * For JIT functions, the function pointer is stored temporarily as the findex + * and the closure is added to closure_list for later patching. 
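+ * closure_list is threaded through the pending closures' value fields,
+ * so the patch pass can walk them without any extra allocation once the
+ * final code addresses are known.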
+ */ +static vclosure *alloc_static_closure(jit_ctx *ctx, int fid) { + hl_module *m = ctx->m; + vclosure *c = hl_malloc(&m->ctx.alloc, sizeof(vclosure)); + int fidx = m->functions_indexes[fid]; + c->hasValue = 0; + if (fidx >= m->code->nfunctions) { + // Native function - pointer is already resolved + c->t = m->code->natives[fidx - m->code->nfunctions].t; + c->fun = m->functions_ptrs[fid]; + c->value = NULL; + } else { + // JIT function - store fid temporarily, add to closure_list for patching + c->t = m->code->functions[fidx].type; + c->fun = (void*)(int_val)fid; + c->value = ctx->closure_list; + ctx->closure_list = c; + } + return c; +} + +int hl_jit_function(jit_ctx *ctx, hl_module *m, hl_function *f) { + int i, size = 0, opCount; + int codePos = BUF_POS(); + int nargs = f->type->fun->nargs; + unsigned short *debug16 = NULL; + int *debug32 = NULL; + + ctx->f = f; + ctx->m = m; + ctx->allocOffset = 0; + + // Allocate virtual register array if needed + if (f->nregs > ctx->maxRegs) { + free(ctx->vregs); + ctx->vregs = (vreg*)calloc(f->nregs + 1, sizeof(vreg)); + if (ctx->vregs == NULL) { + ctx->maxRegs = 0; + return -1; + } + ctx->maxRegs = f->nregs; + } + + // Allocate opcode position array if needed + if (f->nops > ctx->maxOps) { + free(ctx->opsPos); + ctx->opsPos = (int*)malloc(sizeof(int) * (f->nops + 1)); + if (ctx->opsPos == NULL) { + ctx->maxOps = 0; + return -1; + } + ctx->maxOps = f->nops; + } + + memset(ctx->opsPos, 0, (f->nops + 1) * sizeof(int)); + + // Clear/initialize physical registers + for (i = 0; i < RCPU_COUNT; i++) { + preg *p = &ctx->pregs[i]; + p->kind = RCPU; + p->id = i; + p->holds = NULL; + p->lock = 0; + } + for (i = 0; i < RFPU_COUNT; i++) { + preg *p = &ctx->pregs[RCPU_COUNT + i]; + p->kind = RFPU; + p->id = i; + p->holds = NULL; + p->lock = 0; + } + + // Initialize virtual registers + for (i = 0; i < f->nregs; i++) { + vreg *r = R(i); + r->t = f->regs[i]; + r->size = hl_type_size(r->t); + r->stackPos = 0; + r->current = NULL; + r->stack.holds = NULL; + r->stack.id = i; + r->stack.kind = RSTACK; + r->stack.lock = 0; + } + + // Calculate stack layout + // Arguments: first 8 integer args in X0-X7, first 8 FP args in V0-V7 + // Additional args on stack + size = 0; + int argsSize = 0; + int int_arg_count = 0; + int fp_arg_count = 0; + + for (i = 0; i < nargs; i++) { + vreg *r = R(i); + bool is_fp = IS_FLOAT(r); + int *arg_count = is_fp ? 
+	// Calculate stack layout
+	// Arguments: first 8 integer args in X0-X7, first 8 FP args in V0-V7;
+	// additional args are passed on the stack
+	size = 0;
+	int argsSize = 0;
+	int int_arg_count = 0;
+	int fp_arg_count = 0;
+
+	for (i = 0; i < nargs; i++) {
+		vreg *r = R(i);
+		bool is_fp = IS_FLOAT(r);
+		int *arg_count = is_fp ? &fp_arg_count : &int_arg_count;
+
+		if (*arg_count < CALL_NREGS) {
+			// Argument arrives in a register - allocate stack space for it
+			size += r->size;
+			size += hl_pad_size(size, r->t);
+			r->stackPos = -size;
+			(*arg_count)++;
+		} else {
+			// Argument is on the stack (caller's frame)
+			// +96 for saved callee-saved (64 bytes) + RTMP/RTMP2 (16 bytes) + FP/LR (16 bytes)
+			// Each stack arg occupies 8 bytes (matching the caller's prepare_call_args)
+			r->stackPos = argsSize + 96;
+			argsSize += 8;
+		}
+	}
+
+	// Local variables
+	for (i = nargs; i < f->nregs; i++) {
+		vreg *r = R(i);
+		size += r->size;
+		size += hl_pad_size(size, r->t);
+		r->stackPos = -size;
+	}
+
+	// Align the stack frame to 16 bytes
+	size += (-size) & 15;
+	ctx->totalRegsSize = size;
+
+	jit_buf(ctx);
+	ctx->functionPos = BUF_POS();
+	ctx->currentPos = 1;
+
+	// Initialize Phase 2 callee-saved tracking
+	ctx->callee_saved_used = 0;
+	memset(ctx->stp_positions, 0, sizeof(ctx->stp_positions));
+	memset(ctx->ldp_positions, 0, sizeof(ctx->ldp_positions));
+
+	// Function prologue - offset-based for selective NOP patching (Phase 2)
+	// Reserve space for callee-saved (64 bytes) + RTMP/RTMP2 (16 bytes) + FP/LR (16 bytes) = 96 bytes
+	encode_add_sub_imm(ctx, 1, 1, 0, 0, 96, SP_REG, SP_REG); // SUB SP, SP, #96
+
+	// Save RTMP/RTMP2 (X27, X28) - NOT NOPpable, as they are used internally by the JIT
+	stp_offset(ctx, RTMP, RTMP2, SP_REG, 80); // STP X27, X28, [SP, #80]
+
+	// Save callee-saved registers at fixed offsets (NOPpable) - positions recorded for backpatching
+	ctx->stp_positions[0] = BUF_POS();
+	stp_offset(ctx, X25, X26, SP_REG, 64); // STP X25, X26, [SP, #64]
+
+	ctx->stp_positions[1] = BUF_POS();
+	stp_offset(ctx, X23, X24, SP_REG, 48); // STP X23, X24, [SP, #48]
+
+	ctx->stp_positions[2] = BUF_POS();
+	stp_offset(ctx, X21, X22, SP_REG, 32); // STP X21, X22, [SP, #32]
+
+	ctx->stp_positions[3] = BUF_POS();
+	stp_offset(ctx, X19, X20, SP_REG, 16); // STP X19, X20, [SP, #16]
+
+	// Save FP/LR at the bottom (NOT NOPpable - always needed)
+	stp_offset(ctx, FP, LR, SP_REG, 0); // STP X29, X30, [SP, #0]
+
+	// MOV X29, SP ; set the frame pointer (points at the saved FP/LR pair)
+	mov_reg_reg(ctx, FP, SP_REG, true);
+
+	// SUB SP, SP, #size ; allocate the local stack space
+	if (size > 0) {
+		if (size < 4096) {
+			encode_add_sub_imm(ctx, 1, 1, 0, 0, size, SP_REG, SP_REG);
+		} else {
+			// Large stack frame - use multiple instructions.
+			// Must use the extended-register form (UXTX) for SP, not shifted register
+			load_immediate(ctx, size, RTMP, true);
+			encode_add_sub_ext(ctx, 1, 1, 0, RTMP, 3, 0, SP_REG, SP_REG); // SUB SP, SP, RTMP, UXTX
+		}
+	}
+
+	// Function marker: MOV W17, #(0xF000 | (findex & 0xFFF)) ; MOVK W17, #(findex >> 12), LSL #16
+	// W17 ends up as ((findex >> 12) << 16) | 0xF000 | (findex & 0xFFF): the 0xF
+	// nibble in bits 12-15 distinguishes function markers from opcode markers,
+	// which are small values
+	{
+		int findex = f->findex;
+		int low12 = 0xF000 | (findex & 0xFFF);
+		int high = (findex >> 12) & 0xFFFF;
+		// MOV W17, #low12
+		EMIT32(ctx, 0x52800011 | (low12 << 5));
+		if (high != 0) {
+			// MOVK W17, #high, LSL #16
+			EMIT32(ctx, 0x72A00011 | (high << 5));
+		}
+	}
+
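+	// Worked example (illustrative only): findex = 0x1234
+	//   low12 = 0xF000 | 0x234 = 0xF234  -> MOV  W17, #0xF234
+	//   high  = 0x1234 >> 12   = 0x1     -> MOVK W17, #1, LSL #16
+	//   W17   = 0x0001F234; a debugger reading W17 recovers the findex as
+	//   ((W17 >> 16) << 12) | (W17 & 0xFFF)
+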
+	// Store register arguments to their stack locations FIRST
+	// (before we clobber the argument registers with zero-init)
+	int_arg_count = 0;
+	fp_arg_count = 0;
+	for (i = 0; i < nargs && i < f->nregs; i++) {
+		vreg *r = R(i);
+		bool is_fp = IS_FLOAT(r);
+		int *arg_count = is_fp ? &fp_arg_count : &int_arg_count;
+
+		if (*arg_count < CALL_NREGS) {
+			// This arg was in a register - store it to the stack.
+			// Skip void arguments (size 0) - they need no storage
+			// but still consume a call register slot
+			if (r->size > 0) {
+				if (is_fp) {
+					str_stack_fp(ctx, FP_CALL_REGS[fp_arg_count], r->stackPos, r->size);
+				} else {
+					str_stack(ctx, CALL_REGS[int_arg_count], r->stackPos, r->size);
+				}
+			}
+			(*arg_count)++;
+		}
+	}
+
+	// Zero-initialize local variables on the stack (not arguments).
+	// This ensures reading unassigned locals returns null/0
+	if (f->nregs > nargs) {
+		// Store zeros to each local variable slot using XZR
+		for (i = nargs; i < f->nregs; i++) {
+			vreg *r = R(i);
+			if (r->size > 0 && r->stackPos < 0) {
+				// Use str_stack with XZR as source - efficient and handles all offsets
+				if (r->size != 1 && r->size != 2 && r->size != 4 && r->size != 8) {
+					JIT_ASSERT(0);
+				}
+				str_stack(ctx, XZR, r->stackPos, r->size);
+			}
+		}
+	}
+
+	ctx->opsPos[0] = BUF_POS();
+
+	// Initialize debug offset tracking
+	if (ctx->m->code->hasdebug) {
+		debug16 = (unsigned short*)malloc(sizeof(unsigned short) * (f->nops + 1));
+		if (debug16 == NULL)
+			return -1; // keep OOM handling consistent with the allocations above
+		debug16[0] = (unsigned short)(BUF_POS() - codePos);
+	}
+
+	// Main opcode translation loop
+	for (opCount = 0; opCount < f->nops; opCount++) {
+		hl_opcode *o = f->ops + opCount;
+		vreg *dst = R(o->p1);
+		vreg *ra = R(o->p2);
+		vreg *rb = R(o->p3);
+
+		ctx->currentPos = opCount + 1;
+		jit_buf(ctx);
+
+		// Emit an opcode marker for debugging: MOV W17, #(opcode | (opCount << 8)).
+		// W17 is IP1, a scratch register; the marker encodes both the opcode type and its index
+		{
+			int marker = (o->op & 0xFF) | ((opCount & 0xFF) << 8);
+			EMIT32(ctx, 0x52800011 | ((marker & 0xFFFF) << 5)); // MOV W17, #marker
+		}
+
+		// Before a label (merge point), spill dirty registers for the fallthrough path.
+		// After spilling, update the label position so jumps bypass the spill code.
+		// discard_regs() in op_label just clears bindings (no code).
+		if (o->op == OLabel) {
+			spill_regs(ctx);
+			// Update the label position AFTER the spill - jumps should target here,
+			// not before the spill (which is only for the fallthrough path)
+			ctx->opsPos[opCount] = BUF_POS();
+		}
+
+		// Emit code based on the opcode
+		switch (o->op) {
+		case OMov:
+		case OUnsafeCast:
+			op_mov(ctx, dst, ra);
+			break;
+
+		case OInt:
+			store_const(ctx, dst, m->code->ints[o->p2]);
+			break;
+
+		case OBool:
+			store_const(ctx, dst, o->p2);
+			break;
+
+		case ONull:
+			// Set the register to NULL (0)
+			store_const(ctx, dst, 0);
+			break;
+
+		case OFloat: {
+			// Load a float constant from the module.
+			// Float constants are stored at the start of the code buffer (offset o->p2 * 8)
+			double float_val = m->code->floats[o->p2];
+			preg *dst_reg = alloc_fpu(ctx);
+
+			if (float_val == 0.0) {
+				// Zero out the FP register: FMOV Dd, XZR
+				// FMOV Dd, XZR: sf=1, S=0, type=01, rmode=00, opcode=000111, Rn=31, Rd
+				EMIT32(ctx, (1 << 31) | (0 << 29) | (0x1E << 24) | (1 << 22) | (1 << 21) | (7 << 16) | (31 << 5) | dst_reg->id);
+			} else {
+				// Float constants live at the start of the code buffer.
+				// Compute the PC-relative offset from the current position to the float data
+				int float_offset = o->p2 * 8; // Offset from the start of the code buffer
+				int cur_pos = BUF_POS(); // Current position in the code buffer
+				int pc_offset = float_offset - cur_pos; // PC-relative offset
+
+				// LDR Dt,