diff --git a/hl.vcxproj b/hl.vcxproj index 88e95b28b..791c08677 100644 --- a/hl.vcxproj +++ b/hl.vcxproj @@ -45,55 +45,55 @@ Application true Unicode - v142 + v143 Application true Unicode - v142 + v143 Application false true Unicode - v142 + v143 Application false true Unicode - v142 + v143 Application false true Unicode - v120 + v143 Application false true Unicode - v142 + v143 Application false true Unicode - v142 + v143 Application false true Unicode - v120 + v143 @@ -361,14 +361,18 @@ + + + + diff --git a/hl.vcxproj.filters b/hl.vcxproj.filters index f86723996..9e66b8869 100644 --- a/hl.vcxproj.filters +++ b/hl.vcxproj.filters @@ -4,14 +4,18 @@ - + + + + + \ No newline at end of file diff --git a/libhl.vcxproj b/libhl.vcxproj index 40f1a2eff..1f86fe1a7 100644 --- a/libhl.vcxproj +++ b/libhl.vcxproj @@ -36,40 +36,40 @@ DynamicLibrary true - v142 + v143 Unicode DynamicLibrary false - v142 + v143 true Unicode DynamicLibrary false - v120 + v143 true Unicode DynamicLibrary true - v142 + v143 Unicode DynamicLibrary false - v142 + v143 true Unicode DynamicLibrary false - v120 + v143 true Unicode diff --git a/src/data_struct.c b/src/data_struct.c new file mode 100644 index 000000000..12a466216 --- /dev/null +++ b/src/data_struct.c @@ -0,0 +1,231 @@ +/* + * Copyright (C)2015-2026 Haxe Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#ifdef S_TYPE + +// is included by data_struct.h + +#ifdef S_MAP +# define S_ARGS S_KEY k, S_VALUE v +#else +# define S_ARGS S_VALUE k +# define S_KEY S_VALUE +# define keys values +#endif + +#ifndef S_DEFVAL +# define S_DEFVAL (S_VALUE)0 +#endif + +#ifndef S_CMP +# define S_CMP(a,b) a > b +#endif + +typedef struct { + int cur; + int max; + S_KEY *keys; +# ifdef S_MAP + S_VALUE *values; +# endif +} S_TYPE; + +typedef S_VALUE S_NAME(_value); + +INLINE static void S_NAME(check_size)( hl_alloc *alloc, S_TYPE *st ) { + if( st->cur == st->max ) { + int n = st->max ? 
(st->max << 1) : STRUCT_DEF_SIZE; + S_KEY *keys = (S_KEY*)hl_malloc(alloc,sizeof(S_KEY) * n); + memcpy(keys,st->keys,sizeof(S_KEY) * st->cur); + st->keys = keys; +# ifdef S_MAP + S_VALUE *vals = (S_VALUE*)hl_malloc(alloc,sizeof(S_VALUE) * n); + memcpy(vals,st->values,sizeof(S_VALUE) * st->cur); + st->values = vals; +# endif + st->max = n; + } +} + +#ifndef S_SORTED + +INLINE static void S_NAME(add_impl)( hl_alloc *alloc, S_TYPE *st, S_ARGS ) { + S_NAME(check_size)(alloc,st); + st->keys[st->cur] = k; +# ifdef S_MAP + st->values[st->cur] = v; +# endif + st->cur++; +} + +INLINE static bool S_NAME(exists)( S_TYPE st, S_KEY k ) { + for(int i=0;icur; + int pos; + while( min < max ) { + int mid = (min + max) >> 1; + S_KEY k2 = st->keys[mid]; + if( S_CMP(k,k2) ) min = mid + 1; else if( S_CMP(k2,k) ) max = mid; else return false; + } + S_NAME(check_size)(alloc,st); + pos = (min + max) >> 1; + memmove(st->keys + pos + 1, st->keys + pos, (st->cur - pos) * sizeof(S_KEY)); +# ifdef S_MAP + memmove(st->values + pos + 1, st->values + pos, (st->cur - pos) * sizeof(S_VALUE)); +# endif + st->keys[pos] = k; +# ifdef S_MAP + st->values[pos] = v; +# endif + st->cur++; + return true; +} + +#ifdef S_MAP +INLINE static void S_NAME(replace_impl)( hl_alloc *alloc, S_TYPE *st, S_ARGS ) { + int min = 0; + int max = st->cur; + int pos; + while( min < max ) { + int mid = (min + max) >> 1; + S_KEY k2 = st->keys[mid]; + if( k2 < k ) min = mid + 1; else if( k2 > k ) max = mid; else { + st->values[mid] = v; + return; + } + } + S_NAME(check_size)(alloc,st); + pos = (min + max) >> 1; + memmove(st->keys + pos + 1, st->keys + pos, (st->cur - pos) * sizeof(S_KEY)); + memmove(st->values + pos + 1, st->values + pos, (st->cur - pos) * sizeof(S_VALUE)); + st->keys[pos] = k; + st->values[pos] = v; + st->cur++; +} +#endif + +INLINE static bool S_NAME(exists)( S_TYPE st, S_KEY k ) { + int min = 0; + int max = st.cur; + while( min < max ) { + int mid = (min + max) >> 1; + S_KEY k2 = st.keys[mid]; + if( 
S_CMP(k,k2) ) min = mid + 1; else if( S_CMP(k2,k) ) max = mid; else return true; + } + return false; +} + +#ifdef S_MAP +INLINE static S_VALUE S_NAME(find)( S_TYPE st, S_KEY k ) { + int min = 0; + int max = st.cur; + while( min < max ) { + int mid = (min + max) >> 1; + S_KEY k2 = st.keys[mid]; + if( k2 < k ) min = mid + 1; else if( k2 > k ) max = mid; else return st.values[mid]; + } + return S_DEFVAL; +} +#endif + +INLINE static bool S_NAME(remove)( S_TYPE *st, S_KEY k ) { + int min = 0; + int max = st->cur; + while( min < max ) { + int mid = (min + max) >> 1; + S_KEY k2 = st->keys[mid]; + if( S_CMP(k,k2) ) min = mid + 1; else if( S_CMP(k2,k) ) max = mid; else { + int pos = mid; + memmove(st->keys + pos, st->keys + pos + 1, (st->cur - pos - 1) * sizeof(S_KEY)); +# ifdef S_MAP + memmove(st->values + pos, st->values + pos + 1, (st->cur - pos - 1) * sizeof(S_VALUE)); +# endif + st->cur--; + return true; + } + } + return false; +} + +#endif + +INLINE static void S_NAME(reset)( S_TYPE *st ) { + st->cur = 0; +} + +INLINE static S_VALUE *S_NAME(free)( S_TYPE *st ) { + st->cur = 0; + st->max = 0; + S_VALUE *vals = st->values; +# ifdef S_MAP + st->keys = NULL; +# endif + st->values = NULL; + return vals; +} + +INLINE static int S_NAME(count)( S_TYPE st ) { + return st.cur; +} + +INLINE static S_VALUE S_NAME(get)( S_TYPE st, int idx ) { + return st.values[idx]; +} + +INLINE static S_VALUE S_NAME(first)( S_TYPE st ) { + return st.cur == 0 ? 
S_DEFVAL : st.values[0]; +} + +INLINE static bool S_NAME(iter_next)( S_TYPE st, S_VALUE *val, int idx ) { + if( idx < st.cur ) *val = st.values[idx]; + return idx < st.cur; +} + +#undef S_NAME +#undef S_TYPE +#undef S_VALUE +#undef S_KEY +#undef S_ARGS +#undef STRUCT_NAME +#undef S_CMP +#undef S_DEFVAL +#undef keys + +#endif diff --git a/src/data_struct.h b/src/data_struct.h new file mode 100644 index 000000000..1bd52c652 --- /dev/null +++ b/src/data_struct.h @@ -0,0 +1,79 @@ +/* + * Copyright (C)2015-2026 Haxe Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef HL_DATA_STRUCT_H +#define HL_DATA_STRUCT_H + +#include + +#if defined(__GNUC__) || defined(__clang__) +#define INLINE inline __attribute__((always_inline)) +#elif defined(_MSC_VER) +#define INLINE __forceinline +#else +#define INLINE inline +#endif + +#define STRUCT_DEF_SIZE 2 +#define for_iter(name,var,set) name##__value var; for(int __idx=0;name##_iter_next(set,&var,__idx);__idx++) + +#define S_TYPE ptr_set +#define S_NAME(name) ptr_set_##name +#define S_VALUE void* +#include "data_struct.c" +#define ptr_set_add(set,v) ptr_set_add_impl(DEF_ALLOC,&(set),v) + +#define S_TYPE int_arr +#define S_NAME(name) int_arr_##name +#define S_VALUE int +#include "data_struct.c" +#define int_arr_add(set,v) int_arr_add_impl(DEF_ALLOC,&(set),v) + +#define S_SORTED + +#define S_TYPE int_set +#define S_NAME(name) int_set_##name +#define S_VALUE int +#include "data_struct.c" +#define int_set_add(set,v) int_set_add_impl(DEF_ALLOC,&(set),v) + +#define S_MAP + +#define S_TYPE int_map +#define S_NAME(name) int_map_##name +#define S_KEY int +#define S_VALUE int +#include "data_struct.c" +#define int_map_add(map,k,v) int_map_add_impl(DEF_ALLOC,&(map),k,v) +#define int_map_replace(map,k,v) int_map_replace_impl(DEF_ALLOC,&(map),k,v) + +#define S_TYPE ptr_map +#define S_NAME(name) ptr_map_##name +#define S_KEY int +#define S_VALUE void* +#include "data_struct.c" +#define ptr_map_add(map,k,v) ptr_map_add_impl(DEF_ALLOC,&(map),k,v) +#define ptr_map_replace(map,k,v) ptr_map_replace_impl(DEF_ALLOC,&(map),k,v) + +#undef S_MAP +#undef S_SORTED + +#endif diff --git a/src/hlmodule.h b/src/hlmodule.h index d8ea8c912..01ab8be2e 100644 --- a/src/hlmodule.h +++ b/src/hlmodule.h @@ -19,6 +19,9 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. 
*/ +#ifndef HL_MODULE_H +#define HL_MODULE_H + #include #include #include "opcodes.h" @@ -104,9 +107,6 @@ typedef struct { bool large; } hl_debug_infos; -typedef struct _jit_ctx jit_ctx; - - typedef struct { hl_code *code; int *types_hashes; @@ -120,6 +120,8 @@ typedef struct { #define WIN64_UNWIND_TABLES #endif +typedef struct _jit_ctx jit_ctx; + typedef struct { hl_code *code; int codesize; @@ -161,10 +163,4 @@ hl_type *hl_module_resolve_type( hl_module *m, hl_type *t, bool err ); void hl_profile_setup( int sample_count ); void hl_profile_end(); -jit_ctx *hl_jit_alloc(); -void hl_jit_free( jit_ctx *ctx, h_bool can_reset ); -void hl_jit_reset( jit_ctx *ctx, hl_module *m ); -void hl_jit_init( jit_ctx *ctx, hl_module *m ); -int hl_jit_function( jit_ctx *ctx, hl_module *m, hl_function *f ); -void *hl_jit_code( jit_ctx *ctx, hl_module *m, int *codesize, hl_debug_infos **debug, hl_module *previous ); -void hl_jit_patch_method( void *old_fun, void **new_fun_table ); +#endif diff --git a/src/jit.c b/src/jit.c index 7e4e6e88b..19e8a233c 100644 --- a/src/jit.c +++ b/src/jit.c @@ -19,4712 +19,94 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. 
*/ -#ifdef _MSC_VER -#pragma warning(disable:4820) -#endif -#include -#include -#include "hlsystem.h" +#include -#ifdef __arm__ -# error "JIT does not support ARM processors, only x86 and x86-64 are supported, please use HashLink/C native compilation instead" -#endif +static jit_ctx *current_ctx = NULL; -#ifdef HL_DEBUG -# define JIT_DEBUG -#endif - -typedef enum { - Eax = 0, - Ecx = 1, - Edx = 2, - Ebx = 3, - Esp = 4, - Ebp = 5, - Esi = 6, - Edi = 7, -#ifdef HL_64 - R8 = 8, - R9 = 9, - R10 = 10, - R11 = 11, - R12 = 12, - R13 = 13, - R14 = 14, - R15 = 15, -#endif - _LAST = 0xFF -} CpuReg; - -typedef enum { - MOV, - LEA, - PUSH, - ADD, - SUB, - IMUL, // only overflow flag changes compared to MUL - DIV, - IDIV, - CDQ, - CDQE, - POP, - RET, - CALL, - AND, - OR, - XOR, - CMP, - TEST, - NOP, - SHL, - SHR, - SAR, - INC, - DEC, - JMP, - // FPU - FSTP, - FSTP32, - FLD, - FLD32, - FLDCW, - // SSE - MOVSD, - MOVSS, - COMISD, - COMISS, - ADDSD, - SUBSD, - MULSD, - DIVSD, - ADDSS, - SUBSS, - MULSS, - DIVSS, - XORPD, - CVTSI2SD, - CVTSI2SS, - CVTSD2SI, - CVTSD2SS, - CVTSS2SD, - CVTSS2SI, - STMXCSR, - LDMXCSR, - // 8-16 bits - MOV8, - CMP8, - TEST8, - PUSH8, - MOV16, - CMP16, - TEST16, - // prefetchs - PREFETCHT0, - PREFETCHT1, - PREFETCHT2, - PREFETCHNTA, - PREFETCHW, - // -- - _CPU_LAST -} CpuOp; - -#define JAlways 0 -#define JOverflow 0x80 -#define JULt 0x82 -#define JUGte 0x83 -#define JEq 0x84 -#define JNeq 0x85 -#define JULte 0x86 -#define JUGt 0x87 -#define JParity 0x8A -#define JNParity 0x8B -#define JSLt 0x8C -#define JSGte 0x8D -#define JSLte 0x8E -#define JSGt 0x8F - -#define JCarry JLt -#define JZero JEq -#define JNotZero JNeq - -#define B(bv) *ctx->buf.b++ = (unsigned char)(bv) -#define W(wv) *ctx->buf.w++ = wv - -#ifdef HL_64 -# define W64(wv) *ctx->buf.w64++ = wv -#else -# define W64(wv) W(wv) -#endif - -static const int SIB_MULT[] = {-1, 0, 1, -1, 2, -1, -1, -1, 3}; - -#define MOD_RM(mod,reg,rm) B(((mod) << 6) | (((reg)&7) << 3) | ((rm)&7)) -#define 
SIB(mult,rmult,rbase) B((SIB_MULT[mult]<<6) | (((rmult)&7)<<3) | ((rbase)&7)) -#define IS_SBYTE(c) ( (c) >= -128 && (c) < 128 ) - -#define AddJump(how,local) { if( (how) == JAlways ) { B(0xE9); } else { B(0x0F); B(how); }; local = BUF_POS(); W(0); } -#define AddJump_small(how,local) { if( (how) == JAlways ) { B(0xEB); } else B(how - 0x10); local = BUF_POS() | 0x40000000; B(0); } -#define XJump(how,local) AddJump(how,local) -#define XJump_small(how,local) AddJump_small(how,local) - -#define MAX_OP_SIZE 256 - -#define BUF_POS() ((int)(ctx->buf.b - ctx->startBuf)) -#define RTYPE(r) r->t->kind - -#ifdef HL_64 -# define RESERVE_ADDRESS 0x8000000000000000 -#else -# define RESERVE_ADDRESS 0x80000000 -#endif - -#if defined(HL_WIN_CALL) && defined(HL_64) -# define IS_WINCALL64 1 -#else -# define IS_WINCALL64 0 -#endif - -typedef struct jlist jlist; -struct jlist { - int pos; - int target; - jlist *next; -}; - -typedef struct vreg vreg; - -typedef enum { - RCPU = 0, - RFPU = 1, - RSTACK = 2, - RCONST = 3, - RADDR = 4, - RMEM = 5, - RUNUSED = 6, - RCPU_CALL = 1 | 8, - RCPU_8BITS = 1 | 16 -} preg_kind; - -typedef struct { - preg_kind kind; - int id; - int lock; - vreg *holds; -} preg; - -struct vreg { - int stackPos; - int size; - hl_type *t; - preg *current; - preg stack; -}; - -#define REG_AT(i) (ctx->pregs + (i)) - -#ifdef HL_64 -# define RCPU_COUNT 16 -# define RFPU_COUNT 16 -# ifdef HL_WIN_CALL -# define CALL_NREGS 4 -# define RCPU_SCRATCH_COUNT 7 -# define RFPU_SCRATCH_COUNT 6 -static const int RCPU_SCRATCH_REGS[] = { Eax, Ecx, Edx, R8, R9, R10, R11 }; -static const CpuReg CALL_REGS[] = { Ecx, Edx, R8, R9 }; -# else -# define CALL_NREGS 6 // TODO : XMM6+XMM7 are FPU reg parameters -# define RCPU_SCRATCH_COUNT 9 -# define RFPU_SCRATCH_COUNT 16 -static const int RCPU_SCRATCH_REGS[] = { Eax, Ecx, Edx, Esi, Edi, R8, R9, R10, R11 }; -static const CpuReg CALL_REGS[] = { Edi, Esi, Edx, Ecx, R8, R9 }; -# endif -#else -# define CALL_NREGS 0 -# define RCPU_COUNT 8 -# define 
RFPU_COUNT 8 -# define RCPU_SCRATCH_COUNT 3 -# define RFPU_SCRATCH_COUNT 8 -static const int RCPU_SCRATCH_REGS[] = { Eax, Ecx, Edx }; -#endif - -#define XMM(i) ((i) + RCPU_COUNT) -#define PXMM(i) REG_AT(XMM(i)) -#define REG_IS_FPU(i) ((i) >= RCPU_COUNT) - -#define PEAX REG_AT(Eax) -#define PESP REG_AT(Esp) -#define PEBP REG_AT(Ebp) - -#define REG_COUNT (RCPU_COUNT + RFPU_COUNT) - -#define ID2(a,b) ((a) | ((b)<<8)) -#define R(id) (ctx->vregs + (id)) -#define ASSERT(i) { printf("JIT ERROR %d (jit.c line %d)\n",i,(int)__LINE__); jit_exit(); } -#define IS_FLOAT(r) ((r)->t->kind == HF64 || (r)->t->kind == HF32) -#define RLOCK(r) if( (r)->lock < ctx->currentPos ) (r)->lock = ctx->currentPos -#define RUNLOCK(r) if( (r)->lock == ctx->currentPos ) (r)->lock = 0 - -#define BREAK() B(0xCC) - -static preg _unused = { RUNUSED, 0, 0, NULL }; -static preg *UNUSED = &_unused; - -struct _jit_ctx { - union { - unsigned char *b; - unsigned int *w; - unsigned long long *w64; - int *i; - double *d; - } buf; - vreg *vregs; - preg pregs[REG_COUNT]; - vreg *savedRegs[REG_COUNT]; - int savedLocks[REG_COUNT]; - int *opsPos; - int maxRegs; - int maxOps; - int bufSize; - int totalRegsSize; - int functionPos; - int allocOffset; - int currentPos; - int nativeArgsCount; - unsigned char *startBuf; - hl_module *m; - hl_function *f; - jlist *jumps; - jlist *calls; - jlist *switchs; - hl_alloc falloc; // cleared per-function - hl_alloc galloc; - vclosure *closure_list; - hl_debug_infos *debug; - int c2hl; - int hl2c; - void *static_functions[8]; - bool static_function_offset; -#ifdef WIN64_UNWIND_TABLES - int unwind_offset; - int nunwind; - PRUNTIME_FUNCTION unwind_table; -#endif -}; - -#ifdef WIN64_UNWIND_TABLES - -typedef enum _UNWIND_OP_CODES -{ - UWOP_PUSH_NONVOL = 0, /* info == register number */ - UWOP_ALLOC_LARGE, /* no info, alloc size in next 2 slots */ - UWOP_ALLOC_SMALL, /* info == size of allocation / 8 - 1 */ - UWOP_SET_FPREG, /* no info, FP = RSP + UNWIND_INFO.FPRegOffset*16 */ - 
UWOP_SAVE_NONVOL, /* info == register number, offset in next slot */ - UWOP_SAVE_NONVOL_FAR, /* info == register number, offset in next 2 slots */ - UWOP_SAVE_XMM128 = 8, /* info == XMM reg number, offset in next slot */ - UWOP_SAVE_XMM128_FAR, /* info == XMM reg number, offset in next 2 slots */ - UWOP_PUSH_MACHFRAME /* info == 0: no error-code, 1: error-code */ -} UNWIND_CODE_OPS; - -void write_uwcode(jit_ctx *ctx, unsigned char offset, UNWIND_CODE_OPS code, unsigned char info) -{ - B(offset); - B((code) | (info) << 4); -} - -void write_unwind_data(jit_ctx *ctx) -{ - // All generated functions use a frame pointer, so the same unwind info can be used for all of them - unsigned char version = 1; - unsigned char flags = 0; - unsigned char CountOfCodes = 2; - unsigned char SizeOfProlog = 4; - unsigned char FrameRegister = 5; // RBP - unsigned char FrameOffset = 0; - B((version) | (flags) << 3); - B(SizeOfProlog); - B(CountOfCodes); - B((FrameRegister) | (FrameOffset) << 4); - write_uwcode(ctx, 4, UWOP_SET_FPREG, 0); - write_uwcode(ctx, 1, UWOP_PUSH_NONVOL, 5); -} -#endif - -#define jit_exit() { hl_debug_break(); exit(-1); } -#define jit_error(msg) _jit_error(ctx,msg,__LINE__) - -#ifndef HL_64 -# ifdef HL_DEBUG -# define error_i64() jit_error("i64-32") -# else -void error_i64() { - printf("The module you are loading is using 64 bit ints that are not supported by the HL32.\nPlease run using HL64 or compile with -D hl-legacy32"); - jit_exit(); -} -# endif -#endif - -static void _jit_error( jit_ctx *ctx, const char *msg, int line ); -static void on_jit_error( const char *msg, int_val line ); - -static preg *pmem( preg *r, CpuReg reg, int offset ) { - r->kind = RMEM; - r->id = 0 | (reg << 4) | (offset << 8); - return r; -} - -static preg *pmem2( preg *r, CpuReg reg, CpuReg reg2, int mult, int offset ) { - r->kind = RMEM; - r->id = mult | (reg << 4) | (reg2 << 8); - r->holds = (void*)(int_val)offset; - return r; -} - -#ifdef HL_64 -static preg *pcodeaddr( preg *r, int 
offset ) { - r->kind = RMEM; - r->id = 15 | (offset << 4); - return r; -} -#endif - -static preg *pconst( preg *r, int c ) { - r->kind = RCONST; - r->holds = NULL; - r->id = c; - return r; -} - -static preg *pconst64( preg *r, int_val c ) { -#ifdef HL_64 - if( ((int)c) == c ) - return pconst(r,(int)c); - r->kind = RCONST; - r->id = 0xC064C064; - r->holds = (vreg*)c; - return r; -#else - return pconst(r,(int)c); -#endif -} - -#ifndef HL_64 -// it is not possible to access direct 64 bit address in x86-64 -static preg *paddr( preg *r, void *p ) { - r->kind = RADDR; - r->holds = (vreg*)p; - return r; -} -#endif - -static void save_regs( jit_ctx *ctx ) { - int i; - for(i=0;isavedRegs[i] = ctx->pregs[i].holds; - ctx->savedLocks[i] = ctx->pregs[i].lock; - } -} - -static void restore_regs( jit_ctx *ctx ) { - int i; - for(i=0;imaxRegs;i++) - ctx->vregs[i].current = NULL; - for(i=0;isavedRegs[i]; - preg *p = ctx->pregs + i; - p->holds = r; - p->lock = ctx->savedLocks[i]; - if( r ) r->current = p; - } -} - -static void jit_buf( jit_ctx *ctx ) { - if( BUF_POS() > ctx->bufSize - MAX_OP_SIZE ) { - int nsize = ctx->bufSize * 4 / 3; - unsigned char *nbuf; - int curpos; - if( nsize == 0 ) { - int i; - for(i=0;im->code->nfunctions;i++) - nsize += ctx->m->code->functions[i].nops; - nsize *= 4; - } - if( nsize < ctx->bufSize + MAX_OP_SIZE * 4 ) nsize = ctx->bufSize + MAX_OP_SIZE * 4; - curpos = BUF_POS(); - nbuf = (unsigned char*)malloc(nsize); - if( nbuf == NULL ) ASSERT(nsize); - if( ctx->startBuf ) { - memcpy(nbuf,ctx->startBuf,curpos); - free(ctx->startBuf); - } - ctx->startBuf = nbuf; - ctx->buf.b = nbuf + curpos; - ctx->bufSize = nsize; - } -} - -static const char *KNAMES[] = { "cpu","fpu","stack","const","addr","mem","unused" }; -#define ERRIF(c) if( c ) { printf("%s(%s,%s)\n",f?f->name:"???",KNAMES[a->kind], KNAMES[b->kind]); ASSERT(0); } - -typedef struct { - const char *name; // single operand - int r_mem; // r32 / r/m32 r32 - int mem_r; // r/m32 / r32 r/m32 - int r_const; 
// r32 / imm32 imm32 - int r_i8; // r32 / imm8 imm8 - int mem_const; // r/m32 / imm32 N/A -} opform; - -#define FLAG_LONGOP 0x80000000 -#define FLAG_16B 0x40000000 -#define FLAG_8B 0x20000000 -#define FLAG_DUAL 0x10000000 - -#define RM(op,id) ((op) | (((id)+1)<<8)) -#define GET_RM(op) (((op) >> ((op) < 0 ? 24 : 8)) & 15) -#define SBYTE(op) ((op) << 16) -#define LONG_OP(op) ((op) | FLAG_LONGOP) -#define OP16(op) LONG_OP((op) | FLAG_16B) -#define LONG_RM(op,id) LONG_OP(op | (((id) + 1) << 24)) - -static opform OP_FORMS[_CPU_LAST] = { - { "MOV", 0x8B, 0x89, 0xB8, 0, RM(0xC7,0) }, - { "LEA", 0x8D }, - { "PUSH", 0x50, RM(0xFF,6), 0x68, 0x6A }, - { "ADD", 0x03, 0x01, RM(0x81,0), RM(0x83,0) }, - { "SUB", 0x2B, 0x29, RM(0x81,5), RM(0x83,5) }, - { "IMUL", LONG_OP(0x0FAF), 0, 0x69 | FLAG_DUAL, 0x6B | FLAG_DUAL }, - { "DIV", RM(0xF7,6), RM(0xF7,6) }, - { "IDIV", RM(0xF7,7), RM(0xF7,7) }, - { "CDQ", 0x99 }, - { "CDQE", 0x98 }, - { "POP", 0x58, RM(0x8F,0) }, - { "RET", 0xC3 }, - { "CALL", RM(0xFF,2), RM(0xFF,2), 0xE8 }, - { "AND", 0x23, 0x21, RM(0x81,4), RM(0x83,4) }, - { "OR", 0x0B, 0x09, RM(0x81,1), RM(0x83,1) }, - { "XOR", 0x33, 0x31, RM(0x81,6), RM(0x83,6) }, - { "CMP", 0x3B, 0x39, RM(0x81,7), RM(0x83,7) }, - { "TEST", 0x85, 0x85/*SWP?*/, RM(0xF7,0) }, - { "NOP", 0x90 }, - { "SHL", RM(0xD3,4), 0, 0, RM(0xC1,4) }, - { "SHR", RM(0xD3,5), 0, 0, RM(0xC1,5) }, - { "SAR", RM(0xD3,7), 0, 0, RM(0xC1,7) }, - { "INC", IS_64 ? RM(0xFF,0) : 0x40, RM(0xFF,0) }, - { "DEC", IS_64 ? 
RM(0xFF,1) : 0x48, RM(0xFF,1) }, - { "JMP", RM(0xFF,4) }, - // FPU - { "FSTP", 0, RM(0xDD,3) }, - { "FSTP32", 0, RM(0xD9,3) }, - { "FLD", 0, RM(0xDD,0) }, - { "FLD32", 0, RM(0xD9,0) }, - { "FLDCW", 0, RM(0xD9, 5) }, - // SSE - { "MOVSD", 0xF20F10, 0xF20F11 }, - { "MOVSS", 0xF30F10, 0xF30F11 }, - { "COMISD", 0x660F2F }, - { "COMISS", LONG_OP(0x0F2F) }, - { "ADDSD", 0xF20F58 }, - { "SUBSD", 0xF20F5C }, - { "MULSD", 0xF20F59 }, - { "DIVSD", 0xF20F5E }, - { "ADDSS", 0xF30F58 }, - { "SUBSS", 0xF30F5C }, - { "MULSS", 0xF30F59 }, - { "DIVSS", 0xF30F5E }, - { "XORPD", 0x660F57 }, - { "CVTSI2SD", 0xF20F2A }, - { "CVTSI2SS", 0xF30F2A }, - { "CVTSD2SI", 0xF20F2D }, - { "CVTSD2SS", 0xF20F5A }, - { "CVTSS2SD", 0xF30F5A }, - { "CVTSS2SI", 0xF30F2D }, - { "STMXCSR", 0, LONG_RM(0x0FAE,3) }, - { "LDMXCSR", 0, LONG_RM(0x0FAE,2) }, - // 8 bits, - { "MOV8", 0x8A, 0x88, 0, 0xB0, RM(0xC6,0) }, - { "CMP8", 0x3A, 0x38, 0, RM(0x80,7) }, - { "TEST8", 0x84, 0x84, RM(0xF6,0) }, - { "PUSH8", 0, 0, 0x6A | FLAG_8B }, - { "MOV16", OP16(0x8B), OP16(0x89), OP16(0xB8) }, - { "CMP16", OP16(0x3B), OP16(0x39) }, - { "TEST16", OP16(0x85) }, - // prefetchs - { "PREFETCHT0", 0, LONG_RM(0x0F18,1) }, - { "PREFETCHT1", 0, LONG_RM(0x0F18,2) }, - { "PREFETCHT2", 0, LONG_RM(0x0F18,3) }, - { "PREFETCHNTA", 0, LONG_RM(0x0F18,0) }, - { "PREFETCHW", 0, LONG_RM(0x0F0D,1) }, -}; - -#ifdef HL_64 -# define REX() if( r64 ) B(r64 | 0x40) -#else -# define REX() -#endif - -#define OP(b) \ - if( (b) & 0xFF0000 ) { \ - B((b)>>16); \ - if( r64 ) B(r64 | 0x40); /* also in 32 bits mode */ \ - B((b)>>8); \ - B(b); \ - } else { \ - if( (b) & FLAG_16B ) { \ - B(0x66); \ - REX(); \ - } else {\ - REX(); \ - if( (b) & FLAG_LONGOP ) B((b)>>8); \ - }\ - B(b); \ +void hl_jit_error( const char *msg, const char *func, int line ) { + printf("*** JIT ERROR %s:%d (%s)****\n", func, line, msg); + if( current_ctx ) { + jit_ctx *ctx = current_ctx; + current_ctx = NULL; + hl_emit_dump(ctx); } - -static bool is_reg8( preg *a ) { - return a->kind 
== RSTACK || a->kind == RMEM || a->kind == RCONST || (a->kind == RCPU && a->id != Esi && a->id != Edi); + fflush(stdout); } -static void op( jit_ctx *ctx, CpuOp o, preg *a, preg *b, bool mode64 ) { - opform *f = &OP_FORMS[o]; - int r64 = mode64 && (o != PUSH && o != POP && o != CALL && o != PUSH8 && o < PREFETCHT0) ? 8 : 0; - switch( o ) { - case CMP8: - case TEST8: - case MOV8: - if( !is_reg8(a) || !is_reg8(b) ) - ASSERT(0); - break; - default: - break; - } - switch( ID2(a->kind,b->kind) ) { - case ID2(RUNUSED,RUNUSED): - ERRIF(f->r_mem == 0); - OP(f->r_mem); - break; - case ID2(RCPU,RCPU): - case ID2(RFPU,RFPU): - ERRIF( f->r_mem == 0 ); - if( a->id > 7 ) r64 |= 4; - if( b->id > 7 ) r64 |= 1; - OP(f->r_mem); - MOD_RM(3,a->id,b->id); - break; - case ID2(RCPU,RFPU): - case ID2(RFPU,RCPU): - ERRIF( (f->r_mem>>16) == 0 ); - if( a->id > 7 ) r64 |= 4; - if( b->id > 7 ) r64 |= 1; - OP(f->r_mem); - MOD_RM(3,a->id,b->id); - break; - case ID2(RCPU,RUNUSED): - ERRIF( f->r_mem == 0 ); - if( a->id > 7 ) r64 |= 1; - if( GET_RM(f->r_mem) > 0 ) { - OP(f->r_mem); - MOD_RM(3, GET_RM(f->r_mem)-1, a->id); - } else - OP(f->r_mem + (a->id&7)); - break; - case ID2(RSTACK,RUNUSED): - ERRIF( f->mem_r == 0 || GET_RM(f->mem_r) == 0 ); - { - int stackPos = R(a->id)->stackPos; - OP(f->mem_r); - if( IS_SBYTE(stackPos) ) { - MOD_RM(1,GET_RM(f->mem_r)-1,Ebp); - B(stackPos); - } else { - MOD_RM(2,GET_RM(f->mem_r)-1,Ebp); - W(stackPos); - } - } - break; - case ID2(RCPU,RCONST): - ERRIF( f->r_const == 0 && f->r_i8 == 0 ); - if( a->id > 7 ) r64 |= 1; - { - int_val cval = b->holds ? 
(int_val)b->holds : b->id; - // short byte form - if( f->r_i8 && IS_SBYTE(cval) ) { - if( (f->r_i8&FLAG_DUAL) && a->id > 7 ) r64 |= 4; - OP(f->r_i8); - if( (f->r_i8&FLAG_DUAL) ) MOD_RM(3,a->id,a->id); else MOD_RM(3,GET_RM(f->r_i8)-1,a->id); - B((int)cval); - } else if( GET_RM(f->r_const) > 0 || (f->r_const&FLAG_DUAL) ) { - if( (f->r_i8&FLAG_DUAL) && a->id > 7 ) r64 |= 4; - OP(f->r_const&0xFF); - if( (f->r_i8&FLAG_DUAL) ) MOD_RM(3,a->id,a->id); else MOD_RM(3,GET_RM(f->r_const)-1,a->id); - if( mode64 && IS_64 && o == MOV ) W64(cval); else W((int)cval); - } else { - ERRIF( f->r_const == 0); - OP((f->r_const&0xFF) + (a->id&7)); - if( mode64 && IS_64 && o == MOV ) W64(cval); else W((int)cval); - } - } - break; - case ID2(RSTACK,RCPU): - case ID2(RSTACK,RFPU): - ERRIF( f->mem_r == 0 ); - if( b->id > 7 ) r64 |= 4; - { - int stackPos = R(a->id)->stackPos; - OP(f->mem_r); - if( IS_SBYTE(stackPos) ) { - MOD_RM(1,b->id,Ebp); - B(stackPos); - } else { - MOD_RM(2,b->id,Ebp); - W(stackPos); - } - } - break; - case ID2(RCPU,RSTACK): - case ID2(RFPU,RSTACK): - ERRIF( f->r_mem == 0 ); - if( a->id > 7 ) r64 |= 4; - { - int stackPos = R(b->id)->stackPos; - OP(f->r_mem); - if( IS_SBYTE(stackPos) ) { - MOD_RM(1,a->id,Ebp); - B(stackPos); - } else { - MOD_RM(2,a->id,Ebp); - W(stackPos); - } - } - break; - case ID2(RCONST,RUNUSED): - ERRIF( f->r_const == 0 ); - { - int_val cval = a->holds ? (int_val)a->holds : a->id; - OP(f->r_const); - if( f->r_const & FLAG_8B ) B((int)cval); else W((int)cval); - } - break; - case ID2(RMEM,RUNUSED): - ERRIF( f->mem_r == 0 ); - { - int mult = a->id & 0xF; - int regOrOffs = mult == 15 ? 
a->id >> 4 : a->id >> 8; - CpuReg reg = (a->id >> 4) & 0xF; - if( mult == 15 ) { - ERRIF(1); - } else if( mult == 0 ) { - if( reg > 7 ) r64 |= 1; - OP(f->mem_r); - if( regOrOffs == 0 && (reg&7) != Ebp ) { - MOD_RM(0,GET_RM(f->mem_r)-1,reg); - if( (reg&7) == Esp ) B(0x24); - } else if( IS_SBYTE(regOrOffs) ) { - MOD_RM(1,GET_RM(f->mem_r)-1,reg); - if( (reg&7) == Esp ) B(0x24); - B(regOrOffs); - } else { - MOD_RM(2,GET_RM(f->mem_r)-1,reg); - if( (reg&7) == Esp ) B(0x24); - W(regOrOffs); - } - } else { - // [eax + ebx * M] - ERRIF(1); - } - } - break; - case ID2(RCPU, RMEM): - case ID2(RFPU, RMEM): - ERRIF( f->r_mem == 0 ); - { - int mult = b->id & 0xF; - int regOrOffs = mult == 15 ? b->id >> 4 : b->id >> 8; - CpuReg reg = (b->id >> 4) & 0xF; - if( mult == 15 ) { - int pos; - if( a->id > 7 ) r64 |= 4; - OP(f->r_mem); - MOD_RM(0,a->id,5); - if( IS_64 ) { - // offset wrt current code - pos = BUF_POS() + 4; - W(regOrOffs - pos); - } else { - ERRIF(1); - } - } else if( mult == 0 ) { - if( a->id > 7 ) r64 |= 4; - if( reg > 7 ) r64 |= 1; - OP(f->r_mem); - if( regOrOffs == 0 && (reg&7) != Ebp ) { - MOD_RM(0,a->id,reg); - if( (reg&7) == Esp ) B(0x24); - } else if( IS_SBYTE(regOrOffs) ) { - MOD_RM(1,a->id,reg); - if( (reg&7) == Esp ) B(0x24); - B(regOrOffs); - } else { - MOD_RM(2,a->id,reg); - if( (reg&7) == Esp ) B(0x24); - W(regOrOffs); - } - } else { - int offset = (int)(int_val)b->holds; - if( a->id > 7 ) r64 |= 4; - if( reg > 7 ) r64 |= 1; - if( regOrOffs > 7 ) r64 |= 2; - OP(f->r_mem); - MOD_RM(offset == 0 ? 0 : IS_SBYTE(offset) ? 
1 : 2,a->id,4); - SIB(mult,regOrOffs,reg); - if( offset ) { - if( IS_SBYTE(offset) ) B(offset); else W(offset); - } - } - } - break; -# ifndef HL_64 - case ID2(RFPU,RADDR): -# endif - case ID2(RCPU,RADDR): - ERRIF( f->r_mem == 0 ); - if( a->id > 7 ) r64 |= 4; - OP(f->r_mem); - MOD_RM(0,a->id,5); - if( IS_64 ) - W64((int_val)b->holds); - else - W((int)(int_val)b->holds); - break; -# ifndef HL_64 - case ID2(RADDR,RFPU): -# endif - case ID2(RADDR,RCPU): - ERRIF( f->mem_r == 0 ); - if( b->id > 7 ) r64 |= 4; - OP(f->mem_r); - MOD_RM(0,b->id,5); - if( IS_64 ) - W64((int_val)a->holds); - else - W((int)(int_val)a->holds); - break; - case ID2(RMEM, RCPU): - case ID2(RMEM, RFPU): - ERRIF( f->mem_r == 0 ); - { - int mult = a->id & 0xF; - int regOrOffs = mult == 15 ? a->id >> 4 : a->id >> 8; - CpuReg reg = (a->id >> 4) & 0xF; - if( mult == 15 ) { - int pos; - if( b->id > 7 ) r64 |= 4; - OP(f->mem_r); - MOD_RM(0,b->id,5); - if( IS_64 ) { - // offset wrt current code - pos = BUF_POS() + 4; - W(regOrOffs - pos); - } else { - ERRIF(1); - } - } else if( mult == 0 ) { - if( b->id > 7 ) r64 |= 4; - if( reg > 7 ) r64 |= 1; - OP(f->mem_r); - if( regOrOffs == 0 && (reg&7) != Ebp ) { - MOD_RM(0,b->id,reg); - if( (reg&7) == Esp ) B(0x24); - } else if( IS_SBYTE(regOrOffs) ) { - MOD_RM(1,b->id,reg); - if( (reg&7) == Esp ) B(0x24); - B(regOrOffs); - } else { - MOD_RM(2,b->id,reg); - if( (reg&7) == Esp ) B(0x24); - W(regOrOffs); - } - } else { - int offset = (int)(int_val)a->holds; - if( b->id > 7 ) r64 |= 4; - if( reg > 7 ) r64 |= 1; - if( regOrOffs > 7 ) r64 |= 2; - OP(f->mem_r); - MOD_RM(offset == 0 ? 0 : IS_SBYTE(offset) ? 
1 : 2,b->id,4); - SIB(mult,regOrOffs,reg); - if( offset ) { - if( IS_SBYTE(offset) ) B(offset); else W(offset); - } - } - } - break; - default: - ERRIF(1); - } - if( ctx->debug && ctx->f && o == CALL ) { - preg p; - op(ctx,MOV,pmem(&p,Esp,-HL_WSIZE),PEBP,true); // erase EIP (clean stack report) - } -} +void hl_jit_null_field_access() { jit_assert(); } +void hl_jit_null_access() { jit_assert(); } +void hl_jit_assert() { jit_assert(); } -static void op32( jit_ctx *ctx, CpuOp o, preg *a, preg *b ) { - op(ctx,o,a,b,false); +void int_alloc_reset( int_alloc *a ) { + a->cur = 0; } -static void op64( jit_ctx *ctx, CpuOp o, preg *a, preg *b ) { -#ifndef HL_64 - op(ctx,o,a,b,false); -#else - op(ctx,o,a,b,true); -#endif +void int_alloc_free( int_alloc *a ) { + free(a->data); + a->cur = 0; + a->max = 0; + a->data = NULL; } -static void patch_jump( jit_ctx *ctx, int p ) { - if( p == 0 ) return; - if( p & 0x40000000 ) { - int d; - p &= 0x3FFFFFFF; - d = BUF_POS() - (p + 1); - if( d < -128 || d >= 128 ) ASSERT(d); - *(char*)(ctx->startBuf + p) = (char)d; - } else { - *(int*)(ctx->startBuf + p) = BUF_POS() - (p + 4); +int *int_alloc_get( int_alloc *a, int count ) { + while( a->cur + count > a->max ) { + int next_size = a->max ? 
a->max << 1 : 128; + int *new_data = (int*)malloc(sizeof(int) * next_size); + if( new_data == NULL ) jit_error("Out of memory"); + memcpy(new_data, a->data, sizeof(int) * a->cur); + free(a->data); + a->data = new_data; + a->max = next_size; } + int *ptr = a->data + a->cur; + a->cur += count; + return ptr; } -static void patch_jump_to( jit_ctx *ctx, int p, int target ) { - if( p == 0 ) return; - if( p & 0x40000000 ) { - int d; - p &= 0x3FFFFFFF; - d = target - (p + 1); - if( d < -128 || d >= 128 ) ASSERT(d); - *(char*)(ctx->startBuf + p) = (char)d; - } else { - *(int*)(ctx->startBuf + p) = target - (p + 4); - } -} - -static int stack_size( hl_type *t ) { - switch( t->kind ) { - case HUI8: - case HUI16: - case HBOOL: -# ifdef HL_64 - case HI32: - case HF32: -# endif - return sizeof(int_val); - case HI64: - default: - return hl_type_size(t); - } -} - -static int call_reg_index( int reg ) { -# ifdef HL_64 - int i; - for(i=0;ikind == RFPU ) - return p->id < CALL_NREGS; - for(i=0;ikind == RCPU && p->id == CALL_REGS[i] ) - return true; - return false; -# else - return false; -# endif -} - -static preg *alloc_reg( jit_ctx *ctx, preg_kind k ) { - int i; - preg *p; - switch( k ) { - case RCPU: - case RCPU_CALL: - case RCPU_8BITS: - { - int off = ctx->allocOffset++; - const int count = RCPU_SCRATCH_COUNT; - for(i=0;ipregs + r; - if( p->lock >= ctx->currentPos ) continue; - if( k == RCPU_CALL && is_call_reg(p) ) continue; - if( k == RCPU_8BITS && !is_reg8(p) ) continue; - if( p->holds == NULL ) { - RLOCK(p); - return p; - } - } - for(i=0;ipregs + RCPU_SCRATCH_REGS[(i + off)%count]; - if( p->lock >= ctx->currentPos ) continue; - if( k == RCPU_CALL && is_call_reg(p) ) continue; - if( k == RCPU_8BITS && !is_reg8(p) ) continue; - if( p->holds ) { - RLOCK(p); - p->holds->current = NULL; - p->holds = NULL; - return p; - } - } - } - break; - case RFPU: - { - int off = ctx->allocOffset++; - const int count = RFPU_SCRATCH_COUNT; - for(i=0;ilock >= ctx->currentPos ) continue; - if( 
p->holds == NULL ) { - RLOCK(p); - return p; - } - } - for(i=0;ilock >= ctx->currentPos ) continue; - if( p->holds ) { - RLOCK(p); - p->holds->current = NULL; - p->holds = NULL; - return p; - } - } - } - break; - default: - ASSERT(k); - } - ASSERT(0); // out of registers ! - return NULL; -} - -static preg *fetch( vreg *r ) { - if( r->current ) - return r->current; - return &r->stack; +void int_alloc_store( int_alloc *a, int v ) { + *int_alloc_get(a,1) = v; } -static void scratch( preg *r ) { - if( r && r->holds ) { - r->holds->current = NULL; - r->holds = NULL; - r->lock = 0; - } -} - -static preg *copy( jit_ctx *ctx, preg *to, preg *from, int size ); - -static void load( jit_ctx *ctx, preg *r, vreg *v ) { - preg *from = fetch(v); - if( from == r || v->size == 0 ) return; - if( r->holds ) r->holds->current = NULL; - if( v->current ) { - v->current->holds = NULL; - from = r; - } - r->holds = v; - v->current = r; - copy(ctx,r,from,v->size); -} - -static preg *alloc_fpu( jit_ctx *ctx, vreg *r, bool andLoad ) { - preg *p = fetch(r); - if( p->kind != RFPU ) { - if( !IS_FLOAT(r) && (IS_64 || r->t->kind != HI64) ) ASSERT(r->t->kind); - p = alloc_reg(ctx, RFPU); - if( andLoad ) - load(ctx,p,r); - else { - if( r->current ) - r->current->holds = NULL; - r->current = p; - p->holds = r; - } - } else - RLOCK(p); - return p; -} - -static void reg_bind( vreg *r, preg *p ) { - if( r->current ) - r->current->holds = NULL; - r->current = p; - p->holds = r; -} - -static preg *alloc_cpu( jit_ctx *ctx, vreg *r, bool andLoad ) { - preg *p = fetch(r); - if( p->kind != RCPU ) { -# ifndef HL_64 - if( r->t->kind == HI64 ) return alloc_fpu(ctx,r,andLoad); - if( r->size > 4 ) ASSERT(r->size); -# endif - p = alloc_reg(ctx, RCPU); - if( andLoad ) - load(ctx,p,r); - else - reg_bind(r,p); - } else - RLOCK(p); - return p; -} - -// allocate a register that is not a call parameter -static preg *alloc_cpu_call( jit_ctx *ctx, vreg *r ) { - preg *p = fetch(r); - if( p->kind != RCPU ) { -# ifndef HL_64 
- if( r->t->kind == HI64 ) return alloc_fpu(ctx,r,true); - if( r->size > 4 ) ASSERT(r->size); -# endif - p = alloc_reg(ctx, RCPU_CALL); - load(ctx,p,r); - } else if( is_call_reg(p) ) { - preg *p2 = alloc_reg(ctx, RCPU_CALL); - op64(ctx,MOV,p2,p); - scratch(p); - reg_bind(r,p2); - return p2; - } else - RLOCK(p); - return p; -} - -static preg *fetch32( jit_ctx *ctx, vreg *r ) { - if( r->current ) - return r->current; - // make sure that the register is correctly erased - if( r->size < 4 ) { - preg *p = alloc_cpu(ctx, r, true); - RUNLOCK(p); - return p; - } - return fetch(r); -} - -// make sure higher bits are zeroes -static preg *alloc_cpu64( jit_ctx *ctx, vreg *r, bool andLoad ) { -# ifndef HL_64 - return alloc_cpu(ctx,r,andLoad); -# else - preg *p = fetch(r); - if( !andLoad ) ASSERT(0); - if( p->kind != RCPU ) { - p = alloc_reg(ctx, RCPU); - op64(ctx,XOR,p,p); - load(ctx,p,r); - } else { - // remove higher bits - preg tmp; - op64(ctx,SHL,p,pconst(&tmp,32)); - op64(ctx,SHR,p,pconst(&tmp,32)); - RLOCK(p); - } - return p; -# endif -} - -// make sure the register can be used with 8 bits access -static preg *alloc_cpu8( jit_ctx *ctx, vreg *r, bool andLoad ) { - preg *p = fetch(r); - if( p->kind != RCPU ) { - p = alloc_reg(ctx, RCPU_8BITS); - load(ctx,p,r); - } else if( !is_reg8(p) ) { - preg *p2 = alloc_reg(ctx, RCPU_8BITS); - op64(ctx,MOV,p2,p); - scratch(p); - reg_bind(r,p2); - return p2; - } else - RLOCK(p); - return p; -} - -static preg *copy( jit_ctx *ctx, preg *to, preg *from, int size ) { - if( size == 0 || to == from ) return to; - switch( ID2(to->kind,from->kind) ) { - case ID2(RMEM,RCPU): - case ID2(RSTACK,RCPU): - case ID2(RCPU,RSTACK): - case ID2(RCPU,RMEM): - case ID2(RCPU,RCPU): -# ifndef HL_64 - case ID2(RCPU,RADDR): - case ID2(RADDR,RCPU): -# endif - switch( size ) { - case 1: - if( to->kind == RCPU ) { - op64(ctx,XOR,to,to); - if( !is_reg8(to) ) { - preg p; - op32(ctx,MOV16,to,from); - op32(ctx,SHL,to,pconst(&p,24)); - op32(ctx,SHR,to,pconst(&p,24)); - 
break; - } - } - if( !is_reg8(from) ) { - preg *r = alloc_reg(ctx, RCPU_CALL); - op32(ctx, MOV, r, from); - RUNLOCK(r); - op32(ctx,MOV8,to,r); - return from; - } - op32(ctx,MOV8,to,from); - break; - case 2: - if( to->kind == RCPU ) - op64(ctx,XOR,to,to); - op32(ctx,MOV16,to,from); - break; - case 4: - op32(ctx,MOV,to,from); - break; - case 8: - if( IS_64 ) { - op64(ctx,MOV,to,from); - break; - } - default: - ASSERT(size); - } - return to->kind == RCPU ? to : from; - case ID2(RFPU,RFPU): - case ID2(RMEM,RFPU): - case ID2(RSTACK,RFPU): - case ID2(RFPU,RMEM): - case ID2(RFPU,RSTACK): - switch( size ) { - case 8: - op64(ctx,MOVSD,to,from); - break; - case 4: - op32(ctx,MOVSS,to,from); - break; - default: - ASSERT(size); - } - return to->kind == RFPU ? to : from; - case ID2(RMEM,RSTACK): - { - vreg *rfrom = R(from->id); - if( IS_FLOAT(rfrom) ) - return copy(ctx,to,alloc_fpu(ctx,rfrom,true),size); - return copy(ctx,to,alloc_cpu(ctx,rfrom,true),size); - } - case ID2(RMEM,RMEM): - case ID2(RSTACK,RMEM): - case ID2(RSTACK,RSTACK): -# ifndef HL_64 - case ID2(RMEM,RADDR): - case ID2(RSTACK,RADDR): - case ID2(RADDR,RSTACK): -# endif - { - preg *tmp; - if( (!IS_64 && size == 8) || (to->kind == RSTACK && IS_FLOAT(R(to->id))) || (from->kind == RSTACK && IS_FLOAT(R(from->id))) ) { - tmp = alloc_reg(ctx, RFPU); - op64(ctx,size == 8 ? 
MOVSD : MOVSS,tmp,from); - } else { - tmp = alloc_reg(ctx, RCPU); - copy(ctx,tmp,from,size); - } - return copy(ctx,to,tmp,size); - } -# ifdef HL_64 - case ID2(RCPU,RADDR): - case ID2(RMEM,RADDR): - case ID2(RSTACK,RADDR): - { - preg p; - preg *tmp = alloc_reg(ctx, RCPU); - op64(ctx,MOV,tmp,pconst64(&p,(int_val)from->holds)); - return copy(ctx,to,pmem(&p,tmp->id,0),size); - } - case ID2(RADDR,RCPU): - case ID2(RADDR,RMEM): - case ID2(RADDR,RSTACK): - { - preg p; - preg *tmp = alloc_reg(ctx, RCPU); - op64(ctx,MOV,tmp,pconst64(&p,(int_val)to->holds)); - return copy(ctx,pmem(&p,tmp->id,0),from,size); - } -# endif - default: - break; - } - printf("copy(%s,%s)\n",KNAMES[to->kind], KNAMES[from->kind]); - ASSERT(0); - return NULL; -} - -static void store( jit_ctx *ctx, vreg *r, preg *v, bool bind ) { - if( r->current && r->current != v ) { - r->current->holds = NULL; - r->current = NULL; - } - v = copy(ctx,&r->stack,v,r->size); - if( IS_FLOAT(r) != (v->kind == RFPU) ) - ASSERT(0); - if( bind && r->current != v && (v->kind == RCPU || v->kind == RFPU) ) { - scratch(v); - r->current = v; - v->holds = r; - } -} - -static void store_result( jit_ctx *ctx, vreg *r ) { -# ifndef HL_64 - switch( r->t->kind ) { - case HF64: - scratch(r->current); - op64(ctx,FSTP,&r->stack,UNUSED); - break; - case HF32: - scratch(r->current); - op64(ctx,FSTP32,&r->stack,UNUSED); - break; - case HI64: - scratch(r->current); - error_i64(); - break; - default: -# endif - store(ctx,r,IS_FLOAT(r) ? 
REG_AT(XMM(0)) : PEAX,true); -# ifndef HL_64 - break; - } -# endif -} - -static void op_mov( jit_ctx *ctx, vreg *to, vreg *from ) { - preg *r = fetch(from); -# ifndef HL_64 - if( to->t->kind == HI64 ) { - error_i64(); - return; - } -# endif - if( from->t->kind == HF32 && r->kind != RFPU ) - r = alloc_fpu(ctx,from,true); - store(ctx, to, r, true); -} - -static void copy_to( jit_ctx *ctx, vreg *to, preg *from ) { - store(ctx,to,from,true); -} - -static void copy_from( jit_ctx *ctx, preg *to, vreg *from ) { - copy(ctx,to,fetch(from),from->size); -} - -static void store_const( jit_ctx *ctx, vreg *r, int c ) { - preg p; - if( c == 0 ) - op(ctx,XOR,alloc_cpu(ctx,r,false),alloc_cpu(ctx,r,false),r->size == 8); - else if( r->size == 8 ) - op64(ctx,MOV,alloc_cpu(ctx,r,false),pconst64(&p,c)); - else - op32(ctx,MOV,alloc_cpu(ctx,r,false),pconst(&p,c)); - store(ctx,r,r->current,false); -} - -static void discard_regs( jit_ctx *ctx, bool native_call ) { - int i; - for(i=0;ipregs + RCPU_SCRATCH_REGS[i]; - if( r->holds ) { - r->holds->current = NULL; - r->holds = NULL; - } - } - for(i=0;ipregs + XMM(i); - if( r->holds ) { - r->holds->current = NULL; - r->holds = NULL; - } - } -} - -static int pad_before_call( jit_ctx *ctx, int size ) { - int total = size + ctx->totalRegsSize + HL_WSIZE * 2; // EIP+EBP - if( total & 15 ) { - int pad = 16 - (total & 15); - preg p; - if( pad ) op64(ctx,SUB,PESP,pconst(&p,pad)); - size += pad; - } - return size; -} - -static void push_reg( jit_ctx *ctx, vreg *r ) { - preg p; - switch( stack_size(r->t) ) { - case 1: - op64(ctx,SUB,PESP,pconst(&p,1)); - op32(ctx,MOV8,pmem(&p,Esp,0),alloc_cpu8(ctx,r,true)); - break; - case 2: - op64(ctx,SUB,PESP,pconst(&p,2)); - op32(ctx,MOV16,pmem(&p,Esp,0),alloc_cpu(ctx,r,true)); - break; - case 4: - if( r->size < 4 ) - alloc_cpu(ctx,r,true); // force fetch (higher bits set to 0) - if( !IS_64 ) { - if( r->current != NULL && r->current->kind == RFPU ) scratch(r->current); - op32(ctx,PUSH,fetch(r),UNUSED); - } else { - // 
pseudo push32 (not available) - op64(ctx,SUB,PESP,pconst(&p,4)); - op32(ctx,MOV,pmem(&p,Esp,0),alloc_cpu(ctx,r,true)); - } - break; - case 8: - if( fetch(r)->kind == RFPU ) { - op64(ctx,SUB,PESP,pconst(&p,8)); - op64(ctx,MOVSD,pmem(&p,Esp,0),fetch(r)); - } else if( IS_64 ) - op64(ctx,PUSH,fetch(r),UNUSED); - else if( r->stack.kind == RSTACK ) { - scratch(r->current); - r->stackPos += 4; - op32(ctx,PUSH,&r->stack,UNUSED); - r->stackPos -= 4; - op32(ctx,PUSH,&r->stack,UNUSED); - } else - ASSERT(0); - break; - default: - ASSERT(r->size); - } -} - -static int begin_native_call( jit_ctx *ctx, int nargs ) { - ctx->nativeArgsCount = nargs; - return pad_before_call(ctx, nargs > CALL_NREGS ? (nargs - CALL_NREGS) * HL_WSIZE : 0); -} - -static preg *alloc_native_arg( jit_ctx *ctx ) { -# ifdef HL_64 - int rid = ctx->nativeArgsCount - 1; - preg *r = rid < CALL_NREGS ? REG_AT(CALL_REGS[rid]) : alloc_reg(ctx,RCPU_CALL); - scratch(r); - return r; -# else - return alloc_reg(ctx, RCPU); -# endif -} - -static void set_native_arg( jit_ctx *ctx, preg *r ) { - if( r->kind == RSTACK ) { - vreg *v = ctx->vregs + r->id; - if( v->size < 4 ) - r = fetch32(ctx, v); - } -# ifdef HL_64 - if( r->kind == RFPU ) ASSERT(0); - int rid = --ctx->nativeArgsCount; - preg *target; - if( rid >= CALL_NREGS ) { - op64(ctx,PUSH,r,UNUSED); - return; - } - target = REG_AT(CALL_REGS[rid]); - if( target != r ) { - op64(ctx, MOV, target, r); - scratch(target); - } -# else - op32(ctx,PUSH,r,UNUSED); -# endif -} - -static void set_native_arg_fpu( jit_ctx *ctx, preg *r, bool isf32 ) { -# ifdef HL_64 - if( r->kind == RCPU ) ASSERT(0); - // can only be used if last argument !! - ctx->nativeArgsCount--; - preg *target = REG_AT(XMM(IS_WINCALL64 ? ctx->nativeArgsCount : 0)); - if( target != r ) { - op64(ctx, isf32 ? 
MOVSS : MOVSD, target, r); - scratch(target); - } -# else - op32(ctx,PUSH,r,UNUSED); -# endif -} - -typedef struct { - int nextCpu; - int nextFpu; - int mapped[REG_COUNT]; -} call_regs; - -static int select_call_reg( call_regs *regs, hl_type *t, int id ) { -# ifndef HL_64 - return -1; -#else - bool isFloat = t->kind == HF32 || t->kind == HF64; -# ifdef HL_WIN_CALL - int index = regs->nextCpu++; -# else - int index = isFloat ? regs->nextFpu++ : regs->nextCpu++; -# endif - if( index >= CALL_NREGS ) - return -1; - int reg = isFloat ? XMM(index) : CALL_REGS[index]; - regs->mapped[reg] = id + 1; - return reg; -#endif -} - -static int mapped_reg( call_regs *regs, int id ) { -# ifndef HL_64 - return -1; -#else - int i; - for(i=0;imapped[r] == id + 1 ) return r; - r = XMM(i); - if( regs->mapped[r] == id + 1 ) return r; - } - return -1; -#endif -} - -static int prepare_call_args( jit_ctx *ctx, int count, int *args, vreg *vregs, int extraSize ) { - int i; - int size = extraSize, paddedSize; - call_regs ctmp = {0}; - for(i=0;it, i); - if( cr >= 0 ) { - preg *c = REG_AT(cr); - preg *cur = fetch(r); - if( cur != c ) { - copy(ctx,c,cur,r->size); - scratch(c); - } - RLOCK(c); - continue; - } - size += stack_size(r->t); - } - paddedSize = pad_before_call(ctx,size); - for(i=0;i= 0 ) continue; - push_reg(ctx,r); - if( r->current ) RUNLOCK(r->current); - } - return paddedSize; -} - -static void op_call( jit_ctx *ctx, preg *r, int size ) { - preg p; -# ifdef JIT_DEBUG - if( IS_64 && size >= 0 ) { - int jchk; - op32(ctx,TEST,PESP,pconst(&p,15)); - XJump(JZero,jchk); - BREAK(); // unaligned ESP - patch_jump(ctx, jchk); - } -# endif - if( IS_WINCALL64 ) { - // MSVC requires 32bytes of free space here - op64(ctx,SUB,PESP,pconst(&p,32)); - if( size >= 0 ) size += 32; - } - op32(ctx, CALL, r, UNUSED); - if( size > 0 ) op64(ctx,ADD,PESP,pconst(&p,size)); -} - -static void call_native( jit_ctx *ctx, void *nativeFun, int size ) { - bool isExc = nativeFun == hl_assert || nativeFun == hl_throw 
|| nativeFun == on_jit_error; - preg p; - // native function, already resolved - op64(ctx,MOV,PEAX,pconst64(&p,(int_val)nativeFun)); - op_call(ctx,PEAX, isExc ? -1 : size); - if( isExc ) - return; - discard_regs(ctx, true); -} - -static void op_call_fun( jit_ctx *ctx, vreg *dst, int findex, int count, int *args ) { - int fid = findex < 0 ? -1 : ctx->m->functions_indexes[findex]; - bool isNative = fid >= ctx->m->code->nfunctions; - int size = prepare_call_args(ctx,count,args,ctx->vregs,0); - preg p; - if( fid < 0 ) { - ASSERT(fid); - } else if( isNative ) { - call_native(ctx,ctx->m->functions_ptrs[findex],size); - } else { - int cpos = BUF_POS() + (IS_WINCALL64 ? 4 : 0); -# ifdef JIT_DEBUG - if( IS_64 ) cpos += 13; // ESP CHECK -# endif - if( ctx->m->functions_ptrs[findex] ) { - // already compiled - op_call(ctx,pconst(&p,(int)(int_val)ctx->m->functions_ptrs[findex] - (cpos + 5)), size); - } else if( ctx->m->code->functions + fid == ctx->f ) { - // our current function - op_call(ctx,pconst(&p, ctx->functionPos - (cpos + 5)), size); - } else { - // stage for later - jlist *j = (jlist*)hl_malloc(&ctx->galloc,sizeof(jlist)); - j->pos = cpos; - j->target = findex; - j->next = ctx->calls; - ctx->calls = j; - op_call(ctx,pconst(&p,0), size); - } - discard_regs(ctx, false); - } - if( dst ) - store_result(ctx,dst); -} - -static void op_enter( jit_ctx *ctx ) { - preg p; - op64(ctx, PUSH, PEBP, UNUSED); - op64(ctx, MOV, PEBP, PESP); - if( ctx->totalRegsSize ) op64(ctx, SUB, PESP, pconst(&p,ctx->totalRegsSize)); -} - -static void op_ret( jit_ctx *ctx, vreg *r ) { - preg p; - switch( r->t->kind ) { - case HF32: -# ifdef HL_64 - op64(ctx, MOVSS, PXMM(0), fetch(r)); -# else - op64(ctx,FLD32,&r->stack,UNUSED); -# endif - break; - case HF64: -# ifdef HL_64 - op64(ctx, MOVSD, PXMM(0), fetch(r)); -# else - op64(ctx,FLD,&r->stack,UNUSED); -# endif - break; - default: - if( r->size < 4 && !r->current ) - fetch32(ctx, r); - if( r->current != PEAX ) - op64(ctx,MOV,PEAX,fetch(r)); - 
break; - } - if( ctx->totalRegsSize ) op64(ctx, ADD, PESP, pconst(&p, ctx->totalRegsSize)); -# ifdef JIT_DEBUG - { - int jeq; - op64(ctx, CMP, PESP, PEBP); - XJump_small(JEq,jeq); - jit_error("invalid ESP"); - patch_jump(ctx,jeq); - } -# endif - op64(ctx, POP, PEBP, UNUSED); - op64(ctx, RET, UNUSED, UNUSED); -} - -static void call_native_consts( jit_ctx *ctx, void *nativeFun, int_val *args, int nargs ) { - int size = pad_before_call(ctx, IS_64 ? 0 : HL_WSIZE*nargs); - preg p; - int i; -# ifdef HL_64 - for(i=0;i=0;i--) - op32(ctx, PUSH, pconst64(&p, args[i]), UNUSED); -# endif - call_native(ctx, nativeFun, size); -} - -static void on_jit_error( const char *msg, int_val line ) { - char buf[256]; - int iline = (int)line; - sprintf(buf,"%s (line %d)",msg,iline); -#ifdef HL_WIN_DESKTOP - MessageBoxA(NULL,buf,"JIT ERROR",MB_OK); -#else - printf("JIT ERROR : %s\n",buf); -#endif - hl_debug_break(); - hl_throw(NULL); -} - -static void _jit_error( jit_ctx *ctx, const char *msg, int line ) { - int_val args[2] = { (int_val)msg, (int_val)line }; - call_native_consts(ctx,on_jit_error,args,2); -} - - -static preg *op_binop( jit_ctx *ctx, vreg *dst, vreg *a, vreg *b, hl_op bop ) { - preg *pa = fetch(a), *pb = fetch(b), *out = NULL; - CpuOp o; - if( IS_FLOAT(a) ) { - bool isf32 = a->t->kind == HF32; - switch( bop ) { - case OAdd: o = isf32 ? ADDSS : ADDSD; break; - case OSub: o = isf32 ? SUBSS : SUBSD; break; - case OMul: o = isf32 ? MULSS : MULSD; break; - case OSDiv: o = isf32 ? DIVSS : DIVSD; break; - case OJSLt: - case OJSGte: - case OJSLte: - case OJSGt: - case OJEq: - case OJNotEq: - case OJNotLt: - case OJNotGte: - o = isf32 ? 
COMISS : COMISD; - break; - case OSMod: - { - int args[] = { a->stack.id, b->stack.id }; - int size = prepare_call_args(ctx,2,args,ctx->vregs,0); - void *mod_fun; - if( isf32 ) mod_fun = fmodf; else mod_fun = fmod; - call_native(ctx,mod_fun,size); - store_result(ctx,dst); - return fetch(dst); - } - default: - printf("%s\n", hl_op_name(bop)); - ASSERT(bop); - } - } else { - bool is64 = a->t->kind == HI64; -# ifndef HL_64 - if( is64 ) { - error_i64(); - return fetch(a); - } -# endif - switch( bop ) { - case OAdd: o = ADD; break; - case OSub: o = SUB; break; - case OMul: o = IMUL; break; - case OAnd: o = AND; break; - case OOr: o = OR; break; - case OXor: o = XOR; break; - case OShl: - case OUShr: - case OSShr: - if( !b->current || b->current->kind != RCPU || b->current->id != Ecx ) { - scratch(REG_AT(Ecx)); - op(ctx,MOV,REG_AT(Ecx),pb,is64); - RLOCK(REG_AT(Ecx)); - pa = fetch(a); - } else - RLOCK(b->current); - if( pa->kind != RCPU ) { - pa = alloc_reg(ctx, RCPU); - op(ctx,MOV,pa,fetch(a), is64); - } - op(ctx,bop == OShl ? SHL : (bop == OUShr ? SHR : SAR), pa, UNUSED,is64); - if( dst ) store(ctx, dst, pa, true); - return pa; - case OSDiv: - case OUDiv: - case OSMod: - case OUMod: - { - preg *out = bop == OSMod || bop == OUMod ? REG_AT(Edx) : PEAX; - preg *r = pb; - preg p; - int jz, jz1 = 0, jend; - if( pa->kind == RCPU && pa->id == Eax ) RLOCK(pa); - // ensure b in CPU reg and not in Eax/Edx (for UI8/UI16) - if( pb->kind != RCPU || (pb->id == Eax || pb->id == Edx) ) { - scratch(REG_AT(Ecx)); - scratch(pb); - load(ctx,REG_AT(Ecx),b); - r = REG_AT(Ecx); - } - // integer div 0 => 0 - op(ctx,TEST,r,r,is64); - XJump_small(JZero, jz); - // Prevent MIN/-1 overflow exception - // OSMod: r = (b == 0 || b == -1) ? 0 : a % b - // OSDiv: r = (b == 0 || b == -1) ? 
a * b : a / b - if( bop == OSMod || bop == OSDiv ) { - op(ctx, CMP, r, pconst(&p,-1), is64); - XJump_small(JEq, jz1); - } - pa = fetch(a); - if( pa->kind != RCPU || pa->id != Eax ) { - scratch(PEAX); - scratch(pa); - load(ctx,PEAX,a); - } - scratch(REG_AT(Edx)); - scratch(REG_AT(Eax)); - if( bop == OUDiv || bop == OUMod ) - op(ctx, XOR, REG_AT(Edx), REG_AT(Edx), is64); - else - op(ctx, CDQ, UNUSED, UNUSED, is64); // sign-extend Eax into Eax:Edx - op(ctx, bop == OUDiv || bop == OUMod ? DIV : IDIV, r, UNUSED, is64); - XJump_small(JAlways, jend); - patch_jump(ctx, jz); - patch_jump(ctx, jz1); - if( bop != OSDiv ) { - op(ctx, XOR, out, out, is64); - } else { - load(ctx, out, a); - op(ctx, IMUL, out, r, is64); - } - patch_jump(ctx, jend); - if( dst ) store(ctx, dst, out, true); - return out; - } - case OJSLt: - case OJSGte: - case OJSLte: - case OJSGt: - case OJULt: - case OJUGte: - case OJEq: - case OJNotEq: - switch( a->t->kind ) { - case HUI8: - case HBOOL: - o = CMP8; - break; - case HUI16: - o = CMP16; - break; - default: - o = CMP; - break; - } - break; - default: - printf("%s\n", hl_op_name(bop)); - ASSERT(bop); - } - } - switch( RTYPE(a) ) { - case HI32: - case HUI8: - case HUI16: - case HBOOL: -# ifndef HL_64 - case HDYNOBJ: - case HVIRTUAL: - case HOBJ: - case HSTRUCT: - case HFUN: - case HMETHOD: - case HBYTES: - case HNULL: - case HENUM: - case HDYN: - case HTYPE: - case HABSTRACT: - case HARRAY: -# endif - switch( ID2(pa->kind, pb->kind) ) { - case ID2(RCPU,RCPU): - case ID2(RCPU,RSTACK): - op32(ctx, o, pa, pb); - scratch(pa); - out = pa; - break; - case ID2(RSTACK,RCPU): - if( dst == a && o != IMUL ) { - op32(ctx, o, pa, pb); - dst = NULL; - out = pa; - } else { - alloc_cpu(ctx,a, true); - return op_binop(ctx,dst,a,b,bop); - } - break; - case ID2(RSTACK,RSTACK): - alloc_cpu(ctx, a, true); - return op_binop(ctx, dst, a, b, bop); - default: - printf("%s(%d,%d)\n", hl_op_name(bop), pa->kind, pb->kind); - ASSERT(ID2(pa->kind, pb->kind)); - } - if( dst ) 
store(ctx, dst, out, true); - return out; -# ifdef HL_64 - case HOBJ: - case HSTRUCT: - case HDYNOBJ: - case HVIRTUAL: - case HFUN: - case HMETHOD: - case HBYTES: - case HNULL: - case HENUM: - case HDYN: - case HTYPE: - case HABSTRACT: - case HARRAY: - case HI64: - case HGUID: - switch( ID2(pa->kind, pb->kind) ) { - case ID2(RCPU,RCPU): - case ID2(RCPU,RSTACK): - op64(ctx, o, pa, pb); - scratch(pa); - out = pa; - break; - case ID2(RSTACK,RCPU): - if( dst == a && OP_FORMS[o].mem_r ) { - op64(ctx, o, pa, pb); - dst = NULL; - out = pa; - } else { - alloc_cpu(ctx,a, true); - return op_binop(ctx,dst,a,b,bop); - } - break; - case ID2(RSTACK,RSTACK): - alloc_cpu(ctx, a, true); - return op_binop(ctx, dst, a, b, bop); - default: - printf("%s(%d,%d)\n", hl_op_name(bop), pa->kind, pb->kind); - ASSERT(ID2(pa->kind, pb->kind)); - } - if( dst ) store(ctx, dst, out, true); - return out; -# endif - case HF64: - case HF32: - pa = alloc_fpu(ctx, a, true); - pb = alloc_fpu(ctx, b, true); - switch( ID2(pa->kind, pb->kind) ) { - case ID2(RFPU,RFPU): - op64(ctx,o,pa,pb); - if( (o == COMISD || o == COMISS) && bop != OJSGt ) { - int jnotnan; - XJump_small(JNParity,jnotnan); - switch( bop ) { - case OJSLt: - case OJNotLt: - { - preg *r = alloc_reg(ctx,RCPU); - // set CF=0, ZF=1 - op64(ctx,XOR,r,r); - RUNLOCK(r); - break; - } - case OJSGte: - case OJNotGte: - { - preg *r = alloc_reg(ctx,RCPU); - // set ZF=0, CF=1 - op64(ctx,XOR,r,r); - op64(ctx,CMP,r,PESP); - RUNLOCK(r); - break; - } - break; - case OJNotEq: - case OJEq: - // set ZF=0, CF=? 
- case OJSLte: - // set ZF=0, CF=0 - op64(ctx,TEST,PESP,PESP); - break; - default: - ASSERT(bop); - } - patch_jump(ctx,jnotnan); - } - scratch(pa); - out = pa; - break; - default: - printf("%s(%d,%d)\n", hl_op_name(bop), pa->kind, pb->kind); - ASSERT(ID2(pa->kind, pb->kind)); - } - if( dst ) store(ctx, dst, out, true); - return out; - default: - ASSERT(RTYPE(a)); - } - return NULL; -} - -static int do_jump( jit_ctx *ctx, hl_op op, bool isFloat ) { - int j; - switch( op ) { - case OJAlways: - XJump(JAlways,j); - break; - case OJSGte: - XJump(isFloat ? JUGte : JSGte,j); - break; - case OJSGt: - XJump(isFloat ? JUGt : JSGt,j); - break; - case OJUGte: - XJump(JUGte,j); - break; - case OJSLt: - XJump(isFloat ? JULt : JSLt,j); - break; - case OJSLte: - XJump(isFloat ? JULte : JSLte,j); - break; - case OJULt: - XJump(JULt,j); - break; - case OJEq: - XJump(JEq,j); - break; - case OJNotEq: - XJump(JNeq,j); - break; - case OJNotLt: - XJump(JUGte,j); - break; - case OJNotGte: - XJump(JULt,j); - break; - default: - j = 0; - printf("Unknown JUMP %d\n",op); - break; - } - return j; -} - -static void register_jump( jit_ctx *ctx, int pos, int target ) { - jlist *j = (jlist*)hl_malloc(&ctx->falloc, sizeof(jlist)); - j->pos = pos; - j->target = target; - j->next = ctx->jumps; - ctx->jumps = j; - if( target != 0 && ctx->opsPos[target] == 0 ) - ctx->opsPos[target] = -1; -} - -#define HDYN_VALUE 8 - -static void dyn_value_compare( jit_ctx *ctx, preg *a, preg *b, hl_type *t ) { - preg p; - switch( t->kind ) { - case HUI8: - case HBOOL: - op32(ctx,MOV8,a,pmem(&p,a->id,HDYN_VALUE)); - op32(ctx,MOV8,b,pmem(&p,b->id,HDYN_VALUE)); - op64(ctx,CMP8,a,b); - break; - case HUI16: - op32(ctx,MOV16,a,pmem(&p,a->id,HDYN_VALUE)); - op32(ctx,MOV16,b,pmem(&p,b->id,HDYN_VALUE)); - op64(ctx,CMP16,a,b); - break; - case HI32: - op32(ctx,MOV,a,pmem(&p,a->id,HDYN_VALUE)); - op32(ctx,MOV,b,pmem(&p,b->id,HDYN_VALUE)); - op64(ctx,CMP,a,b); - break; - case HF32: - { - preg *fa = alloc_reg(ctx, RFPU); - preg *fb 
= alloc_reg(ctx, RFPU); - op64(ctx,MOVSS,fa,pmem(&p,a->id,HDYN_VALUE)); - op64(ctx,MOVSS,fb,pmem(&p,b->id,HDYN_VALUE)); - op64(ctx,COMISD,fa,fb); - } - break; - case HF64: - { - preg *fa = alloc_reg(ctx, RFPU); - preg *fb = alloc_reg(ctx, RFPU); - op64(ctx,MOVSD,fa,pmem(&p,a->id,HDYN_VALUE)); - op64(ctx,MOVSD,fb,pmem(&p,b->id,HDYN_VALUE)); - op64(ctx,COMISD,fa,fb); - } - break; - case HI64: - default: - // ptr comparison - op64(ctx,MOV,a,pmem(&p,a->id,HDYN_VALUE)); - op64(ctx,MOV,b,pmem(&p,b->id,HDYN_VALUE)); - op64(ctx,CMP,a,b); - break; - } -} - -static void op_jump( jit_ctx *ctx, vreg *a, vreg *b, hl_opcode *op, int targetPos ) { - if( a->t->kind == HDYN || b->t->kind == HDYN || a->t->kind == HFUN || b->t->kind == HFUN ) { - int args[] = { a->stack.id, b->stack.id }; - int size = prepare_call_args(ctx,2,args,ctx->vregs,0); - call_native(ctx,hl_dyn_compare,size); - if( op->op == OJSGt || op->op == OJSGte ) { - preg p; - int jinvalid; - op32(ctx,CMP,PEAX,pconst(&p,hl_invalid_comparison)); - XJump_small(JEq,jinvalid); - op32(ctx,TEST,PEAX,PEAX); - register_jump(ctx,do_jump(ctx,op->op, IS_FLOAT(a)),targetPos); - patch_jump(ctx,jinvalid); - return; - } - op32(ctx,TEST,PEAX,PEAX); - } else switch( a->t->kind ) { - case HTYPE: - { - int args[] = { a->stack.id, b->stack.id }; - int size = prepare_call_args(ctx,2,args,ctx->vregs,0); - preg p; - call_native(ctx,hl_same_type,size); - op64(ctx,CMP8,PEAX,pconst(&p,1)); - } - break; - case HNULL: - { - preg *pa = hl_type_size(a->t->tparam) == 1 ? alloc_cpu8(ctx,a,true) : alloc_cpu(ctx,a,true); - preg *pb = hl_type_size(b->t->tparam) == 1 ? 
alloc_cpu8(ctx,b,true) : alloc_cpu(ctx,b,true); - if( op->op == OJEq ) { - // if( a == b || (a && b && a->v == b->v) ) goto - int ja, jb; - // if( a != b && (!a || !b || a->v != b->v) ) goto - op64(ctx,CMP,pa,pb); - register_jump(ctx,do_jump(ctx,OJEq,false),targetPos); - op64(ctx,TEST,pa,pa); - XJump_small(JZero,ja); - op64(ctx,TEST,pb,pb); - XJump_small(JZero,jb); - dyn_value_compare(ctx,pa,pb,a->t->tparam); - register_jump(ctx,do_jump(ctx,OJEq,false),targetPos); - scratch(pa); - scratch(pb); - patch_jump(ctx,ja); - patch_jump(ctx,jb); - } else if( op->op == OJNotEq ) { - int jeq, jcmp; - // if( a != b && (!a || !b || a->v != b->v) ) goto - op64(ctx,CMP,pa,pb); - XJump_small(JEq,jeq); - op64(ctx,TEST,pa,pa); - register_jump(ctx,do_jump(ctx,OJEq,false),targetPos); - op64(ctx,TEST,pb,pb); - register_jump(ctx,do_jump(ctx,OJEq,false),targetPos); - dyn_value_compare(ctx,pa,pb,a->t->tparam); - XJump_small(JZero,jcmp); - scratch(pa); - scratch(pb); - register_jump(ctx,do_jump(ctx,OJNotEq,false),targetPos); - patch_jump(ctx,jcmp); - patch_jump(ctx,jeq); - } else - ASSERT(op->op); - return; - } - case HVIRTUAL: - { - preg p; - preg *pa = alloc_cpu(ctx,a,true); - preg *pb = alloc_cpu(ctx,b,true); - int ja,jb,jav,jbv,jvalue; - if( b->t->kind == HOBJ ) { - if( op->op == OJEq ) { - // if( a ? (b && a->value == b) : (b == NULL) ) goto - op64(ctx,TEST,pa,pa); - XJump_small(JZero,ja); - op64(ctx,TEST,pb,pb); - XJump_small(JZero,jb); - op64(ctx,MOV,pa,pmem(&p,pa->id,HL_WSIZE)); - op64(ctx,CMP,pa,pb); - XJump_small(JAlways,jvalue); - patch_jump(ctx,ja); - op64(ctx,TEST,pb,pb); - patch_jump(ctx,jvalue); - register_jump(ctx,do_jump(ctx,OJEq,false),targetPos); - patch_jump(ctx,jb); - } else if( op->op == OJNotEq ) { - // if( a ? 
(b == NULL || a->value != b) : (b != NULL) ) goto - op64(ctx,TEST,pa,pa); - XJump_small(JZero,ja); - op64(ctx,TEST,pb,pb); - register_jump(ctx,do_jump(ctx,OJEq,false),targetPos); - op64(ctx,MOV,pa,pmem(&p,pa->id,HL_WSIZE)); - op64(ctx,CMP,pa,pb); - XJump_small(JAlways,jvalue); - patch_jump(ctx,ja); - op64(ctx,TEST,pb,pb); - patch_jump(ctx,jvalue); - register_jump(ctx,do_jump(ctx,OJNotEq,false),targetPos); - } else - ASSERT(op->op); - scratch(pa); - return; - } - op64(ctx,CMP,pa,pb); - if( op->op == OJEq ) { - // if( a == b || (a && b && a->value && b->value && a->value == b->value) ) goto - register_jump(ctx,do_jump(ctx,OJEq, false),targetPos); - op64(ctx,TEST,pa,pa); - XJump_small(JZero,ja); - op64(ctx,TEST,pb,pb); - XJump_small(JZero,jb); - op64(ctx,MOV,pa,pmem(&p,pa->id,HL_WSIZE)); - op64(ctx,TEST,pa,pa); - XJump_small(JZero,jav); - op64(ctx,MOV,pb,pmem(&p,pb->id,HL_WSIZE)); - op64(ctx,TEST,pb,pb); - XJump_small(JZero,jbv); - op64(ctx,CMP,pa,pb); - XJump_small(JNeq,jvalue); - register_jump(ctx,do_jump(ctx,OJEq, false),targetPos); - patch_jump(ctx,ja); - patch_jump(ctx,jb); - patch_jump(ctx,jav); - patch_jump(ctx,jbv); - patch_jump(ctx,jvalue); - } else if( op->op == OJNotEq ) { - int jnext; - // if( a != b && (!a || !b || !a->value || !b->value || a->value != b->value) ) goto - XJump_small(JEq,jnext); - op64(ctx,TEST,pa,pa); - XJump_small(JZero,ja); - op64(ctx,TEST,pb,pb); - XJump_small(JZero,jb); - op64(ctx,MOV,pa,pmem(&p,pa->id,HL_WSIZE)); - op64(ctx,TEST,pa,pa); - XJump_small(JZero,jav); - op64(ctx,MOV,pb,pmem(&p,pb->id,HL_WSIZE)); - op64(ctx,TEST,pb,pb); - XJump_small(JZero,jbv); - op64(ctx,CMP,pa,pb); - XJump_small(JEq,jvalue); - patch_jump(ctx,ja); - patch_jump(ctx,jb); - patch_jump(ctx,jav); - patch_jump(ctx,jbv); - register_jump(ctx,do_jump(ctx,OJAlways, false),targetPos); - patch_jump(ctx,jnext); - patch_jump(ctx,jvalue); - } else - ASSERT(op->op); - scratch(pa); - scratch(pb); - return; - } - break; - case HOBJ: - case HSTRUCT: - if( b->t->kind == 
HVIRTUAL ) { - op_jump(ctx,b,a,op,targetPos); // inverse - return; - } - if( hl_get_obj_rt(a->t)->compareFun ) { - preg *pa = alloc_cpu(ctx,a,true); - preg *pb = alloc_cpu(ctx,b,true); - preg p; - int jeq, ja, jb, jcmp; - int args[] = { a->stack.id, b->stack.id }; - switch( op->op ) { - case OJEq: - // if( a == b || (a && b && cmp(a,b) == 0) ) goto - op64(ctx,CMP,pa,pb); - XJump_small(JEq,jeq); - op64(ctx,TEST,pa,pa); - XJump_small(JZero,ja); - op64(ctx,TEST,pb,pb); - XJump_small(JZero,jb); - op_call_fun(ctx,NULL,(int)(int_val)a->t->obj->rt->compareFun,2,args); - op32(ctx,TEST,PEAX,PEAX); - XJump_small(JNotZero,jcmp); - patch_jump(ctx,jeq); - register_jump(ctx,do_jump(ctx,OJAlways,false),targetPos); - patch_jump(ctx,ja); - patch_jump(ctx,jb); - patch_jump(ctx,jcmp); - break; - case OJNotEq: - // if( a != b && (!a || !b || cmp(a,b) != 0) ) goto - op64(ctx,CMP,pa,pb); - XJump_small(JEq,jeq); - op64(ctx,TEST,pa,pa); - register_jump(ctx,do_jump(ctx,OJEq,false),targetPos); - op64(ctx,TEST,pb,pb); - register_jump(ctx,do_jump(ctx,OJEq,false),targetPos); - - op_call_fun(ctx,NULL,(int)(int_val)a->t->obj->rt->compareFun,2,args); - op32(ctx,TEST,PEAX,PEAX); - XJump_small(JZero,jcmp); - - register_jump(ctx,do_jump(ctx,OJNotEq,false),targetPos); - patch_jump(ctx,jcmp); - patch_jump(ctx,jeq); - break; - default: - // if( a && b && cmp(a,b) ?? 
0 ) goto - op64(ctx,TEST,pa,pa); - XJump_small(JZero,ja); - op64(ctx,TEST,pb,pb); - XJump_small(JZero,jb); - op_call_fun(ctx,NULL,(int)(int_val)a->t->obj->rt->compareFun,2,args); - op32(ctx,CMP,PEAX,pconst(&p,0)); - register_jump(ctx,do_jump(ctx,op->op,false),targetPos); - patch_jump(ctx,ja); - patch_jump(ctx,jb); - break; - } - return; - } - // fallthrough - default: - // make sure we have valid 8 bits registers - if( a->size == 1 ) alloc_cpu8(ctx,a,true); - if( b->size == 1 ) alloc_cpu8(ctx,b,true); - op_binop(ctx,NULL,a,b,op->op); - break; - } - register_jump(ctx,do_jump(ctx,op->op, IS_FLOAT(a)),targetPos); -} +void hl_emit_alloc( jit_ctx *jit ); +void hl_emit_free( jit_ctx *jit ); +void hl_emit_function( jit_ctx *jit ); jit_ctx *hl_jit_alloc() { - int i; jit_ctx *ctx = (jit_ctx*)malloc(sizeof(jit_ctx)); - if( ctx == NULL ) return NULL; memset(ctx,0,sizeof(jit_ctx)); hl_alloc_init(&ctx->falloc); - hl_alloc_init(&ctx->galloc); - for(i=0;iid = i; - r->kind = RCPU; - } - for(i=0;iid = i; - r->kind = RFPU; - } + hl_emit_alloc(ctx); return ctx; } -void hl_jit_free( jit_ctx *ctx, h_bool can_reset ) { - free(ctx->vregs); - free(ctx->opsPos); - free(ctx->startBuf); - ctx->maxRegs = 0; - ctx->vregs = NULL; - ctx->maxOps = 0; - ctx->opsPos = NULL; - ctx->startBuf = NULL; - ctx->bufSize = 0; - ctx->buf.b = NULL; - ctx->calls = NULL; - ctx->switchs = NULL; - ctx->closure_list = NULL; - hl_free(&ctx->falloc); - hl_free(&ctx->galloc); - if( !can_reset ) free(ctx); -} - -static void jit_nops( jit_ctx *ctx ) { - while( BUF_POS() & 15 ) - op32(ctx, NOP, UNUSED, UNUSED); -} - -#define MAX_ARGS 16 - -static void *call_jit_c2hl = NULL; -static void *call_jit_hl2c = NULL; - -static void *callback_c2hl( void *_f, hl_type *t, void **args, vdynamic *ret ) { - /* - prepare stack and regs according to prepare_call_args, but by reading runtime type information - from the function type. The stack and regs will be setup by the trampoline function. 
- */ - void **f = (void**)_f; - unsigned char stack[MAX_ARGS * 8]; - call_regs cregs = {0}; - if( t->fun->nargs > MAX_ARGS ) - hl_error("Too many arguments for dynamic call"); - int i, size = 0, pad = 0, pos = 0; - for(i=0;ifun->nargs;i++) { - hl_type *at = t->fun->args[i]; - int creg = select_call_reg(&cregs,at,i); - if( creg >= 0 ) - continue; - size += stack_size(at); - } - pad = (-size) & 15; - size += pad; - pos = 0; - for(i=0;ifun->nargs;i++) { - // RTL - hl_type *at = t->fun->args[i]; - void *v = args[i]; - int creg = mapped_reg(&cregs,i); - void *store; - if( creg >= 0 ) { - if( REG_IS_FPU(creg) ) { - store = stack + size + CALL_NREGS * HL_WSIZE + (creg - XMM(0)) * sizeof(double); - } else { - store = stack + size + call_reg_index(creg) * HL_WSIZE; - } - switch( at->kind ) { - case HBOOL: - case HUI8: - *(int_val*)store = *(unsigned char*)v; - break; - case HUI16: - *(int_val*)store = *(unsigned short*)v; - break; - case HI32: - *(int_val*)store = *(int*)v; - break; - case HF32: - *(void**)store = 0; - *(float*)store = *(float*)v; - break; - case HF64: - *(double*)store = *(double*)v; - break; - case HI64: - case HGUID: - *(int64*)store = *(int64*)v; - break; - default: - *(void**)store = v; - break; - } - } else { - int tsize = stack_size(at); - store = stack + pos; - pos += tsize; - switch( at->kind ) { - case HBOOL: - case HUI8: - *(int*)store = *(unsigned char*)v; - break; - case HUI16: - *(int*)store = *(unsigned short*)v; - break; - case HI32: - case HF32: - *(int*)store = *(int*)v; - break; - case HF64: - *(double*)store = *(double*)v; - break; - case HI64: - case HGUID: - *(int64*)store = *(int64*)v; - break; - default: - *(void**)store = v; - break; - } - } - } - pos += pad; - pos >>= IS_64 ? 
3 : 2; - switch( t->fun->ret->kind ) { - case HUI8: - case HUI16: - case HI32: - case HBOOL: - ret->v.i = ((int (*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)&stack + pos, &stack); - return &ret->v.i; - case HI64: - case HGUID: - ret->v.i64 = ((int64 (*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)&stack + pos, &stack); - return &ret->v.i64; - case HF32: - ret->v.f = ((float (*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)&stack + pos, &stack); - return &ret->v.f; - case HF64: - ret->v.d = ((double (*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)&stack + pos, &stack); - return &ret->v.d; - default: - return ((void *(*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)&stack + pos, &stack); - } -} - -static void jit_c2hl( jit_ctx *ctx ) { - // create the function that will be called by callback_c2hl - // it will make sure to prepare the stack/regs according to native calling conventions - int jeq, jloop, jstart; - preg *fptr, *stack, *stend; - preg p; - - op64(ctx,PUSH,PEBP,UNUSED); - op64(ctx,MOV,PEBP,PESP); - -# ifdef HL_64 - - fptr = REG_AT(R10); - stack = PEAX; - stend = REG_AT(R11); - op64(ctx, MOV, fptr, REG_AT(CALL_REGS[0])); - op64(ctx, MOV, stack, REG_AT(CALL_REGS[1])); - op64(ctx, MOV, stend, REG_AT(CALL_REGS[2])); - - // set native call regs - int i; - for(i=0;iid,i*HL_WSIZE)); - for(i=0;iid,(i+CALL_NREGS)*HL_WSIZE)); - -# else - - // make sure the stack is aligned on 16 bytes - // the amount of push we will do afterwards is guaranteed to be a multiple of 16bytes by hl_callback -# ifdef HL_VCC - // VCC does not guarantee us an aligned stack... 
- op64(ctx,MOV,PEAX,PESP); - op64(ctx,AND,PEAX,pconst(&p,15)); - op64(ctx,SUB,PESP,PEAX); -# else - op64(ctx,SUB,PESP,pconst(&p,8)); -# endif - - // mov arguments to regs - fptr = REG_AT(Eax); - stack = REG_AT(Edx); - stend = REG_AT(Ecx); - op64(ctx,MOV,fptr,pmem(&p,Ebp,HL_WSIZE*2)); - op64(ctx,MOV,stack,pmem(&p,Ebp,HL_WSIZE*3)); - op64(ctx,MOV,stend,pmem(&p,Ebp,HL_WSIZE*4)); - -# endif - - // push stack args - jstart = BUF_POS(); - op64(ctx,CMP,stack,stend); - XJump(JEq,jeq); - op64(ctx,SUB,stack,pconst(&p,HL_WSIZE)); - op64(ctx,PUSH,pmem(&p,stack->id,0),UNUSED); - XJump(JAlways,jloop); - patch_jump(ctx,jeq); - patch_jump_to(ctx, jloop, jstart); - - op_call(ctx,fptr,0); - - // cleanup and ret - op64(ctx,MOV,PESP,PEBP); - op64(ctx,POP,PEBP, UNUSED); - op64(ctx,RET,UNUSED,UNUSED); -} - -static vdynamic *jit_wrapper_call( vclosure_wrapper *c, char *stack_args, void **regs ) { - vdynamic *args[MAX_ARGS]; - int i; - int nargs = c->cl.t->fun->nargs; - call_regs cregs = {0}; - if( nargs > MAX_ARGS ) - hl_error("Too many arguments for wrapped call"); - cregs.nextCpu++; // skip fptr in HL64 - was passed as arg0 - for(i=0;icl.t->fun->args[i]; - int creg = select_call_reg(&cregs,t,i); - if( creg < 0 ) { - args[i] = hl_is_dynamic(t) ? 
*(vdynamic**)stack_args : hl_make_dyn(stack_args,t); - stack_args += stack_size(t); - } else if( hl_is_dynamic(t) ) { - args[i] = *(vdynamic**)(regs + call_reg_index(creg)); - } else if( t->kind == HF32 || t->kind == HF64 ) { - args[i] = hl_make_dyn(regs + CALL_NREGS + creg - XMM(0),&hlt_f64); - } else { - args[i] = hl_make_dyn(regs + call_reg_index(creg),t); - } - } - return hl_dyn_call(c->wrappedFun,args,nargs); -} - -static void *jit_wrapper_ptr( vclosure_wrapper *c, char *stack_args, void **regs ) { - vdynamic *ret = jit_wrapper_call(c, stack_args, regs); - hl_type *tret = c->cl.t->fun->ret; - switch( tret->kind ) { - case HVOID: - return NULL; - case HUI8: - case HUI16: - case HI32: - case HBOOL: - return (void*)(int_val)hl_dyn_casti(&ret,&hlt_dyn,tret); - case HI64: - case HGUID: - return (void*)(int_val)hl_dyn_casti64(&ret,&hlt_dyn); - default: - return hl_dyn_castp(&ret,&hlt_dyn,tret); - } -} - -static double jit_wrapper_d( vclosure_wrapper *c, char *stack_args, void **regs ) { - vdynamic *ret = jit_wrapper_call(c, stack_args, regs); - return hl_dyn_castd(&ret,&hlt_dyn); -} - -static void jit_hl2c( jit_ctx *ctx ) { - // create a function that is called with a vclosure_wrapper* and native args - // and pack and pass the args to callback_hl2c - preg p; - int jfloat1, jfloat2, jexit; - hl_type_fun *ft = NULL; - int size; -# ifdef HL_64 - preg *cl = REG_AT(CALL_REGS[0]); - preg *tmp = REG_AT(CALL_REGS[1]); -# else - preg *cl = REG_AT(Ecx); - preg *tmp = REG_AT(Edx); -# endif - - op64(ctx,PUSH,PEBP,UNUSED); - op64(ctx,MOV,PEBP,PESP); - -# ifdef HL_64 - // push registers - int i; - op64(ctx,SUB,PESP,pconst(&p,CALL_NREGS*8)); - for(i=0;it->fun->ret->kind ) { - // case HF32: case HF64: return jit_wrapper_d(arg0,&args); - // default: return jit_wrapper_ptr(arg0,&args); - // } - if( !IS_64 ) - op64(ctx,MOV,cl,pmem(&p,Ebp,HL_WSIZE*2)); // load arg0 - op64(ctx,MOV,tmp,pmem(&p,cl->id,0)); // ->t - op64(ctx,MOV,tmp,pmem(&p,tmp->id,HL_WSIZE)); // ->fun - 
op64(ctx,MOV,tmp,pmem(&p,tmp->id,(int)(int_val)&ft->ret)); // ->ret - op32(ctx,MOV,tmp,pmem(&p,tmp->id,0)); // -> kind - - op32(ctx,CMP,tmp,pconst(&p,HF64)); - XJump_small(JEq,jfloat1); - op32(ctx,CMP,tmp,pconst(&p,HF32)); - XJump_small(JEq,jfloat2); - - // 64 bits : ESP + EIP (+WIN64PAD) - // 32 bits : ESP + EIP + PARAM0 - int args_pos = IS_64 ? ((IS_WINCALL64 ? 32 : 0) + HL_WSIZE * 2) : (HL_WSIZE*3); - - size = begin_native_call(ctx,3); - op64(ctx, LEA, tmp, pmem(&p,Ebp,-HL_WSIZE*CALL_NREGS*2)); - set_native_arg(ctx, tmp); - op64(ctx, LEA, tmp, pmem(&p,Ebp,args_pos)); - set_native_arg(ctx, tmp); - set_native_arg(ctx, cl); - call_native(ctx, jit_wrapper_ptr, size); - XJump_small(JAlways, jexit); - - patch_jump(ctx,jfloat1); - patch_jump(ctx,jfloat2); - size = begin_native_call(ctx,3); - op64(ctx, LEA, tmp, pmem(&p,Ebp,-HL_WSIZE*CALL_NREGS*2)); - set_native_arg(ctx, tmp); - op64(ctx, LEA, tmp, pmem(&p,Ebp,args_pos)); - set_native_arg(ctx, tmp); - set_native_arg(ctx, cl); - call_native(ctx, jit_wrapper_d, size); - - patch_jump(ctx,jexit); - op64(ctx,MOV,PESP,PEBP); - op64(ctx,POP,PEBP, UNUSED); - op64(ctx,RET,UNUSED,UNUSED); -} - -static void jit_fail( uchar *msg ) { - if( msg == NULL ) { - hl_debug_break(); - msg = USTR("assert"); - } - vdynamic *d = hl_alloc_dynamic(&hlt_bytes); - d->v.ptr = msg; - hl_throw(d); -} - -static void jit_null_access( jit_ctx *ctx ) { - op64(ctx,PUSH,PEBP,UNUSED); - op64(ctx,MOV,PEBP,PESP); - int_val arg = (int_val)USTR("Null access"); - call_native_consts(ctx, jit_fail, &arg, 1); -} - -static void jit_null_fail( int fhash ) { - vbyte *field = hl_field_name(fhash); - hl_buffer *b = hl_alloc_buffer(); - hl_buffer_str(b, USTR("Null access .")); - hl_buffer_str(b, (uchar*)field); - vdynamic *d = hl_alloc_dynamic(&hlt_bytes); - d->v.ptr = hl_buffer_content(b,NULL); - hl_throw(d); -} - -static void jit_null_field_access( jit_ctx *ctx ) { - preg p; - op64(ctx,PUSH,PEBP,UNUSED); - op64(ctx,MOV,PEBP,PESP); - int size = begin_native_call(ctx, 
1); - int args_pos = (IS_WINCALL64 ? 32 : 0) + HL_WSIZE*2; - set_native_arg(ctx, pmem(&p,Ebp,args_pos)); - call_native(ctx,jit_null_fail,size); -} - -static void jit_assert( jit_ctx *ctx ) { - op64(ctx,PUSH,PEBP,UNUSED); - op64(ctx,MOV,PEBP,PESP); - int_val arg = 0; - call_native_consts(ctx, jit_fail, &arg, 1); -} - -static int jit_build( jit_ctx *ctx, void (*fbuild)( jit_ctx *) ) { - int pos; - jit_buf(ctx); - jit_nops(ctx); - pos = BUF_POS(); - fbuild(ctx); - int endPos = BUF_POS(); - jit_nops(ctx); -#ifdef WIN64_UNWIND_TABLES - int fid = ctx->nunwind++; - ctx->unwind_table[fid].BeginAddress = pos; - ctx->unwind_table[fid].EndAddress = endPos; - ctx->unwind_table[fid].UnwindData = ctx->unwind_offset; -#endif - return pos; -} - -static void hl_jit_init_module( jit_ctx *ctx, hl_module *m ) { - int i; - ctx->m = m; - if( m->code->hasdebug ) { - ctx->debug = (hl_debug_infos*)malloc(sizeof(hl_debug_infos) * m->code->nfunctions); - memset(ctx->debug, -1, sizeof(hl_debug_infos) * m->code->nfunctions); - } - for(i=0;icode->nfloats;i++) { - jit_buf(ctx); - *ctx->buf.d++ = m->code->floats[i]; - } -#ifdef WIN64_UNWIND_TABLES - jit_buf(ctx); - ctx->unwind_offset = BUF_POS(); - write_unwind_data(ctx); - - ctx->unwind_table = malloc(sizeof(RUNTIME_FUNCTION) * (m->code->nfunctions + 10)); - memset(ctx->unwind_table, 0, sizeof(RUNTIME_FUNCTION) * (m->code->nfunctions + 10)); -#endif -} - void hl_jit_init( jit_ctx *ctx, hl_module *m ) { - hl_jit_init_module(ctx,m); - ctx->c2hl = jit_build(ctx, jit_c2hl); - ctx->hl2c = jit_build(ctx, jit_hl2c); - ctx->static_functions[0] = (void*)(int_val)jit_build(ctx,jit_null_access); - ctx->static_functions[1] = (void*)(int_val)jit_build(ctx,jit_assert); - ctx->static_functions[2] = (void*)(int_val)jit_build(ctx,jit_null_field_access); -} - -void hl_jit_reset( jit_ctx *ctx, hl_module *m ) { - ctx->debug = NULL; - hl_jit_init_module(ctx,m); -} - -static void *get_dyncast( hl_type *t ) { - switch( t->kind ) { - case HF32: - return hl_dyn_castf; - 
case HF64: - return hl_dyn_castd; - case HI64: - case HGUID: - return hl_dyn_casti64; - case HI32: - case HUI16: - case HUI8: - case HBOOL: - return hl_dyn_casti; - default: - return hl_dyn_castp; - } -} - -static void *get_dynset( hl_type *t ) { - switch( t->kind ) { - case HF32: - return hl_dyn_setf; - case HF64: - return hl_dyn_setd; - case HI64: - case HGUID: - return hl_dyn_seti64; - case HI32: - case HUI16: - case HUI8: - case HBOOL: - return hl_dyn_seti; - default: - return hl_dyn_setp; - } -} - -static void *get_dynget( hl_type *t ) { - switch( t->kind ) { - case HF32: - return hl_dyn_getf; - case HF64: - return hl_dyn_getd; - case HI64: - case HGUID: - return hl_dyn_geti64; - case HI32: - case HUI16: - case HUI8: - case HBOOL: - return hl_dyn_geti; - default: - return hl_dyn_getp; - } -} - -static double uint_to_double( unsigned int v ) { - return v; } -static vclosure *alloc_static_closure( jit_ctx *ctx, int fid ) { - hl_module *m = ctx->m; - vclosure *c = hl_malloc(&m->ctx.alloc,sizeof(vclosure)); - int fidx = m->functions_indexes[fid]; - c->hasValue = 0; - if( fidx >= m->code->nfunctions ) { - // native - c->t = m->code->natives[fidx - m->code->nfunctions].t; - c->fun = m->functions_ptrs[fid]; - c->value = NULL; - } else { - c->t = m->code->functions[fidx].type; - c->fun = (void*)(int_val)fid; - c->value = ctx->closure_list; - ctx->closure_list = c; - } - return c; +void hl_jit_free( jit_ctx *ctx, h_bool can_reset ) { + hl_emit_free(ctx); + hl_free(&ctx->falloc); + free(ctx); } -static void make_dyn_cast( jit_ctx *ctx, vreg *dst, vreg *v ) { - int size; - preg p; - preg *tmp; - if( v->t->kind == HNULL && v->t->tparam->kind == dst->t->kind ) { - int jnull, jend; - preg *out; - switch( dst->t->kind ) { - case HUI8: - case HUI16: - case HI32: - case HBOOL: - case HI64: - case HGUID: - tmp = alloc_cpu(ctx, v, true); - op64(ctx, TEST, tmp, tmp); - XJump_small(JZero, jnull); - op64(ctx, MOV, tmp, pmem(&p,tmp->id,8)); - XJump_small(JAlways, jend); - 
patch_jump(ctx, jnull); - op64(ctx, XOR, tmp, tmp); - patch_jump(ctx, jend); - store(ctx, dst, tmp, true); - return; - case HF32: - case HF64: - tmp = alloc_cpu(ctx, v, true); - out = alloc_fpu(ctx, dst, false); - op64(ctx, TEST, tmp, tmp); - XJump_small(JZero, jnull); - op64(ctx, dst->t->kind == HF32 ? MOVSS : MOVSD, out, pmem(&p,tmp->id,8)); - XJump_small(JAlways, jend); - patch_jump(ctx, jnull); - op64(ctx, XORPD, out, out); - patch_jump(ctx, jend); - store(ctx, dst, out, true); - return; - default: - break; - } - } - switch( dst->t->kind ) { - case HF32: - case HF64: - case HI64: - case HGUID: - size = begin_native_call(ctx, 2); - set_native_arg(ctx, pconst64(&p,(int_val)v->t)); - break; - default: - size = begin_native_call(ctx, 3); - set_native_arg(ctx, pconst64(&p,(int_val)dst->t)); - set_native_arg(ctx, pconst64(&p,(int_val)v->t)); - break; - } - tmp = alloc_native_arg(ctx); - op64(ctx,MOV,tmp,REG_AT(Ebp)); - if( v->stackPos >= 0 ) - op64(ctx,ADD,tmp,pconst(&p,v->stackPos)); - else - op64(ctx,SUB,tmp,pconst(&p,-v->stackPos)); - set_native_arg(ctx,tmp); - call_native(ctx,get_dyncast(dst->t),size); - store_result(ctx, dst); +void hl_jit_reset( jit_ctx *ctx, hl_module *m ) { } int hl_jit_function( jit_ctx *ctx, hl_module *m, hl_function *f ) { - int i, size = 0, opCount; - int codePos = BUF_POS(); - int nargs = f->type->fun->nargs; - unsigned short *debug16 = NULL; - int *debug32 = NULL; - call_regs cregs = {0}; - hl_thread_info *tinf = NULL; - preg p; - ctx->f = f; - ctx->allocOffset = 0; - if( f->nregs > ctx->maxRegs ) { - free(ctx->vregs); - ctx->vregs = (vreg*)malloc(sizeof(vreg) * (f->nregs + 1)); - if( ctx->vregs == NULL ) { - ctx->maxRegs = 0; - return -1; - } - ctx->maxRegs = f->nregs; - } - if( f->nops > ctx->maxOps ) { - free(ctx->opsPos); - ctx->opsPos = (int*)malloc(sizeof(int) * (f->nops + 1)); - if( ctx->opsPos == NULL ) { - ctx->maxOps = 0; - return -1; - } - ctx->maxOps = f->nops; - } - memset(ctx->opsPos,0,(f->nops+1)*sizeof(int)); - 
for(i=0;inregs;i++) { - vreg *r = R(i); - r->t = f->regs[i]; - r->size = hl_type_size(r->t); - r->current = NULL; - r->stack.holds = NULL; - r->stack.id = i; - r->stack.kind = RSTACK; - } - size = 0; - int argsSize = 0; - for(i=0;it,i); - if( creg < 0 || IS_WINCALL64 ) { - // use existing stack storage - r->stackPos = argsSize + HL_WSIZE * 2; - argsSize += stack_size(r->t); - } else { - // make room in local vars - size += r->size; - size += hl_pad_size(size,r->t); - r->stackPos = -size; - } - } - for(i=nargs;inregs;i++) { - vreg *r = R(i); - size += r->size; - size += hl_pad_size(size,r->t); // align local vars - r->stackPos = -size; - } -# ifdef HL_64 - size += (-size) & 15; // align on 16 bytes -# else - size += hl_pad_size(size,&hlt_dyn); // align on word size -# endif - ctx->totalRegsSize = size; - jit_buf(ctx); - ctx->functionPos = BUF_POS(); - // make sure currentPos is > 0 before any reg allocations happen - // otherwise `alloc_reg` thinks that all registers are locked - ctx->currentPos = 1; - op_enter(ctx); -# ifdef HL_64 - { - // store in local var - for(i=0;isize); - p->holds = r; - r->current = p; - } - } -# endif - if( ctx->m->code->hasdebug ) { - debug16 = (unsigned short*)malloc(sizeof(unsigned short) * (f->nops + 1)); - debug16[0] = (unsigned short)(BUF_POS() - codePos); - } - ctx->opsPos[0] = BUF_POS(); - - for(opCount=0;opCountnops;opCount++) { - int jump; - hl_opcode *o = f->ops + opCount; - vreg *dst = R(o->p1); - vreg *ra = R(o->p2); - vreg *rb = R(o->p3); - ctx->currentPos = opCount + 1; - jit_buf(ctx); -# ifdef JIT_DEBUG - if( opCount == 0 || f->ops[opCount-1].op != OAsm ) { - int uid = opCount + (f->findex<<16); - op32(ctx, PUSH, pconst(&p,uid), UNUSED); - op64(ctx, ADD, PESP, pconst(&p,HL_WSIZE)); - } -# endif - // emit code - switch( o->op ) { - case OMov: - case OUnsafeCast: - op_mov(ctx, dst, ra); - break; - case OInt: - store_const(ctx, dst, m->code->ints[o->p2]); - break; - case OBool: - store_const(ctx, dst, o->p2); - break; - case 
OGetGlobal: - { - void *addr = m->globals_data + m->globals_indexes[o->p2]; -# ifdef HL_64 - preg *tmp = alloc_reg(ctx, RCPU); - op64(ctx, MOV, tmp, pconst64(&p,(int_val)addr)); - copy_to(ctx, dst, pmem(&p,tmp->id,0)); -# else - copy_to(ctx, dst, paddr(&p,addr)); -# endif - } - break; - case OSetGlobal: - { - void *addr = m->globals_data + m->globals_indexes[o->p1]; -# ifdef HL_64 - preg *tmp = alloc_reg(ctx, RCPU); - op64(ctx, MOV, tmp, pconst64(&p,(int_val)addr)); - copy_from(ctx, pmem(&p,tmp->id,0), ra); -# else - copy_from(ctx, paddr(&p,addr), ra); -# endif - } - break; - case OCall3: - { - int args[3] = { o->p3, o->extra[0], o->extra[1] }; - op_call_fun(ctx, dst, o->p2, 3, args); - } - break; - case OCall4: - { - int args[4] = { o->p3, o->extra[0], o->extra[1], o->extra[2] }; - op_call_fun(ctx, dst, o->p2, 4, args); - } - break; - case OCallN: - op_call_fun(ctx, dst, o->p2, o->p3, o->extra); - break; - case OCall0: - op_call_fun(ctx, dst, o->p2, 0, NULL); - break; - case OCall1: - op_call_fun(ctx, dst, o->p2, 1, &o->p3); - break; - case OCall2: - { - int args[2] = { o->p3, (int)(int_val)o->extra }; - op_call_fun(ctx, dst, o->p2, 2, args); - } - break; - case OSub: - case OAdd: - case OMul: - case OSDiv: - case OUDiv: - case OShl: - case OSShr: - case OUShr: - case OAnd: - case OOr: - case OXor: - case OSMod: - case OUMod: - op_binop(ctx, dst, ra, rb, o->op); - break; - case ONeg: - { - if( IS_FLOAT(ra) ) { - preg *pa = alloc_reg(ctx,RFPU); - preg *pb = alloc_fpu(ctx,ra,true); - op64(ctx,XORPD,pa,pa); - op64(ctx,ra->t->kind == HF32 ? 
SUBSS : SUBSD,pa,pb); - store(ctx,dst,pa,true); - } else if( ra->t->kind == HI64 ) { -# ifdef HL_64 - preg *pa = alloc_reg(ctx,RCPU); - preg *pb = alloc_cpu(ctx,ra,true); - op64(ctx,XOR,pa,pa); - op64(ctx,SUB,pa,pb); - store(ctx,dst,pa,true); -# else - error_i64(); -# endif - } else { - preg *pa = alloc_reg(ctx,RCPU); - preg *pb = alloc_cpu(ctx,ra,true); - op32(ctx,XOR,pa,pa); - op32(ctx,SUB,pa,pb); - store(ctx,dst,pa,true); - } - } - break; - case ONot: - { - preg *v = alloc_cpu(ctx,ra,true); - op32(ctx,XOR,v,pconst(&p,1)); - store(ctx,dst,v,true); - } - break; - case OJFalse: - case OJTrue: - case OJNotNull: - case OJNull: - { - preg *r = dst->t->kind == HBOOL ? alloc_cpu8(ctx, dst, true) : alloc_cpu(ctx, dst, true); - op64(ctx, dst->t->kind == HBOOL ? TEST8 : TEST, r, r); - XJump( o->op == OJFalse || o->op == OJNull ? JZero : JNotZero,jump); - register_jump(ctx,jump,(opCount + 1) + o->p2); - } - break; - case OJEq: - case OJNotEq: - case OJSLt: - case OJSGte: - case OJSLte: - case OJSGt: - case OJULt: - case OJUGte: - case OJNotLt: - case OJNotGte: - op_jump(ctx,dst,ra,o,(opCount + 1) + o->p3); - break; - case OJAlways: - jump = do_jump(ctx,o->op,false); - register_jump(ctx,jump,(opCount + 1) + o->p1); - break; - case OToDyn: - if( ra->t->kind == HBOOL ) { - int size = begin_native_call(ctx, 1); - set_native_arg(ctx, fetch(ra)); - call_native(ctx, hl_alloc_dynbool, size); - store(ctx, dst, PEAX, true); - } else { - int_val rt = (int_val)ra->t; - int jskip = 0; - if( hl_is_ptr(ra->t) ) { - int jnz; - preg *a = alloc_cpu(ctx,ra,true); - op64(ctx,TEST,a,a); - XJump_small(JNotZero,jnz); - op64(ctx,XOR,PEAX,PEAX); // will replace the result of alloc_dynamic at jump land - XJump_small(JAlways,jskip); - patch_jump(ctx,jnz); - } - call_native_consts(ctx, hl_alloc_dynamic, &rt, 1); - // copy value to dynamic - if( (IS_FLOAT(ra) || ra->size == 8) && !IS_64 ) { - preg *tmp = REG_AT(RCPU_SCRATCH_REGS[1]); - op64(ctx,MOV,tmp,&ra->stack); - 
op32(ctx,MOV,pmem(&p,Eax,HDYN_VALUE),tmp); - if( ra->t->kind == HF64 ) { - ra->stackPos += 4; - op64(ctx,MOV,tmp,&ra->stack); - op32(ctx,MOV,pmem(&p,Eax,HDYN_VALUE+4),tmp); - ra->stackPos -= 4; - } - } else { - preg *tmp = REG_AT(RCPU_SCRATCH_REGS[1]); - copy_from(ctx,tmp,ra); - op64(ctx,MOV,pmem(&p,Eax,HDYN_VALUE),tmp); - } - if( hl_is_ptr(ra->t) ) patch_jump(ctx,jskip); - store(ctx, dst, PEAX, true); - } - break; - case OToSFloat: - if( ra == dst ) break; - if (ra->t->kind == HI32 || ra->t->kind == HUI16 || ra->t->kind == HUI8) { - preg* r = alloc_cpu(ctx, ra, true); - preg* w = alloc_fpu(ctx, dst, false); - op32(ctx, dst->t->kind == HF64 ? CVTSI2SD : CVTSI2SS, w, r); - store(ctx, dst, w, true); - } else if (ra->t->kind == HI64 ) { - preg* r = alloc_cpu(ctx, ra, true); - preg* w = alloc_fpu(ctx, dst, false); - op64(ctx, dst->t->kind == HF64 ? CVTSI2SD : CVTSI2SS, w, r); - store(ctx, dst, w, true); - } else if( ra->t->kind == HF64 && dst->t->kind == HF32 ) { - preg *r = alloc_fpu(ctx,ra,true); - preg *w = alloc_fpu(ctx,dst,false); - op32(ctx,CVTSD2SS,w,r); - store(ctx, dst, w, true); - } else if( ra->t->kind == HF32 && dst->t->kind == HF64 ) { - preg *r = alloc_fpu(ctx,ra,true); - preg *w = alloc_fpu(ctx,dst,false); - op32(ctx,CVTSS2SD,w,r); - store(ctx, dst, w, true); - } else - ASSERT(0); - break; - case OToUFloat: - { - int size; - size = prepare_call_args(ctx,1,&o->p2,ctx->vregs,0); - call_native(ctx,uint_to_double,size); - store_result(ctx,dst); - } - break; - case OToInt: - if( ra == dst ) break; - if( ra->t->kind == HF64 ) { - preg *r = alloc_fpu(ctx,ra,true); - preg *w = alloc_cpu(ctx,dst,false); - preg *tmp = alloc_reg(ctx,RCPU); - op32(ctx,STMXCSR,pmem(&p,Esp,-4),UNUSED); - op32(ctx,MOV,tmp,&p); - op32(ctx,OR,tmp,pconst(&p,0x6000)); // set round towards 0 - op32(ctx,MOV,pmem(&p,Esp,-8),tmp); - op32(ctx,LDMXCSR,&p,UNUSED); - op32(ctx,CVTSD2SI,w,r); - op32(ctx,LDMXCSR,pmem(&p,Esp,-4),UNUSED); - store(ctx, dst, w, true); - } else if (ra->t->kind == HF32) { 
- preg *r = alloc_fpu(ctx, ra, true); - preg *w = alloc_cpu(ctx, dst, false); - preg *tmp = alloc_reg(ctx, RCPU); - op32(ctx, STMXCSR, pmem(&p, Esp, -4), UNUSED); - op32(ctx, MOV, tmp, &p); - op32(ctx, OR, tmp, pconst(&p, 0x6000)); // set round towards 0 - op32(ctx, MOV, pmem(&p, Esp, -8), tmp); - op32(ctx, LDMXCSR, &p, UNUSED); - op32(ctx, CVTSS2SI, w, r); - op32(ctx, LDMXCSR, pmem(&p, Esp, -4), UNUSED); - store(ctx, dst, w, true); - } else if( (dst->t->kind == HI64 || dst->t->kind == HGUID) && ra->t->kind == HI32 ) { - if( ra->current != PEAX ) { - op32(ctx, MOV, PEAX, fetch(ra)); - scratch(PEAX); - } -# ifdef HL_64 - op64(ctx, CDQE, UNUSED, UNUSED); // sign-extend Eax into Rax - store(ctx, dst, PEAX, true); -# else - op32(ctx, CDQ, UNUSED, UNUSED); // sign-extend Eax into Eax:Edx - scratch(REG_AT(Edx)); - op32(ctx, MOV, fetch(dst), PEAX); - dst->stackPos += 4; - op32(ctx, MOV, fetch(dst), REG_AT(Edx)); - dst->stackPos -= 4; - } else if( dst->t->kind == HI32 && ra->t->kind == HI64 ) { - error_i64(); -# endif - } else { - preg *r = alloc_cpu(ctx,dst,false); - copy_from(ctx, r, ra); - store(ctx, dst, r, true); - } - break; - case ORet: - op_ret(ctx, dst); - break; - case OIncr: - { - if( IS_FLOAT(dst) ) { - ASSERT(0); - } else { - preg *v = fetch32(ctx,dst); - op32(ctx,INC,v,UNUSED); - if( v->kind != RSTACK ) store(ctx, dst, v, false); - } - } - break; - case ODecr: - { - if( IS_FLOAT(dst) ) { - ASSERT(0); - } else { - preg *v = fetch32(ctx,dst); - op32(ctx,DEC,v,UNUSED); - if( v->kind != RSTACK ) store(ctx, dst, v, false); - } - } - break; - case OFloat: - { - if( m->code->floats[o->p2] == 0 ) { - preg *f = alloc_fpu(ctx,dst,false); - op64(ctx,XORPD,f,f); - } else switch( dst->t->kind ) { - case HF64: - case HF32: -# ifdef HL_64 - op64(ctx,dst->t->kind == HF32 ? CVTSD2SS : MOVSD,alloc_fpu(ctx,dst,false),pcodeaddr(&p,o->p2 * 8)); -# else - op64(ctx,dst->t->kind == HF32 ? 
MOVSS : MOVSD,alloc_fpu(ctx,dst,false),paddr(&p,m->code->floats + o->p2)); -# endif - break; - default: - ASSERT(dst->t->kind); - } - store(ctx,dst,dst->current,false); - } - break; - case OString: - op64(ctx,MOV,alloc_cpu(ctx, dst, false),pconst64(&p,(int_val)hl_get_ustring(m->code,o->p2))); - store(ctx,dst,dst->current,false); - break; - case OBytes: - { - char *b = m->code->version >= 5 ? m->code->bytes + m->code->bytes_pos[o->p2] : m->code->strings[o->p2]; - op64(ctx,MOV,alloc_cpu(ctx,dst,false),pconst64(&p,(int_val)b)); - store(ctx,dst,dst->current,false); - } - break; - case ONull: - { - op64(ctx,XOR,alloc_cpu(ctx, dst, false),alloc_cpu(ctx, dst, false)); - store(ctx,dst,dst->current,false); - } - break; - case ONew: - { - int_val args[] = { (int_val)dst->t }; - void *allocFun; - int nargs = 1; - switch( dst->t->kind ) { - case HOBJ: - case HSTRUCT: - allocFun = hl_alloc_obj; - break; - case HDYNOBJ: - allocFun = hl_alloc_dynobj; - nargs = 0; - break; - case HVIRTUAL: - allocFun = hl_alloc_virtual; - break; - default: - ASSERT(dst->t->kind); - } - call_native_consts(ctx, allocFun, args, nargs); - store(ctx, dst, PEAX, true); - } - break; - case OInstanceClosure: - { - preg *r = alloc_cpu(ctx, rb, true); - jlist *j = (jlist*)hl_malloc(&ctx->galloc,sizeof(jlist)); - int size = begin_native_call(ctx,3); - set_native_arg(ctx,r); - - j->pos = BUF_POS(); - j->target = o->p2; - j->next = ctx->calls; - ctx->calls = j; - - set_native_arg(ctx,pconst64(&p,RESERVE_ADDRESS)); - set_native_arg(ctx,pconst64(&p,(int_val)m->code->functions[m->functions_indexes[o->p2]].type)); - call_native(ctx,hl_alloc_closure_ptr,size); - store(ctx,dst,PEAX,true); - } - break; - case OVirtualClosure: - { - int size, i; - preg *r = alloc_cpu_call(ctx, ra); - hl_type *t = NULL; - hl_type *ot = ra->t; - while( t == NULL ) { - for(i=0;iobj->nproto;i++) { - hl_obj_proto *pp = ot->obj->proto + i; - if( pp->pindex == o->p3 ) { - t = m->code->functions[m->functions_indexes[pp->findex]].type; - 
break; - } - } - ot = ot->obj->super; - } - size = begin_native_call(ctx,3); - set_native_arg(ctx,r); - // read r->type->vobj_proto[i] for function address - op64(ctx,MOV,r,pmem(&p,r->id,0)); - op64(ctx,MOV,r,pmem(&p,r->id,HL_WSIZE*2)); - op64(ctx,MOV,r,pmem(&p,r->id,HL_WSIZE*o->p3)); - set_native_arg(ctx,r); - op64(ctx,MOV,r,pconst64(&p,(int_val)t)); - set_native_arg(ctx,r); - call_native(ctx,hl_alloc_closure_ptr,size); - store(ctx,dst,PEAX,true); - } - break; - case OCallClosure: - if( ra->t->kind == HDYN ) { - // ASM for { - // vdynamic *args[] = {args}; - // vdynamic *ret = hl_dyn_call(closure,args,nargs); - // dst = hl_dyncast(ret,t_dynamic,t_dst); - // } - int offset = o->p3 * HL_WSIZE; - preg *r = alloc_reg(ctx, RCPU_CALL); - if( offset & 15 ) offset += 16 - (offset & 15); - op64(ctx,SUB,PESP,pconst(&p,offset)); - op64(ctx,MOV,r,PESP); - for(i=0;ip3;i++) { - vreg *a = R(o->extra[i]); - if( !hl_is_dynamic(a->t) ) ASSERT(0); - preg *v = alloc_cpu(ctx,a,true); - op64(ctx,MOV,pmem(&p,r->id,i * HL_WSIZE),v); - RUNLOCK(v); - } -# ifdef HL_64 - int size = begin_native_call(ctx, 3) + offset; - set_native_arg(ctx, pconst(&p,o->p3)); - set_native_arg(ctx, r); - set_native_arg(ctx, fetch(ra)); -# else - int size = pad_before_call(ctx,HL_WSIZE*2 + sizeof(int) + offset); - op64(ctx,PUSH,pconst(&p,o->p3),UNUSED); - op64(ctx,PUSH,r,UNUSED); - op64(ctx,PUSH,alloc_cpu(ctx,ra,true),UNUSED); -# endif - call_native(ctx,hl_dyn_call,size); - if( dst->t->kind != HVOID ) { - store(ctx,dst,PEAX,true); - make_dyn_cast(ctx,dst,dst); - } - } else { - int jhasvalue, jend, size; - // ASM for if( c->hasValue ) c->fun(value,args) else c->fun(args) - preg *r = alloc_cpu(ctx,ra,true); - preg *tmp = alloc_reg(ctx, RCPU); - op32(ctx,MOV,tmp,pmem(&p,r->id,HL_WSIZE*2)); - op32(ctx,TEST,tmp,tmp); - scratch(tmp); - XJump_small(JNotZero,jhasvalue); - save_regs(ctx); - size = prepare_call_args(ctx,o->p3,o->extra,ctx->vregs,0); - preg *rr = r; - if( rr->holds != ra ) rr = alloc_cpu(ctx, ra, true); - 
op_call(ctx, pmem(&p,rr->id,HL_WSIZE), size); - XJump_small(JAlways,jend); - patch_jump(ctx,jhasvalue); - restore_regs(ctx); -# ifdef HL_64 - { - int regids[64]; - preg *pc = REG_AT(CALL_REGS[0]); - vreg *sc = R(f->nregs); // scratch register that we temporary rebind - if( o->p3 >= 63 ) jit_error("assert"); - memcpy(regids + 1, o->extra, o->p3 * sizeof(int)); - regids[0] = f->nregs; - sc->size = HL_WSIZE; - sc->t = &hlt_dyn; - op64(ctx, MOV, pc, pmem(&p,r->id,HL_WSIZE*3)); - scratch(pc); - sc->current = pc; - pc->holds = sc; - size = prepare_call_args(ctx,o->p3 + 1,regids,ctx->vregs,0); - if( r->holds != ra ) r = alloc_cpu(ctx, ra, true); - } -# else - size = prepare_call_args(ctx,o->p3,o->extra,ctx->vregs,HL_WSIZE); - if( r->holds != ra ) r = alloc_cpu(ctx, ra, true); - op64(ctx, PUSH,pmem(&p,r->id,HL_WSIZE*3),UNUSED); // push closure value -# endif - op_call(ctx, pmem(&p,r->id,HL_WSIZE), size); - discard_regs(ctx,false); - patch_jump(ctx,jend); - store_result(ctx, dst); - } - break; - case OStaticClosure: - { - vclosure *c = alloc_static_closure(ctx,o->p2); - preg *r = alloc_reg(ctx, RCPU); - op64(ctx, MOV, r, pconst64(&p,(int_val)c)); - store(ctx,dst,r,true); - } - break; - case OField: - { -# ifndef HL_64 - if( dst->t->kind == HI64 ) { - error_i64(); - break; - } -# endif - switch( ra->t->kind ) { - case HOBJ: - case HSTRUCT: - { - hl_runtime_obj *rt = hl_get_obj_rt(ra->t); - preg *rr = alloc_cpu(ctx,ra, true); - if( dst->t->kind == HSTRUCT ) { - hl_type *ft = hl_obj_field_fetch(ra->t,o->p3)->t; - if( ft->kind == HPACKED ) { - preg *r = alloc_reg(ctx,RCPU); - op64(ctx,LEA,r,pmem(&p,(CpuReg)rr->id,rt->fields_indexes[o->p3])); - store(ctx,dst,r,true); - break; - } - } - copy_to(ctx,dst,pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p3])); - } - break; - case HVIRTUAL: - // ASM for --> if( hl_vfields(o)[f] ) r = *hl_vfields(o)[f]; else r = hl_dyn_get(o,hash(field),vt) - { - int jhasfield, jend, size; - bool need_type = !(IS_FLOAT(dst) || dst->t->kind == HI64); - 
preg *v = alloc_cpu_call(ctx,ra); - preg *r = alloc_reg(ctx,RCPU); - op64(ctx,MOV,r,pmem(&p,v->id,sizeof(vvirtual)+HL_WSIZE*o->p3)); - op64(ctx,TEST,r,r); - XJump_small(JNotZero,jhasfield); - size = begin_native_call(ctx, need_type ? 3 : 2); - if( need_type ) set_native_arg(ctx,pconst64(&p,(int_val)dst->t)); - set_native_arg(ctx,pconst64(&p,(int_val)ra->t->virt->fields[o->p3].hashed_name)); - set_native_arg(ctx,v); - call_native(ctx,get_dynget(dst->t),size); - store_result(ctx,dst); - XJump_small(JAlways,jend); - patch_jump(ctx,jhasfield); - copy_to(ctx, dst, pmem(&p,(CpuReg)r->id,0)); - patch_jump(ctx,jend); - scratch(dst->current); - } - break; - default: - ASSERT(ra->t->kind); - break; - } - } - break; - case OSetField: - { - switch( dst->t->kind ) { - case HOBJ: - case HSTRUCT: - { - hl_runtime_obj *rt = hl_get_obj_rt(dst->t); - preg *rr = alloc_cpu(ctx, dst, true); - if( rb->t->kind == HSTRUCT ) { - hl_type *ft = hl_obj_field_fetch(dst->t,o->p2)->t; - if( ft->kind == HPACKED ) { - hl_runtime_obj *frt = hl_get_obj_rt(ft->tparam); - preg *prb = alloc_cpu(ctx, rb, true); - preg *tmp = alloc_reg(ctx, RCPU_CALL); - int offset = 0; - while( offset < frt->size ) { - int remain = frt->size - offset; - int copy_size = remain >= HL_WSIZE ? HL_WSIZE : (remain >= 4 ? 4 : (remain >= 2 ? 
2 : 1)); - copy(ctx, tmp, pmem(&p, (CpuReg)prb->id, offset), copy_size); - copy(ctx, pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p2]+offset), tmp, copy_size); - offset += copy_size; - } - break; - } - } - copy_from(ctx, pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p2]), rb); - } - break; - case HVIRTUAL: - // ASM for --> if( hl_vfields(o)[f] ) *hl_vfields(o)[f] = v; else hl_dyn_set(o,hash(field),vt,v) - { - int jhasfield, jend; - preg *obj = alloc_cpu_call(ctx,dst); - preg *r = alloc_reg(ctx,RCPU); - op64(ctx,MOV,r,pmem(&p,obj->id,sizeof(vvirtual)+HL_WSIZE*o->p2)); - op64(ctx,TEST,r,r); - XJump_small(JNotZero,jhasfield); -# ifdef HL_64 - switch( rb->t->kind ) { - case HF64: - case HF32: - size = begin_native_call(ctx,3); - set_native_arg_fpu(ctx, fetch(rb), rb->t->kind == HF32); - break; - case HI64: - case HGUID: - size = begin_native_call(ctx,3); - set_native_arg(ctx, fetch(rb)); - break; - default: - size = begin_native_call(ctx, 4); - set_native_arg(ctx, fetch(rb)); - set_native_arg(ctx, pconst64(&p,(int_val)rb->t)); - break; - } - set_native_arg(ctx,pconst(&p,dst->t->virt->fields[o->p2].hashed_name)); - set_native_arg(ctx,obj); -# else - switch( rb->t->kind ) { - case HF64: - case HI64: - case HGUID: - size = pad_before_call(ctx,HL_WSIZE*2 + sizeof(double)); - push_reg(ctx,rb); - break; - case HF32: - size = pad_before_call(ctx,HL_WSIZE*2 + sizeof(float)); - push_reg(ctx,rb); - break; - default: - size = pad_before_call(ctx,HL_WSIZE*4); - op64(ctx,PUSH,fetch32(ctx,rb),UNUSED); - op64(ctx,MOV,r,pconst64(&p,(int_val)rb->t)); - op64(ctx,PUSH,r,UNUSED); - break; - } - op32(ctx,MOV,r,pconst(&p,dst->t->virt->fields[o->p2].hashed_name)); - op64(ctx,PUSH,r,UNUSED); - op64(ctx,PUSH,obj,UNUSED); -# endif - call_native(ctx,get_dynset(rb->t),size); - XJump_small(JAlways,jend); - patch_jump(ctx,jhasfield); - copy_from(ctx, pmem(&p,(CpuReg)r->id,0), rb); - patch_jump(ctx,jend); - scratch(rb->current); - } - break; - default: - ASSERT(dst->t->kind); - break; - } - } - 
break; - case OGetThis: - { - vreg *r = R(0); - hl_runtime_obj *rt = hl_get_obj_rt(r->t); - preg *rr = alloc_cpu(ctx,r, true); - if( dst->t->kind == HSTRUCT ) { - hl_type *ft = hl_obj_field_fetch(r->t,o->p2)->t; - if( ft->kind == HPACKED ) { - preg *r = alloc_reg(ctx,RCPU); - op64(ctx,LEA,r,pmem(&p,(CpuReg)rr->id,rt->fields_indexes[o->p2])); - store(ctx,dst,r,true); - break; - } - } - copy_to(ctx,dst,pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p2])); - } - break; - case OSetThis: - { - vreg *r = R(0); - hl_runtime_obj *rt = hl_get_obj_rt(r->t); - preg *rr = alloc_cpu(ctx, r, true); - if( ra->t->kind == HSTRUCT ) { - hl_type *ft = hl_obj_field_fetch(r->t,o->p1)->t; - if( ft->kind == HPACKED ) { - hl_runtime_obj *frt = hl_get_obj_rt(ft->tparam); - preg *pra = alloc_cpu(ctx, ra, true); - preg *tmp = alloc_reg(ctx, RCPU_CALL); - int offset = 0; - while( offset < frt->size ) { - int remain = frt->size - offset; - int copy_size = remain >= HL_WSIZE ? HL_WSIZE : (remain >= 4 ? 4 : (remain >= 2 ? 
2 : 1)); - copy(ctx, tmp, pmem(&p, (CpuReg)pra->id, offset), copy_size); - copy(ctx, pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p1]+offset), tmp, copy_size); - offset += copy_size; - } - break; - } - } - copy_from(ctx, pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p1]), ra); - } - break; - case OCallThis: - { - int nargs = o->p3 + 1; - int *args = (int*)hl_malloc(&ctx->falloc,sizeof(int) * nargs); - int size; - preg *r = alloc_cpu(ctx, R(0), true); - preg *tmp; - tmp = alloc_reg(ctx, RCPU_CALL); - op64(ctx,MOV,tmp,pmem(&p,r->id,0)); // read type - op64(ctx,MOV,tmp,pmem(&p,tmp->id,HL_WSIZE*2)); // read proto - args[0] = 0; - for(i=1;iextra[i-1]; - size = prepare_call_args(ctx,nargs,args,ctx->vregs,0); - op_call(ctx,pmem(&p,tmp->id,o->p2*HL_WSIZE),size); - discard_regs(ctx, false); - store_result(ctx, dst); - } - break; - case OCallMethod: - switch( R(o->extra[0])->t->kind ) { - case HOBJ: { - int size; - preg *r = alloc_cpu(ctx, R(o->extra[0]), true); - preg *tmp; - tmp = alloc_reg(ctx, RCPU_CALL); - op64(ctx,MOV,tmp,pmem(&p,r->id,0)); // read type - op64(ctx,MOV,tmp,pmem(&p,tmp->id,HL_WSIZE*2)); // read proto - size = prepare_call_args(ctx,o->p3,o->extra,ctx->vregs,0); - op_call(ctx,pmem(&p,tmp->id,o->p2*HL_WSIZE),size); - discard_regs(ctx, false); - store_result(ctx, dst); - break; - } - case HVIRTUAL: - // ASM for --> if( hl_vfields(o)[f] ) dst = *hl_vfields(o)[f](o->value,args...); else dst = hl_dyn_call_obj(o->value,field,args,&ret) - { - int size; - int paramsSize; - int jhasfield, jend; - bool need_dyn; - bool obj_in_args = false; - vreg *obj = R(o->extra[0]); - preg *v = alloc_cpu_call(ctx,obj); - preg *r = alloc_reg(ctx,RCPU_CALL); - op64(ctx,MOV,r,pmem(&p,v->id,sizeof(vvirtual)+HL_WSIZE*o->p2)); - op64(ctx,TEST,r,r); - save_regs(ctx); - - if( o->p3 < 6 ) { - XJump_small(JNotZero,jhasfield); - } else { - XJump(JNotZero,jhasfield); - } - - need_dyn = !hl_is_ptr(dst->t) && dst->t->kind != HVOID; - paramsSize = (o->p3 - 1) * HL_WSIZE; - if( need_dyn ) 
paramsSize += sizeof(vdynamic); - if( paramsSize & 15 ) paramsSize += 16 - (paramsSize&15); - op64(ctx,SUB,PESP,pconst(&p,paramsSize)); - op64(ctx,MOV,r,PESP); - - for(i=0;ip3-1;i++) { - vreg *a = R(o->extra[i+1]); - if( hl_is_ptr(a->t) ) { - op64(ctx,MOV,pmem(&p,r->id,i*HL_WSIZE),alloc_cpu(ctx,a,true)); - if( a->current != v ) { - RUNLOCK(a->current); - } else - obj_in_args = true; - } else { - preg *r2 = alloc_reg(ctx,RCPU); - op64(ctx,LEA,r2,&a->stack); - op64(ctx,MOV,pmem(&p,r->id,i*HL_WSIZE),r2); - if( r2 != v ) RUNLOCK(r2); - } - } - - jit_buf(ctx); - - if( !need_dyn ) { - size = begin_native_call(ctx, 5); - set_native_arg(ctx, pconst(&p,0)); - } else { - preg *rtmp = alloc_reg(ctx,RCPU); - op64(ctx,LEA,rtmp,pmem(&p,Esp,paramsSize - sizeof(vdynamic))); - size = begin_native_call(ctx, 5); - set_native_arg(ctx,rtmp); - if( !IS_64 ) RUNLOCK(rtmp); - } - set_native_arg(ctx,r); - set_native_arg(ctx,pconst(&p,obj->t->virt->fields[o->p2].hashed_name)); // fid - set_native_arg(ctx,pconst64(&p,(int_val)obj->t->virt->fields[o->p2].t)); // ftype - set_native_arg(ctx,pmem(&p,v->id,HL_WSIZE)); // o->value - call_native(ctx,hl_dyn_call_obj,size + paramsSize); - if( need_dyn ) { - preg *r = IS_FLOAT(dst) ? 
REG_AT(XMM(0)) : PEAX; - copy(ctx,r,pmem(&p,Esp,HDYN_VALUE - (int)sizeof(vdynamic)),dst->size); - store(ctx, dst, r, false); - } else - store(ctx, dst, PEAX, false); - - XJump_small(JAlways,jend); - patch_jump(ctx,jhasfield); - restore_regs(ctx); - - if( !obj_in_args ) { - // o = o->value hack - if( v->holds ) v->holds->current = NULL; - obj->current = v; - v->holds = obj; - op64(ctx,MOV,v,pmem(&p,v->id,HL_WSIZE)); - size = prepare_call_args(ctx,o->p3,o->extra,ctx->vregs,0); - } else { - // keep o->value in R(f->nregs) - int regids[64]; - preg *pc = alloc_reg(ctx,RCPU_CALL); - vreg *sc = R(f->nregs); // scratch register that we temporary rebind - if( o->p3 >= 63 ) jit_error("assert"); - memcpy(regids, o->extra, o->p3 * sizeof(int)); - regids[0] = f->nregs; - sc->size = HL_WSIZE; - sc->t = &hlt_dyn; - op64(ctx, MOV, pc, pmem(&p,v->id,HL_WSIZE)); - scratch(pc); - sc->current = pc; - pc->holds = sc; - size = prepare_call_args(ctx,o->p3,regids,ctx->vregs,0); - } - - op_call(ctx,r,size); - discard_regs(ctx, false); - store_result(ctx, dst); - patch_jump(ctx,jend); - } - break; - default: - ASSERT(0); - break; - } - break; - case ORethrow: - { - int size = prepare_call_args(ctx,1,&o->p1,ctx->vregs,0); - call_native(ctx,hl_rethrow,size); - } - break; - case OThrow: - { - int size = prepare_call_args(ctx,1,&o->p1,ctx->vregs,0); - call_native(ctx,hl_throw,size); - } - break; - case OLabel: - // NOP for now - discard_regs(ctx,false); - break; - case OGetI8: - case OGetI16: - { - preg *base = alloc_cpu(ctx, ra, true); - preg *offset = alloc_cpu64(ctx, rb, true); - preg *r = alloc_reg(ctx,o->op == OGetI8 ? RCPU_8BITS : RCPU); - op64(ctx,XOR,r,r); - op32(ctx, o->op == OGetI8 ? 
MOV8 : MOV16,r,pmem2(&p,base->id,offset->id,1,0)); - store(ctx, dst, r, true); - } - break; - case OGetMem: - { - #ifndef HL_64 - if (dst->t->kind == HI64) { - error_i64(); - } - #endif - preg *base = alloc_cpu(ctx, ra, true); - preg *offset = alloc_cpu64(ctx, rb, true); - store(ctx, dst, pmem2(&p,base->id,offset->id,1,0), false); - } - break; - case OSetI8: - { - preg *base = alloc_cpu(ctx, dst, true); - preg *offset = alloc_cpu64(ctx, ra, true); - preg *value = alloc_cpu8(ctx, rb, true); - op32(ctx,MOV8,pmem2(&p,base->id,offset->id,1,0),value); - } - break; - case OSetI16: - { - preg *base = alloc_cpu(ctx, dst, true); - preg *offset = alloc_cpu64(ctx, ra, true); - preg *value = alloc_cpu(ctx, rb, true); - op32(ctx,MOV16,pmem2(&p,base->id,offset->id,1,0),value); - } - break; - case OSetMem: - { - preg *base = alloc_cpu(ctx, dst, true); - preg *offset = alloc_cpu64(ctx, ra, true); - preg *value; - switch( rb->t->kind ) { - case HI32: - value = alloc_cpu(ctx, rb, true); - op32(ctx,MOV,pmem2(&p,base->id,offset->id,1,0),value); - break; - case HF32: - value = alloc_fpu(ctx, rb, true); - op32(ctx,MOVSS,pmem2(&p,base->id,offset->id,1,0),value); - break; - case HF64: - value = alloc_fpu(ctx, rb, true); - op32(ctx,MOVSD,pmem2(&p,base->id,offset->id,1,0),value); - break; - case HI64: - case HGUID: - value = alloc_cpu(ctx, rb, true); - op64(ctx,MOV,pmem2(&p,base->id,offset->id,1,0),value); - break; - default: - ASSERT(rb->t->kind); - break; - } - } - break; - case OType: - { - op64(ctx,MOV,alloc_cpu(ctx, dst, false),pconst64(&p,(int_val)(m->code->types + o->p2))); - store(ctx,dst,dst->current,false); - } - break; - case OGetType: - { - int jnext, jend; - preg *r = alloc_cpu(ctx, ra, true); - preg *tmp = alloc_reg(ctx, RCPU); - op64(ctx,TEST,r,r); - XJump_small(JNotZero,jnext); - op64(ctx,MOV, tmp, pconst64(&p,(int_val)&hlt_void)); - XJump_small(JAlways,jend); - patch_jump(ctx,jnext); - op64(ctx, MOV, tmp, pmem(&p,r->id,0)); - patch_jump(ctx,jend); - store(ctx,dst,tmp,true); 
- } - break; - case OGetArray: - { - preg *rdst = IS_FLOAT(dst) ? alloc_fpu(ctx,dst,false) : alloc_cpu(ctx,dst,false); - if( ra->t->kind == HABSTRACT ) { - int osize; - bool isRead = dst->t->kind != HOBJ && dst->t->kind != HSTRUCT; - if( isRead ) - osize = sizeof(void*); - else { - hl_runtime_obj *rt = hl_get_obj_rt(dst->t); - osize = rt->size; - } - preg *idx = alloc_cpu64(ctx, rb, true); - op64(ctx, IMUL, idx, pconst(&p,osize)); - op64(ctx, isRead?MOV:LEA, rdst, pmem2(&p,alloc_cpu(ctx,ra, true)->id,idx->id,1,0)); - store(ctx,dst,dst->current,false); - scratch(idx); - } else { - copy(ctx, rdst, pmem2(&p,alloc_cpu(ctx,ra,true)->id,alloc_cpu64(ctx,rb,true)->id,hl_type_size(dst->t),sizeof(varray)), dst->size); - store(ctx,dst,dst->current,false); - } - } - break; - case OSetArray: - { - if( dst->t->kind == HABSTRACT ) { - int osize; - bool isWrite = rb->t->kind != HOBJ && rb->t->kind != HSTRUCT; - if( isWrite ) { - osize = sizeof(void*); - } else { - hl_runtime_obj *rt = hl_get_obj_rt(rb->t); - osize = rt->size; - } - preg *pdst = alloc_cpu(ctx,dst,true); - preg *pra = alloc_cpu64(ctx,ra,true); - op64(ctx, IMUL, pra, pconst(&p,osize)); - op64(ctx, ADD, pdst, pra); - scratch(pra); - preg *prb = alloc_cpu(ctx,rb,true); - preg *tmp = alloc_reg(ctx, RCPU_CALL); - int offset = 0; - while( offset < osize ) { - int remain = osize - offset; - int copy_size = remain >= HL_WSIZE ? HL_WSIZE : (remain >= 4 ? 4 : (remain >= 2 ? 2 : 1)); - copy(ctx, tmp, pmem(&p, prb->id, offset), copy_size); - copy(ctx, pmem(&p, pdst->id, offset), tmp, copy_size); - offset += copy_size; - } - scratch(pdst); - } else { - preg *rrb = IS_FLOAT(rb) ? alloc_fpu(ctx,rb,true) : alloc_cpu(ctx,rb,true); - copy(ctx, pmem2(&p,alloc_cpu(ctx,dst,true)->id,alloc_cpu64(ctx,ra,true)->id,hl_type_size(rb->t),sizeof(varray)), rrb, rb->size); - } - } - break; - case OArraySize: - { - op32(ctx,MOV,alloc_cpu(ctx,dst,false),pmem(&p,alloc_cpu(ctx,ra,true)->id,ra->t->kind == HABSTRACT ? 
HL_WSIZE + 4 : HL_WSIZE*2)); - store(ctx,dst,dst->current,false); - } - break; - case ORef: - { - scratch(ra->current); - op64(ctx,MOV,alloc_cpu(ctx,dst,false),REG_AT(Ebp)); - if( ra->stackPos < 0 ) - op64(ctx,SUB,dst->current,pconst(&p,-ra->stackPos)); - else - op64(ctx,ADD,dst->current,pconst(&p,ra->stackPos)); - store(ctx,dst,dst->current,false); - } - break; - case OUnref: - copy_to(ctx,dst,pmem(&p,alloc_cpu(ctx,ra,true)->id,0)); - break; - case OSetref: - copy_from(ctx,pmem(&p,alloc_cpu(ctx,dst,true)->id,0),ra); - break; - case ORefData: - switch( ra->t->kind ) { - case HARRAY: - { - preg *r = fetch(ra); - preg *d = alloc_cpu(ctx,dst,false); - op64(ctx,MOV,d,r); - op64(ctx,ADD,d,pconst(&p,sizeof(varray))); - store(ctx,dst,dst->current,false); - } - break; - default: - ASSERT(ra->t->kind); - } - break; - case ORefOffset: - { - preg *d = alloc_cpu(ctx,rb,true); - preg *r2 = alloc_cpu(ctx,dst,false); - preg *r = fetch(ra); - int size = hl_type_size(dst->t->tparam); - op64(ctx,MOV,r2,r); - switch( size ) { - case 1: - break; - case 2: - op64(ctx,SHL,d,pconst(&p,1)); - break; - case 4: - op64(ctx,SHL,d,pconst(&p,2)); - break; - case 8: - op64(ctx,SHL,d,pconst(&p,3)); - break; - default: - op64(ctx,IMUL,d,pconst(&p,size)); - break; - } - op64(ctx,ADD,r2,d); - scratch(d); - store(ctx,dst,dst->current,false); - } - break; - case OToVirtual: - { -# ifdef HL_64 - int size = pad_before_call(ctx, 0); - op64(ctx,MOV,REG_AT(CALL_REGS[1]),fetch(ra)); - op64(ctx,MOV,REG_AT(CALL_REGS[0]),pconst64(&p,(int_val)dst->t)); -# else - int size = pad_before_call(ctx, HL_WSIZE*2); - op32(ctx,PUSH,fetch(ra),UNUSED); - op32(ctx,PUSH,pconst(&p,(int)(int_val)dst->t),UNUSED); -# endif - if( ra->t->kind == HOBJ ) hl_get_obj_rt(ra->t); // ensure it's initialized - call_native(ctx,hl_to_virtual,size); - store(ctx,dst,PEAX,true); - } - break; - case OMakeEnum: - { - hl_enum_construct *c = &dst->t->tenum->constructs[o->p2]; - int_val args[] = { (int_val)dst->t, o->p2 }; - int i; - 
call_native_consts(ctx, hl_alloc_enum, args, 2); - RLOCK(PEAX); - for(i=0;inparams;i++) { - preg *r = fetch(R(o->extra[i])); - copy(ctx, pmem(&p,Eax,c->offsets[i]),r, R(o->extra[i])->size); - RUNLOCK(fetch(R(o->extra[i]))); - if ((i & 15) == 0) jit_buf(ctx); - } - store(ctx, dst, PEAX, true); - } - break; - case OEnumAlloc: - { - int_val args[] = { (int_val)dst->t, o->p2 }; - call_native_consts(ctx, hl_alloc_enum, args, 2); - store(ctx, dst, PEAX, true); - } - break; - case OEnumField: - { - hl_enum_construct *c = &ra->t->tenum->constructs[o->p3]; - preg *r = alloc_cpu(ctx,ra,true); - copy_to(ctx,dst,pmem(&p,r->id,c->offsets[(int)(int_val)o->extra])); - } - break; - case OSetEnumField: - { - hl_enum_construct *c = &dst->t->tenum->constructs[0]; - preg *r = alloc_cpu(ctx,dst,true); - switch( rb->t->kind ) { - case HF64: - { - preg *d = alloc_fpu(ctx,rb,true); - copy(ctx,pmem(&p,r->id,c->offsets[o->p2]),d,8); - break; - } - default: - copy(ctx,pmem(&p,r->id,c->offsets[o->p2]),alloc_cpu(ctx,rb,true),hl_type_size(c->params[o->p2])); - break; - } - } - break; - case ONullCheck: - { - int jz; - preg *r = alloc_cpu(ctx,dst,true); - op64(ctx,TEST,r,r); - XJump_small(JNotZero,jz); - - hl_opcode *next = f->ops + opCount + 1; - bool null_field_access = false; - int hashed_name = 0; - // skip const and operation between nullcheck and access - while( (next < f->ops + f->nops - 1) && (next->op >= OInt && next->op <= ODecr) ) { - next++; - } - if( (next->op == OField && next->p2 == o->p1) || (next->op == OSetField && next->p1 == o->p1) ) { - int fid = next->op == OField ? 
next->p3 : next->p2; - hl_obj_field *f = NULL; - if( dst->t->kind == HOBJ || dst->t->kind == HSTRUCT ) - f = hl_obj_field_fetch(dst->t, fid); - else if( dst->t->kind == HVIRTUAL ) - f = dst->t->virt->fields + fid; - if( f == NULL ) ASSERT(dst->t->kind); - null_field_access = true; - hashed_name = f->hashed_name; - } else if( (next->op >= OCall1 && next->op <= OCallN) && next->p3 == o->p1 ) { - int fid = next->p2 < 0 ? -1 : ctx->m->functions_indexes[next->p2]; - hl_function *cf = ctx->m->code->functions + fid; - const uchar *name = fun_field_name(cf); - null_field_access = true; - hashed_name = hl_hash_gen(name, true); - } - - if( null_field_access ) { - pad_before_call(ctx, HL_WSIZE); - if( hashed_name >= 0 && hashed_name < 256 ) - op64(ctx,PUSH8,pconst(&p,hashed_name),UNUSED); - else - op32(ctx,PUSH,pconst(&p,hashed_name),UNUSED); - } else { - pad_before_call(ctx, 0); - } - - jlist *j = (jlist*)hl_malloc(&ctx->galloc,sizeof(jlist)); - j->pos = BUF_POS(); - j->target = null_field_access ? 
-3 : -1; - j->next = ctx->calls; - ctx->calls = j; - - op64(ctx,MOV,PEAX,pconst64(&p,RESERVE_ADDRESS)); - op_call(ctx,PEAX,-1); - patch_jump(ctx,jz); - } - break; - case OSafeCast: - make_dyn_cast(ctx, dst, ra); - break; - case ODynGet: - { - int size; -# ifdef HL_64 - if( IS_FLOAT(dst) || dst->t->kind == HI64 ) { - size = begin_native_call(ctx,2); - } else { - size = begin_native_call(ctx,3); - set_native_arg(ctx,pconst64(&p,(int_val)dst->t)); - } - set_native_arg(ctx,pconst64(&p,(int_val)hl_hash_utf8(m->code->strings[o->p3]))); - set_native_arg(ctx,fetch(ra)); -# else - preg *r; - r = alloc_reg(ctx,RCPU); - if( IS_FLOAT(dst) || dst->t->kind == HI64 ) { - size = pad_before_call(ctx,HL_WSIZE*2); - } else { - size = pad_before_call(ctx,HL_WSIZE*3); - op64(ctx,MOV,r,pconst64(&p,(int_val)dst->t)); - op64(ctx,PUSH,r,UNUSED); - } - op64(ctx,MOV,r,pconst64(&p,(int_val)hl_hash_utf8(m->code->strings[o->p3]))); - op64(ctx,PUSH,r,UNUSED); - op64(ctx,PUSH,fetch(ra),UNUSED); -# endif - call_native(ctx,get_dynget(dst->t),size); - store_result(ctx,dst); - } - break; - case ODynSet: - { - int size; -# ifdef HL_64 - switch( rb->t->kind ) { - case HF32: - case HF64: - size = begin_native_call(ctx, 3); - set_native_arg_fpu(ctx,fetch(rb),rb->t->kind == HF32); - set_native_arg(ctx,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true))); - set_native_arg(ctx,fetch(dst)); - call_native(ctx,get_dynset(rb->t),size); - break; - case HI64: - case HGUID: - size = begin_native_call(ctx, 3); - set_native_arg(ctx,fetch(rb)); - set_native_arg(ctx,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true))); - set_native_arg(ctx,fetch(dst)); - call_native(ctx,get_dynset(rb->t),size); - break; - default: - size = begin_native_call(ctx,4); - set_native_arg(ctx,fetch(rb)); - set_native_arg(ctx,pconst64(&p,(int_val)rb->t)); - set_native_arg(ctx,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true))); - set_native_arg(ctx,fetch(dst)); - call_native(ctx,get_dynset(rb->t),size); - break; - 
} -# else - switch( rb->t->kind ) { - case HF32: - size = pad_before_call(ctx, HL_WSIZE*2 + sizeof(float)); - push_reg(ctx,rb); - op32(ctx,PUSH,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)),UNUSED); - op32(ctx,PUSH,fetch(dst),UNUSED); - call_native(ctx,get_dynset(rb->t),size); - break; - case HF64: - case HI64: - case HGUID: - size = pad_before_call(ctx, HL_WSIZE*2 + sizeof(double)); - push_reg(ctx,rb); - op32(ctx,PUSH,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)),UNUSED); - op32(ctx,PUSH,fetch(dst),UNUSED); - call_native(ctx,get_dynset(rb->t),size); - break; - default: - size = pad_before_call(ctx, HL_WSIZE*4); - op32(ctx,PUSH,fetch32(ctx,rb),UNUSED); - op32(ctx,PUSH,pconst64(&p,(int_val)rb->t),UNUSED); - op32(ctx,PUSH,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)),UNUSED); - op32(ctx,PUSH,fetch(dst),UNUSED); - call_native(ctx,get_dynset(rb->t),size); - break; - } -# endif - } - break; - case OTrap: - { - int size, jenter, jtrap; - int offset = 0; - int trap_size = (sizeof(hl_trap_ctx) + 15) & 0xFFF0; - hl_trap_ctx *t = NULL; -# ifndef HL_THREADS - if( tinf == NULL ) tinf = hl_get_thread(); // single thread -# endif - -# ifdef HL_64 - preg *trap = REG_AT(CALL_REGS[0]); -# else - preg *trap = PEAX; -# endif - RLOCK(trap); - - preg *treg = alloc_reg(ctx, RCPU); - if( !tinf ) { - call_native(ctx, hl_get_thread, 0); - op64(ctx,MOV,treg,PEAX); - offset = (int)(int_val)&tinf->trap_current; - } else { - offset = 0; - op64(ctx,MOV,treg,pconst64(&p,(int_val)&tinf->trap_current)); - } - op64(ctx,MOV,trap,pmem(&p,treg->id,offset)); - op64(ctx,SUB,PESP,pconst(&p,trap_size)); - op64(ctx,MOV,pmem(&p,Esp,(int)(int_val)&t->prev),trap); - op64(ctx,MOV,trap,PESP); - op64(ctx,MOV,pmem(&p,treg->id,offset),trap); - - /* - trap E,@catch - catch g - catch g2 - ... 
- @:catch - - // Before haxe 5 - This is a bit hackshish : we want to detect the type of exception filtered by the catch so we check the following - sequence of HL opcodes: - - trap E,@catch - ... - @catch: - global R, _ - call _, ???(R,E) - - ??? is expected to be hl.BaseType.check - */ - hl_opcode *cat = f->ops + opCount + 1; - hl_opcode *next = f->ops + opCount + 1 + o->p2; - hl_opcode *next2 = f->ops + opCount + 2 + o->p2; - if( cat->op == OCatch || (next->op == OGetGlobal && next2->op == OCall2 && next2->p3 == next->p1 && dst->stack.id == (int)(int_val)next2->extra) ) { - int gindex = cat->op == OCatch ? cat->p1 : next->p2; - hl_type *gt = m->code->globals[gindex]; - while( gt->kind == HOBJ && gt->obj->super ) gt = gt->obj->super; - if( gt->kind == HOBJ && gt->obj->nfields && gt->obj->fields[0].t->kind == HTYPE ) { - void *addr = m->globals_data + m->globals_indexes[gindex]; -# ifdef HL_64 - op64(ctx,MOV,treg,pconst64(&p,(int_val)addr)); - op64(ctx,MOV,treg,pmem(&p,treg->id,0)); -# else - op64(ctx,MOV,treg,paddr(&p,addr)); -# endif - } else - op64(ctx,MOV,treg,pconst(&p,0)); - } else { - op64(ctx,MOV,treg,pconst(&p,0)); - } - op64(ctx,MOV,pmem(&p,Esp,(int)(int_val)&t->tcheck),treg); - - // On Win64 setjmp actually takes two arguments - // the jump buffer and the frame pointer (or the stack pointer if there is no FP) -#if defined(HL_WIN) && defined(HL_64) - size = begin_native_call(ctx, 2); - set_native_arg(ctx, REG_AT(Ebp)); -#else - size = begin_native_call(ctx, 1); -#endif - set_native_arg(ctx,trap); -#ifdef HL_MINGW - call_native(ctx,_setjmp,size); -#else - call_native(ctx,setjmp,size); -#endif - op64(ctx,TEST,PEAX,PEAX); - XJump_small(JZero,jenter); - op64(ctx,ADD,PESP,pconst(&p,trap_size)); - if( !tinf ) { - call_native(ctx, hl_get_thread, 0); - op64(ctx,MOV,PEAX,pmem(&p, Eax, (int)(int_val)&tinf->exc_value)); - } else { - op64(ctx,MOV,PEAX,pconst64(&p,(int_val)&tinf->exc_value)); - op64(ctx,MOV,PEAX,pmem(&p, Eax, 0)); - } - store(ctx,dst,PEAX,false); - - 
jtrap = do_jump(ctx,OJAlways,false); - register_jump(ctx,jtrap,(opCount + 1) + o->p2); - patch_jump(ctx,jenter); - } - break; - case OEndTrap: - { - int trap_size = (sizeof(hl_trap_ctx) + 15) & 0xFFF0; - hl_trap_ctx *tmp = NULL; - preg *addr,*r; - int offset; - if (!tinf) { - call_native(ctx, hl_get_thread, 0); - addr = PEAX; - RLOCK(addr); - offset = (int)(int_val)&tinf->trap_current; - } else { - offset = 0; - addr = alloc_reg(ctx, RCPU); - op64(ctx, MOV, addr, pconst64(&p, (int_val)&tinf->trap_current)); - } - r = alloc_reg(ctx, RCPU); - op64(ctx, MOV, r, pmem(&p,addr->id,offset)); - op64(ctx, MOV, r, pmem(&p,r->id,(int)(int_val)&tmp->prev)); - op64(ctx, MOV, pmem(&p,addr->id, offset), r); -# ifdef HL_WIN - // erase eip (prevent false positive) - { - _JUMP_BUFFER *b = NULL; -# ifdef HL_64 - op64(ctx,MOV,pmem(&p,Esp,(int)(int_val)&(b->Rip)),PEAX); -# else - op64(ctx,MOV,pmem(&p,Esp,(int)&(b->Eip)),PEAX); -# endif - } -# endif - op64(ctx,ADD,PESP,pconst(&p,trap_size)); - } - break; - case OEnumIndex: - { - preg *r = alloc_reg(ctx,RCPU); - op64(ctx,MOV,r,pmem(&p,alloc_cpu(ctx,ra,true)->id,HL_WSIZE)); - store(ctx,dst,r,true); - break; - } - break; - case OSwitch: - { - int jdefault; - int i; - preg *r = alloc_cpu(ctx, dst, true); - preg *r2 = alloc_reg(ctx, RCPU); - op32(ctx, CMP, r, pconst(&p,o->p2)); - XJump(JUGte,jdefault); - // r2 = r * 5 + eip -# ifdef HL_64 - op64(ctx, XOR, r2, r2); -# endif - op32(ctx, MOV, r2, r); - op32(ctx, SHL, r2, pconst(&p,2)); - op32(ctx, ADD, r2, r); -# ifdef HL_64 - preg *tmp = alloc_reg(ctx, RCPU); - op64(ctx, MOV, tmp, pconst64(&p,RESERVE_ADDRESS)); -# else - op64(ctx, ADD, r2, pconst64(&p,RESERVE_ADDRESS)); -# endif - { - jlist *s = (jlist*)hl_malloc(&ctx->galloc, sizeof(jlist)); - s->pos = BUF_POS() - sizeof(void*); - s->next = ctx->switchs; - ctx->switchs = s; - } -# ifdef HL_64 - op64(ctx, ADD, r2, tmp); -# endif - op64(ctx, JMP, r2, UNUSED); - for(i=0;ip2;i++) { - int j = do_jump(ctx,OJAlways,false); - 
register_jump(ctx,j,(opCount + 1) + o->extra[i]); - if( (i & 15) == 0 ) jit_buf(ctx); - } - patch_jump(ctx, jdefault); - } - break; - case OGetTID: - op32(ctx, MOV, alloc_cpu(ctx,dst,false), pmem(&p,alloc_cpu(ctx,ra,true)->id,0)); - store(ctx,dst,dst->current,false); - break; - case OAssert: - { - pad_before_call(ctx, 0); - jlist *j = (jlist*)hl_malloc(&ctx->galloc,sizeof(jlist)); - j->pos = BUF_POS(); - j->target = -2; - j->next = ctx->calls; - ctx->calls = j; - - op64(ctx,MOV,PEAX,pconst64(&p,RESERVE_ADDRESS)); - op_call(ctx,PEAX,-1); - } - break; - case ONop: - break; - case OPrefetch: - { - preg *r = alloc_cpu(ctx, dst, true); - if( o->p2 > 0 ) { - switch( dst->t->kind ) { - case HOBJ: - case HSTRUCT: - { - hl_runtime_obj *rt = hl_get_obj_rt(dst->t); - preg *r2 = alloc_reg(ctx, RCPU); - op64(ctx, LEA, r2, pmem(&p, r->id, rt->fields_indexes[o->p2-1])); - r = r2; - } - break; - default: - ASSERT(dst->t->kind); - break; - } - } - switch( o->p3 ) { - case 0: - op64(ctx, PREFETCHT0, pmem(&p,r->id,0), UNUSED); - break; - case 1: - op64(ctx, PREFETCHT1, pmem(&p,r->id,0), UNUSED); - break; - case 2: - op64(ctx, PREFETCHT2, pmem(&p,r->id,0), UNUSED); - break; - case 3: - op64(ctx, PREFETCHNTA, pmem(&p,r->id,0), UNUSED); - break; - case 4: - op64(ctx, PREFETCHW, pmem(&p,r->id,0), UNUSED); - break; - default: - ASSERT(o->p3); - break; - } - } - break; - case OAsm: - { - switch( o->p1 ) { - case 0: // byte output - B(o->p2); - break; - case 1: // scratch cpu reg - scratch(REG_AT(o->p2)); - break; - case 2: // read vm reg - rb--; - copy(ctx, REG_AT(o->p2), &rb->stack, rb->size); - scratch(REG_AT(o->p2)); - break; - case 3: // write vm reg - rb--; - copy(ctx, &rb->stack, REG_AT(o->p2), rb->size); - scratch(rb->current); - break; - case 4: - if( ctx->totalRegsSize != 0 ) - hl_fatal("Asm naked function should not have local variables"); - if( opCount != 0 ) - hl_fatal("Asm naked function should be on first opcode"); - ctx->buf.b -= BUF_POS() - ctx->functionPos; // reset to our 
function start - break; - default: - ASSERT(o->p1); - break; - } - } - break; - case OCatch: - // Only used by OTrap typing - break; - default: - jit_error(hl_op_name(o->op)); - break; - } - // we are landing at this position, assume we have lost our registers - if( ctx->opsPos[opCount+1] == -1 ) - discard_regs(ctx,true); - ctx->opsPos[opCount+1] = BUF_POS(); - - // write debug infos - size = BUF_POS() - codePos; - if( debug16 && size > 0xFF00 ) { - debug32 = malloc(sizeof(int) * (f->nops + 1)); - for(i=0;icurrentPos;i++) - debug32[i] = debug16[i]; - free(debug16); - debug16 = NULL; - } - if( debug16 ) debug16[ctx->currentPos] = (unsigned short)size; else if( debug32 ) debug32[ctx->currentPos] = size; - - } - // patch jumps - { - jlist *j = ctx->jumps; - while( j ) { - *(int*)(ctx->startBuf + j->pos) = ctx->opsPos[j->target] - (j->pos + 4); - j = j->next; - } - ctx->jumps = NULL; - } - int codeEndPos = BUF_POS(); - // add nops padding - jit_nops(ctx); - // clear regs - for(i=0;iholds = NULL; - r->lock = 0; - } - // save debug infos - if( ctx->debug ) { - int fid = (int)(f - m->code->functions); - ctx->debug[fid].start = codePos; - ctx->debug[fid].offsets = debug32 ? 
(void*)debug32 : (void*)debug16; - ctx->debug[fid].large = debug32 != NULL; - } - // unwind info -#ifdef WIN64_UNWIND_TABLES - int uw_idx = ctx->nunwind++; - ctx->unwind_table[uw_idx].BeginAddress = codePos; - ctx->unwind_table[uw_idx].EndAddress = codeEndPos; - ctx->unwind_table[uw_idx].UnwindData = ctx->unwind_offset; -#endif - // reset tmp allocator hl_free(&ctx->falloc); - return codePos; -} - -static void *get_wrapper( hl_type *t ) { - return call_jit_hl2c; -} - -void hl_jit_patch_method( void *old_fun, void **new_fun_table ) { - // mov eax, addr - // jmp [eax] - unsigned char *b = (unsigned char*)old_fun; - unsigned long long addr = (unsigned long long)(int_val)new_fun_table; -# ifdef HL_64 - *b++ = 0x48; - *b++ = 0xB8; - *b++ = (unsigned char)addr; - *b++ = (unsigned char)(addr>>8); - *b++ = (unsigned char)(addr>>16); - *b++ = (unsigned char)(addr>>24); - *b++ = (unsigned char)(addr>>32); - *b++ = (unsigned char)(addr>>40); - *b++ = (unsigned char)(addr>>48); - *b++ = (unsigned char)(addr>>56); -# else - *b++ = 0xB8; - *b++ = (unsigned char)addr; - *b++ = (unsigned char)(addr>>8); - *b++ = (unsigned char)(addr>>16); - *b++ = (unsigned char)(addr>>24); -# endif - *b++ = 0xFF; - *b++ = 0x20; -} - -static void missing_closure() { - hl_error("Missing static closure"); + ctx->mod = m; + ctx->fun = f; + current_ctx = ctx; + hl_emit_function(ctx); + current_ctx = NULL; + return 0; } void *hl_jit_code( jit_ctx *ctx, hl_module *m, int *codesize, hl_debug_infos **debug, hl_module *previous ) { - jlist *c; - int size = BUF_POS(); - unsigned char *code; - if( size & 4095 ) size += 4096 - (size&4095); - code = (unsigned char*)hl_alloc_executable_memory(size); - if( code == NULL ) return NULL; - memcpy(code,ctx->startBuf,BUF_POS()); - *codesize = size; - *debug = ctx->debug; - if( !call_jit_c2hl ) { - call_jit_c2hl = code + ctx->c2hl; - call_jit_hl2c = code + ctx->hl2c; - hl_setup.get_wrapper = get_wrapper; - hl_setup.static_call = callback_c2hl; - 
hl_setup.static_call_ref = true; - } -#ifdef WIN64_UNWIND_TABLES - m->unwind_table = ctx->unwind_table; - RtlAddFunctionTable(m->unwind_table, ctx->nunwind, (DWORD64)code); -#endif - if( !ctx->static_function_offset ) { - int i; - ctx->static_function_offset = true; - for(i=0;i<(int)(sizeof(ctx->static_functions)/sizeof(void*));i++) - ctx->static_functions[i] = (void*)(code + (int)(int_val)ctx->static_functions[i]); - } - // patch calls - c = ctx->calls; - while( c ) { - void *fabs; - if( c->target < 0 ) - fabs = ctx->static_functions[-c->target-1]; - else { - fabs = m->functions_ptrs[c->target]; - if( fabs == NULL ) { - // read absolute address from previous module - int old_idx = m->hash->functions_hashes[m->functions_indexes[c->target]]; - if( old_idx < 0 ) - return NULL; - fabs = previous->functions_ptrs[(previous->code->functions + old_idx)->findex]; - } else { - // relative - fabs = (unsigned char*)code + (int)(int_val)fabs; - } - } - if( (code[c->pos]&~3) == (IS_64?0x48:0xB8) || code[c->pos] == 0x68 ) // MOV : absolute | PUSH - *(void**)(code + c->pos + (IS_64?2:1)) = fabs; - else { - int_val delta = (int_val)fabs - (int_val)code - (c->pos + 5); - int rpos = (int)delta; - if( (int_val)rpos != delta ) { - printf("Target code too far too rebase\n"); - return NULL; - } - *(int*)(code + c->pos + 1) = rpos; - } - c = c->next; - } - // patch switchs - c = ctx->switchs; - while( c ) { - *(void**)(code + c->pos) = code + c->pos + (IS_64 ? 
14 : 6); - c = c->next; - } - // patch closures - { - vclosure *c = ctx->closure_list; - while( c ) { - vclosure *next; - int fidx = (int)(int_val)c->fun; - void *fabs = m->functions_ptrs[fidx]; - if( fabs == NULL ) { - // read absolute address from previous module - int old_idx = m->hash->functions_hashes[m->functions_indexes[fidx]]; - if( old_idx < 0 ) - fabs = missing_closure; - else - fabs = previous->functions_ptrs[(previous->code->functions + old_idx)->findex]; - } else { - // relative - fabs = (unsigned char*)code + (int)(int_val)fabs; - } - c->fun = fabs; - next = (vclosure*)c->value; - c->value = NULL; - c = next; - } - } - return code; + printf("TODO:emit_code\n"); + exit(0); + return NULL; } +void hl_jit_patch_method( void*fun, void**newt ) { + jit_assert(); +} diff --git a/src/jit.h b/src/jit.h new file mode 100644 index 000000000..820e79bd5 --- /dev/null +++ b/src/jit.h @@ -0,0 +1,183 @@ +/* + * Copyright (C)2005-2016 Haxe Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#ifndef JIT_H +#define JIT_H + +#include + +typedef enum { + LOAD_ADDR, + LOAD_IMM, + LOAD_ARG, + STORE, + LEA, + TEST, + CMP, + JCOND, + JUMP, + JUMP_TABLE, + BINOP, + UNOP, + CONV, + CONV_UNSIGNED, + RET, + CALL_PTR, + CALL_REG, + CALL_FUN, + MOV, + ALLOC_STACK, + FREE_STACK, + NATIVE_REG, + PREFETCH, + DEBUG_BREAK, +} emit_op; + +typedef enum { + REG_RBP, +} native_reg; + +typedef enum { + M_NONE, + M_UI8, + M_UI16, + M_I32, + M_I64, + M_F32, + M_F64, + M_PTR, + M_VOID, + M_NORET, +} emit_mode; + +typedef struct { + int index; +} ereg; + +typedef struct { + union { + struct { + unsigned char op; + unsigned char mode; + unsigned char nargs; + unsigned char _unused; + }; + int header; + }; + int size_offs; + union { + struct { + ereg a; + ereg b; + }; + uint64 value; + }; +} einstr; + +#define VAL_NULL 0x80000000 +#define IS_NULL(e) ((e).index == VAL_NULL) + +typedef struct { + int *data; + int max; + int cur; +} int_alloc; + +typedef struct _ephi ephi; + +struct _ephi { + ereg value; + int nvalues; + ereg *values; +}; + +typedef struct _eblock { + int id; + int start_pos; + int end_pos; + int next_count; + int pred_count; + int phi_count; + int *nexts; + int *preds; + ephi *phis; +} eblock; + +typedef struct _emit_ctx emit_ctx; + +typedef struct _jit_ctx ji_ctx; + +struct _jit_ctx { + hl_module *mod; + hl_function *fun; + hl_alloc falloc; + emit_ctx *emit; + // emit output + int instr_count; + int block_count; + int value_count; + einstr *instrs; + eblock *blocks; + int *values_writes; + int *emit_pos_map; +}; + +jit_ctx *hl_jit_alloc(); +void hl_jit_free( jit_ctx *ctx, h_bool can_reset ); +void hl_jit_reset( jit_ctx *ctx, hl_module *m ); +void hl_jit_init( jit_ctx *ctx, hl_module *m ); 
+int hl_jit_function( jit_ctx *ctx, hl_module *m, hl_function *f ); + +void hl_jit_null_field_access(); +void hl_jit_null_access(); +void hl_jit_assert(); + +void int_alloc_reset( int_alloc *a ); +void int_alloc_free( int_alloc *a ); +int *int_alloc_get( int_alloc *a, int count ); +void int_alloc_store( int_alloc *a, int v ); + +void hl_emit_dump( jit_ctx *ctx ); +const char *hl_emit_regstr( ereg v ); +ereg *hl_emit_get_args( emit_ctx *ctx, einstr *e ); + +#define val_str(v) hl_emit_regstr(v) + + +#ifdef HL_DEBUG +# define JIT_DEBUG +#endif + +#define jit_error(msg) { hl_jit_error(msg,__func__,__LINE__); hl_debug_break(); exit(-1); } +#define jit_assert() jit_error("") + +#ifdef JIT_DEBUG +# define jit_debug(...) printf(__VA_ARGS__) +#else +# define jit_debug(...) +#endif + +void hl_jit_error( const char *msg, const char *func, int line ); + +void *hl_jit_code( jit_ctx *ctx, hl_module *m, int *codesize, hl_debug_infos **debug, hl_module *previous ); +void hl_jit_patch_method( void *old_fun, void **new_fun_table ); + +#endif diff --git a/src/jit_dump.c b/src/jit_dump.c new file mode 100644 index 000000000..f9c8ce348 --- /dev/null +++ b/src/jit_dump.c @@ -0,0 +1,415 @@ +/* + * Copyright (C)2015-2016 Haxe Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#include + +static const char *op_names[] = { + "load-addr", + "load-imm", + "load-arg", + "store", + "lea", + "test", + "cmp", + "jcond", + "jump", + "jump-table", + "binop", + "unop", + "conv", + "conv-unsigned", + "ret", + "call", + "call", + "call", + "mov", + "alloc-stack", + "free-stack", + "native-reg", + "prefetch", + "debug-break", +}; + +const char *hl_emit_regstr( ereg v ) { + static char fmts[4][10]; + static int flip = 0; + // allow up to four concurrent val_str + char *fmt = fmts[flip++&3]; + if( IS_NULL(v) ) + sprintf(fmt,"NULL???"); + else if( v.index < 0 ) + sprintf(fmt,"P%d",-v.index); + else + sprintf(fmt,"V%d",v.index); + return fmt; +} + +static void hl_dump_arg( hl_function *fun, int fmt, int val, char sep, int pos ) { + if( fmt == 0 ) return; + printf("%c", sep); + switch( fmt ) { + case 1: + case 2: + printf("R%d", val); + if( val < 0 || val >= fun->nregs ) printf("?"); + break; + case 3: + printf("%d", val); + break; + case 4: + printf("[%d]", val); + break; + case 5: + case 6: + printf("@%X", val + pos + 1); + break; + default: + printf("?#%d", fmt); + break; + } +} + +#define OP(_,_a,_b,_c) ((_a) | (((_b)&0xFF) << 8) | (((_c)&0xFF) << 16)), +#define OP_BEGIN static int hl_op_fmt[] = { +#define OP_END }; +#undef R +#include "opcodes.h" + +static void hl_dump_op( hl_function *fun, hl_opcode *op ) { + printf("%s", hl_op_name(op->op) + 1); + int fmt = hl_op_fmt[op->op]; + int pos = (int)(op - fun->ops); + hl_dump_arg(fun, fmt & 0xFF, op->p1, ' ', pos); 
+ if( ((fmt >> 8) & 0xFF) == 5 ) { + int count = (fmt >> 16) & 0xFF; + printf(" ["); + if( count == 4 ) { + printf("%d", op->p2); + printf(",%d", op->p3); + printf(",%d", (int)(int_val)op->extra); + } else if( op->op == OSwitch ) { + for(int i=0;ip2;i++) { + if( i != 0 ) printf(","); + printf("@%X", (op->extra[i] + pos + 1)); + } + printf(",def=@%X", op->p3 + pos + 1); + } else { + if( count == 0xFF ) + count = op->p3; + else { + printf("%d,%d,",op->p2,op->p3); + count -= 3; + } + for(int i=0;iextra[i]); + } + } + printf("]"); + } else { + hl_dump_arg(fun, (fmt >> 8) & 0xFF, op->p2,',', pos); + hl_dump_arg(fun, fmt >> 16, op->p3,',', pos); + } +} + +static const char *emit_mode_str( emit_mode mode ) { + switch( mode ) { + case M_UI8: return "-ui8"; + case M_UI16: return "-ui16"; + case M_I32: return "-i32"; + case M_I64: return "-i64"; + case M_F32: return "-f32"; + case M_F64: return "-f64"; + case M_PTR: return ""; + case M_VOID: return "-void"; + case M_NORET: return "-noret"; + default: + static char buf[50]; + sprintf(buf,"?%d",mode); + return buf; + } +} + +static void dump_value( jit_ctx *ctx, uint64 value, emit_mode mode ) { + union { + uint64 v; + double d; + float f; + } tmp; + hl_module *mod = ctx->mod; + hl_code *code = ctx->mod->code; + switch( mode ) { + case M_NONE: + printf("?0x%llX",value); + break; + case M_UI8: + case M_UI16: + case M_I32: + if( (int)value >= -0x10000 && (int)value <= 0x10000 ) + printf("%d",(int)value); + else + printf("0x%X",(int)value); + break; + case M_F32: + tmp.v = value; + printf("%f",tmp.f); + break; + case M_F64: + tmp.v = value; + printf("%g",tmp.d); + break; + default: + if( value == 0 ) + printf("NULL"); + else if( mode == M_PTR && value >= (uint64)code->types && value < (uint64)(code->types + code->ntypes) ) + uprintf(USTR("<%s>"),hl_type_str((hl_type*)value)); + else if( mode == M_PTR && value >= (uint64)mod->globals_data && value < (uint64)(mod->globals_data + mod->globals_size) ) + printf("",(int)(value - 
(uint64)mod->globals_data)); + else if( value == (uint64)&hlt_void ) + printf(""); + else + printf("0x%llX",value); + break; + } +} + +static void hl_dump_fun_name( hl_function *f ) { + if( f->obj ) + uprintf(USTR("%s.%s"),f->obj->name,f->field.name); + else if( f->field.ref ) + uprintf(USTR("%s.~%s.%d"),f->field.ref->obj->name, f->field.ref->field.name, f->ref); + printf("[%X]", f->findex); +} + +static void hl_dump_args( jit_ctx *ctx, einstr *e ) { + ereg *v = hl_emit_get_args(ctx->emit, e); + printf("("); + for(int i=0;inargs;i++) { + if( i != 0 ) printf(","); + printf("%s", val_str(v[i])); + } + printf(")"); +} + +typedef struct { const char *name; void *ptr; } named_ptr; +static void hl_dump_ptr_name( jit_ctx *ctx, void *ptr ) { +# define N(v) ptr_names[i].name = #v; ptr_names[i].ptr = v; i++ +# define N2(n,v) ptr_names[i].name = n; ptr_names[i].ptr = v; i++ +# define DYN(p) N2("dyn_get" #p, hl_dyn_get##p); N2("dyn_set" #p, hl_dyn_set##p); N2("dyn_cast" #p, hl_dyn_cast##p) + static named_ptr ptr_names[256] = { NULL }; + int i = 0; + if( !ptr_names[0].ptr ) { + N(hl_alloc_dynbool); + N(hl_alloc_dynamic); + N(hl_alloc_obj); + N(hl_alloc_dynobj); + N(hl_alloc_virtual); + N(hl_alloc_closure_ptr); + N(hl_dyn_call); + N(hl_dyn_call_obj); + N(hl_throw); + N(hl_rethrow); + N(hl_to_virtual); + N(hl_alloc_enum); + DYN(f); + DYN(d); + DYN(i64); + DYN(i); + DYN(p); + N2("null_field",hl_jit_null_field_access); + N2("null_access",hl_jit_null_access); + N(hl_get_thread); + N(setjmp); + N(_setjmp); + N2("assert",hl_jit_assert); + i = 0; + } +# undef N +# undef N2 + while( true ) { + named_ptr p = ptr_names[i++]; + if( !p.ptr ) break; + if( p.ptr == ptr ) { + printf("<%s>",p.name); + return; + } + } + for(i=0;imod->code->nnatives;i++) { + hl_native *n = ctx->mod->code->natives + i; + if( ctx->mod->functions_ptrs[n->findex] == ptr ) { + printf("<%s.%s>",n->lib[0] == '?' ? 
n->lib + 1 : n->lib,n->name); + return; + } + } + printf("",(uint64)ptr); +} + +void hl_emit_flush( jit_ctx *ctx ); + +void hl_emit_dump( jit_ctx *ctx ) { + int i; + int cur_op = 0; + hl_function *f = ctx->fun; + int nargs = f->type->fun->nargs; + hl_emit_flush(ctx); // if it not was not before (in case of dump during emit) + printf("function "); + hl_dump_fun_name(f); + printf("("); + for(i=0;i 0 ) printf(","); + uprintf(USTR("R%d"), i); + } + printf(")\n"); + for(i=0;inregs;i++) + uprintf(USTR("\tR%d : %s\n"),i, hl_type_str(f->regs[i])); + // check blocks intervals + int cur = 0; + for(i=0;iblock_count;i++) { + eblock *b = ctx->blocks + i; + if( b->id != i ) printf(" ??? BLOCK @%d ID is %d\n",i,b->id); + if( b->start_pos != cur ) printf(" ??? BLOCK %d START AT %X != %X\n", i, b->start_pos, cur); + if( b->end_pos < b->start_pos ) printf(" ??? BLOCK %d RANGE [%X,%X]\n", i, b->start_pos, b->end_pos); + cur = b->end_pos + 1; + } + if( cur != ctx->instr_count ) + printf(" ??? MISSING BLOCK FOR RANGE %X-%X\n", cur, ctx->instr_count); + // print instrs + int vpos = 0; + cur = 0; + for(i=0;iinstr_count;i++) { + while( cur < ctx->block_count && ctx->blocks[cur].start_pos == i ) { + eblock *b = &ctx->blocks[cur]; + printf("--- BLOCK #%d ---\n", cur); + for(int k=0;kphi_count;k++) { + ephi *p = b->phis + k; + printf("\t\t@%X %s = phi(",i,val_str(p->value)); + for(int n=0;nnvalues;n++) { + if( n > 0 ) printf(","); + printf("%s",val_str(p->values[n])); + } + printf(")"); + if( p->nvalues <= 1 ) + printf(" ???"); + printf("\n"); + } + cur++; + } + while( ctx->emit_pos_map[cur_op] == i ) { + printf("@%X ", cur_op); + hl_dump_op(ctx->fun, f->ops + cur_op); + printf("\n"); + cur_op++; + } + einstr *e = ctx->instrs + i; + printf("\t\t@%X ", i); + if( vpos < ctx->value_count && ctx->values_writes[vpos] == i ) + printf("V%d = ", vpos++); + printf("%s", op_names[e->op]); + bool show_size = true; + switch( e->op ) { + case TEST: + case CMP: + printf("-%s", hl_op_name(e->size_offs)+2); 
+ show_size = false; + break; + case BINOP: + case UNOP: + printf("-%s", hl_op_name(e->size_offs)+1); + show_size = false; + break; + default: + break; + } + if( e->mode ) + printf("%s", emit_mode_str(e->mode)); + switch( e->op ) { + case CALL_FUN: + printf(" "); + { + int fid = ctx->mod->functions_indexes[e->a.index]; + hl_code *code = ctx->mod->code; + if( fid < code->nfunctions ) { + hl_dump_fun_name(&code->functions[fid]); + } else { + printf("???"); + } + } + hl_dump_args(ctx,e); + break; + case CALL_REG: + printf(" %s", val_str(e->a)); + hl_dump_args(ctx,e); + break; + case CALL_PTR: + printf(" "); + hl_dump_ptr_name(ctx, (void*)e->value); + hl_dump_args(ctx,e); + break; + case JUMP: + case JCOND: + printf(" @%X", i + 1 + e->size_offs); + break; + case LOAD_IMM: + printf(" "); + dump_value(ctx, e->value, e->mode); + break; + case LOAD_ADDR: + if( (e->size_offs>>8) ) + printf(" %s[%Xh]", val_str(e->a), e->size_offs); + else + printf(" %s[%d]", val_str(e->a), e->size_offs); + break; + case STORE: + { + int offs = e->size_offs; + if( offs == 0 ) + printf(" [%s]", val_str(e->a)); + else + printf(" %s[%d]", val_str(e->a), offs); + printf(" = %s", val_str(e->b)); + //if( e->mode == 0 || e->mode != ctx->instrs[ctx->values_writes[e->b.index]].mode ) + // printf(" ???"); + } + break; + default: + if( !IS_NULL(e->a) ) { + printf(" %s", val_str(e->a)); + if( !IS_NULL(e->b) ) printf(", %s", val_str(e->b)); + if( e->a.index >= vpos || e->b.index >= vpos ) printf(" ???"); + } + if( show_size && e->size_offs != 0 ) + printf(" %d", e->size_offs); + break; + } + printf("\n"); + } + // invalid ? + while( vpos < ctx->value_count ) + printf(" ??? 
UNWRITTEN VALUE V%d @%X\n", vpos, ctx->values_writes[vpos++]); + // interrupted + if( cur_op < f->nops ) { + printf("@%X ", cur_op); + hl_dump_op(ctx->fun, f->ops + cur_op); + printf("\n\t\t...\n"); + } + printf("\n\n"); + fflush(stdout); +} diff --git a/src/jit_emit.c b/src/jit_emit.c new file mode 100644 index 000000000..5ebf22ec2 --- /dev/null +++ b/src/jit_emit.c @@ -0,0 +1,1921 @@ +/* + * Copyright (C)2015-2016 Haxe Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#include +#include +#include "data_struct.h" + +static ereg ENULL = {VAL_NULL}; +static void __ignore( void *value ) {} + +typedef struct { + hl_type *t; + int id; +} vreg; + +#define MAX_TMP_ARGS 32 +#define MAX_TRAPS 32 +#define MAX_REFS 512 // TODO : different impl + +typedef struct _linked_inf linked_inf; +typedef struct _emit_block emit_block; +typedef struct _tmp_phi tmp_phi; + +#define DEF_ALLOC &ctx->jit->falloc + +#define S_TYPE blocks +#define S_NAME(name) blocks_##name +#define S_VALUE emit_block* +#include "data_struct.c" +#define blocks_add(set,v) blocks_add_impl(DEF_ALLOC,&(set),v) + +#define S_TYPE phi_arr +#define S_NAME(name) phi_##name +#define S_VALUE tmp_phi* +#include "data_struct.c" +#define phi_add(set,v) phi_add_impl(DEF_ALLOC,&(set),v) + +#define S_SORTED + +#define S_DEFVAL ENULL +#define S_CMP(a,b) a.index > b.index +#define S_TYPE ereg_map +#define S_NAME(name) ereg_##name +#define S_VALUE ereg +#include "data_struct.c" +#define ereg_add(set,v) ereg_add_impl(DEF_ALLOC,&(set),v) + +#define S_MAP + +#define S_DEFVAL ENULL +#define S_TYPE vreg_map +#define S_NAME(name) vreg_##name +#define S_KEY int +#define S_VALUE ereg +#include "data_struct.c" +#define vreg_replace(set,k,v) vreg_replace_impl(DEF_ALLOC,&(set),k,v) + +struct _linked_inf { + int id; + void *ptr; + linked_inf *next; +}; + +struct _emit_block { + int id; + int start_pos; + int end_pos; + int wait_nexts; + int mark; + bool sealed; + blocks nexts; + blocks preds; + vreg_map written_vars; + phi_arr phis; + emit_block *wait_seal_next; +}; + +struct _tmp_phi { + ereg value; + vreg *r; + ereg target; + int final_id; + bool locked; + bool opt; + unsigned char mode; + emit_block *b; + ereg_map vals; + phi_arr ref_phis; + linked_inf *ref_blocks; +}; + +struct _emit_ctx { + hl_module *mod; + hl_function *fun; + jit_ctx *jit; + + einstr *instrs; + vreg *vregs; + tmp_phi **phis; + int max_instrs; + int max_regs; + int max_phis; + int emit_pos; + int op_pos; + int phi_count; + int 
phi_depth; + bool flushed; + + ereg tmp_args[MAX_TMP_ARGS]; + ereg traps[MAX_TRAPS]; + struct { + ereg r; + int reg; + } refs[MAX_REFS]; + int *pos_map; + int pos_map_size; + int trap_count; + int ref_count; + + int_alloc args_data; + int_alloc jump_regs; + int_alloc values; + + emit_block *root_block; + emit_block *current_block; + emit_block *wait_seal; + linked_inf *arrival_points; + void *closure_list; // TODO : patch with good addresses +}; + +#define R(i) (ctx->vregs + (i)) + +#define LOAD(r) emit_load_reg(ctx, r) +#define STORE(r, v) emit_store_reg(ctx, r, v) +#define LOAD_CONST(v, t) emit_load_const(ctx, (uint64)(v), t) +#define LOAD_CONST_PTR(v) LOAD_CONST(v,&hlt_bytes) +#define LOAD_MEM(v, offs, t) emit_load_mem(ctx, v, offs, t) +#define LOAD_MEM_PTR(v, offs) LOAD_MEM(v, offs, &hlt_bytes) +#define STORE_MEM(to, offs, v) emit_store_mem(ctx, to, offs, v) +#define LOAD_OBJ_METHOD(obj,id) LOAD_MEM_PTR(LOAD_MEM_PTR(LOAD_MEM_PTR(obj,0),HL_WSIZE*2),HL_WSIZE*(id)) +#define OFFSET(base,index,mult,offset) emit_gen_ext(ctx, LEA, base, index, M_PTR, (mult) | ((offset) << 8)) +#define BREAK() emit_gen(ctx, DEBUG_BREAK, ENULL, ENULL, 0) +#define GET_MODE(r) emit_get_mode(ctx,r) +#define GET_PHI(r) ctx->phis[-(r).index-1] +#define HDYN_VALUE 8 + +#define IS_FLOAT(t) ((t)->kind == HF64 || (t)->kind == HF32) + +static hl_type hlt_ui8 = { HUI8, 0 }; +static hl_type hlt_ui16 = { HUI16, 0 }; + +static linked_inf *link_add( emit_ctx *ctx, int id, void *ptr, linked_inf *head ) { + linked_inf *l = hl_malloc(&ctx->jit->falloc,sizeof(linked_inf)); + l->id = id; + l->ptr = ptr; + l->next = head; + return l; +} + +static linked_inf *link_add_sort_unique( emit_ctx *ctx, int id, void *ptr, linked_inf *head ) { + linked_inf *prev = NULL; + linked_inf *cur = head; + while( cur && cur->id < id ) { + prev = cur; + cur = cur->next; + } + // check duplicate + while( cur && cur->id == id ) { + if( cur->ptr == ptr ) + return head; + cur = cur->next; + } + // insert + linked_inf *l = 
hl_malloc(&ctx->jit->falloc,sizeof(linked_inf)); + l->id = id; + l->ptr = ptr; + if( !prev ) { + l->next = head; + return l; + } else { + l->next = prev->next; + prev->next = l; + return head; + } +} + +static linked_inf *link_add_sort_replace( emit_ctx *ctx, int id, void *ptr, linked_inf *head ) { + linked_inf *prev = NULL; + linked_inf *cur = head; + while( cur && cur->id < id ) { + prev = cur; + cur = cur->next; + } + // replace duplicate + if( cur && cur->id == id ) { + cur->ptr = ptr; + return head; + } + // insert + linked_inf *l = hl_malloc(&ctx->jit->falloc,sizeof(linked_inf)); + l->id = id; + l->ptr = ptr; + if( !prev ) { + l->next = head; + return l; + } else { + l->next = prev->next; + prev->next = l; + return head; + } +} + +static void *link_sort_lookup( linked_inf *head, int id ) { + while( head && head->id < id ) + head = head->next; + if( head && head->id == id ) + return head->ptr; + return NULL; +} + +static linked_inf *link_sort_remove( linked_inf *head, int id ) { + linked_inf *prev = NULL; + linked_inf *cur = head; + while( cur && cur->id < id ) { + prev = cur; + cur = cur->next; + } + if( cur && cur->id == id ) { + if( !prev ) + return cur->next; + prev->next = cur->next; + return head; + } + return head; +} + +static emit_mode hl_type_mode( hl_type *t ) { + if( t->kind == HVOID ) + return M_VOID; + if( t->kind < HBOOL ) + return (emit_mode)t->kind; + if( t->kind == HBOOL ) + return sizeof(bool) == 1 ? 
M_UI8 : M_I32; + if( t->kind == HGUID ) + return M_I64; + return M_PTR; +} + +static ereg new_value( emit_ctx *ctx ) { + ereg r = {ctx->values.cur}; + int_alloc_store(&ctx->values, ctx->emit_pos-1); + return r; +} + +static ereg *get_tmp_args( emit_ctx *ctx, int count ) { + if( count > MAX_TMP_ARGS ) jit_error("Too many arguments"); + return ctx->tmp_args; +} + +static ereg resolve_ref( emit_ctx *ctx, int reg ) { + for(int i=0;iref_count;i++) { + if( ctx->refs[i].reg == reg ) + return ctx->refs[i].r; + } + return ENULL; +} + +static unsigned char emit_get_mode( emit_ctx *ctx, ereg v ) { + if( IS_NULL(v) ) jit_assert(); + if( v.index < 0 ) + return GET_PHI(v)->mode; + return ctx->instrs[ctx->values.data[v.index]].mode; +} + +static const char *phi_prefix( emit_ctx *ctx ) { + static char tmp[20]; + int sp = 3 + ctx->phi_depth * 2; + if( sp > 19 ) sp = 19; + memset(tmp,0x20,sp); + tmp[sp] = 0; + return tmp; +} + +static einstr *emit_instr( emit_ctx *ctx, emit_op op ) { + if( ctx->emit_pos == ctx->max_instrs ) { + int pos = ctx->emit_pos; + int next_size = ctx->max_instrs ? 
(ctx->max_instrs << 1) : 256; + einstr *instrs = (einstr*)malloc(sizeof(einstr) * next_size); + if( instrs == NULL ) jit_error("Out of memory"); + memcpy(instrs, ctx->instrs, pos * sizeof(einstr)); + memset(instrs + pos, 0, (next_size - pos) * sizeof(einstr)); + free(ctx->instrs); + ctx->instrs = instrs; + ctx->max_instrs = next_size; + } else if( (ctx->emit_pos & 0xFF) == 0 ) + memset(ctx->instrs + ctx->emit_pos, 0, 256 * sizeof(einstr)); + einstr *e = ctx->instrs + ctx->emit_pos++; + e->op = op; + return e; +} + +static void emit_store_mem( emit_ctx *ctx, ereg to, int offs, ereg from ) { + einstr *e = emit_instr(ctx, STORE); + e->mode = GET_MODE(from); + e->size_offs = offs; + e->a = to; + e->b = from; +} + +static void store_args( emit_ctx *ctx, einstr *e, ereg *args, int count ) { + if( count < 0 ) jit_assert(); + if( count > 64 ) jit_error("Too many arguments"); + e->nargs = (unsigned char)count; + if( count == 0 ) return; + if( count == 1 ) { + e->size_offs = args[0].index; + return; + } + int *args_data = int_alloc_get(&ctx->args_data, count); + e->size_offs = (int)(args_data - ctx->args_data.data); + memcpy(args_data, args, sizeof(int) * count); +} + +ereg *hl_emit_get_args( emit_ctx *ctx, einstr *e ) { + if( e->nargs == 0 ) + return NULL; + if( e->nargs == 1 ) + return (ereg*)&e->size_offs; + return (ereg*)(ctx->args_data.data + e->size_offs); +} + +static ereg emit_gen_ext( emit_ctx *ctx, emit_op op, ereg a, ereg b, int mode, int size_offs ) { + einstr *e = emit_instr(ctx, op); + if( (unsigned char)mode != mode ) jit_assert(); + e->mode = (unsigned char)mode; + e->size_offs = size_offs; + e->a = a; + e->b = b; + return mode == 0 || mode == M_NORET ? ENULL : new_value(ctx); +} + +static ereg emit_gen( emit_ctx *ctx, emit_op op, ereg a, ereg b, int mode ) { + return emit_gen_ext(ctx,op,a,b,mode,0); +} + +static ereg emit_gen_size( emit_ctx *ctx, emit_op op, int size_offs ) { + return emit_gen_ext(ctx,op,ENULL,ENULL,op==ALLOC_STACK ? 
M_PTR : 0,size_offs); +} + +static void patch_instr_mode( emit_ctx *ctx, int mode ) { + ctx->instrs[ctx->emit_pos-1].mode = (unsigned char)mode; +} + +static tmp_phi *alloc_phi( emit_ctx *ctx, emit_block *b, vreg *r ) { + if( ctx->phi_count == ctx->max_phis ) { + int new_size = ctx->max_phis ? ctx->max_phis << 1 : 64; + tmp_phi **phis = (tmp_phi**)malloc(sizeof(tmp_phi*) * new_size); + if( phis == NULL ) jit_error("Out of memory"); + memcpy(phis, ctx->phis, sizeof(tmp_phi*) * ctx->phi_count); + free(ctx->phis); + ctx->phis = phis; + ctx->max_phis = new_size; + } + tmp_phi *p = (tmp_phi*)hl_zalloc(&ctx->jit->falloc, sizeof(tmp_phi)); + p->b = b; + p->r = r; + if( r ) p->mode = hl_type_mode(r->t); + p->value.index = -(++ctx->phi_count); + phi_add(b->phis,p); + GET_PHI(p->value) = p; + return p; +} + +static int emit_jump( emit_ctx *ctx, bool cond ) { + int p = ctx->emit_pos; + emit_gen(ctx, cond ? JCOND : JUMP, ENULL, ENULL, 0); + return p; +} + +static void patch_jump( emit_ctx *ctx, int jpos ) { + ctx->instrs[jpos].size_offs = ctx->emit_pos - (jpos + 1); +} + +static emit_block *alloc_block( emit_ctx *ctx ) { + return hl_zalloc(&ctx->jit->falloc, sizeof(emit_block)); +} + +static void block_add_pred( emit_ctx *ctx, emit_block *b, emit_block *p ) { + blocks_add(b->preds,p); + blocks_add(p->nexts,b); + jit_debug(" PRED #%d\n",p->id); +} + +static void store_block_var( emit_ctx *ctx, emit_block *b, vreg *r, ereg v ) { + if( IS_NULL(v) ) jit_assert(); + vreg_replace(b->written_vars,r->id,v); + if( v.index < 0 ) { + tmp_phi *p = GET_PHI(v); + p->ref_blocks = link_add_sort_unique(ctx,b->id,b,p->ref_blocks); + } +} + +static void split_block( emit_ctx *ctx ) { + emit_block *b = alloc_block(ctx); + b->sealed = true; + b->id = ctx->current_block->id + 1; + b->start_pos = ctx->emit_pos; + jit_debug("BLOCK #%d@%X[%X]\n",b->id,b->start_pos,ctx->op_pos); + while( ctx->arrival_points && ctx->arrival_points->id == ctx->op_pos ) { + block_add_pred(ctx, b, 
(emit_block*)ctx->arrival_points->ptr); + ctx->arrival_points = ctx->arrival_points->next; + } + bool dead_code = blocks_count(b->preds) == 0; // if we have no reach, force previous block dependency, this is rare dead code emit by compiler + einstr *eprev = &ctx->instrs[ctx->emit_pos-1]; + if( (eprev->op != JUMP && eprev->op != RET && eprev->mode != M_NORET) || ctx->fun->ops[ctx->op_pos].op == OTrap || dead_code ) + block_add_pred(ctx, b, ctx->current_block); + ctx->current_block->end_pos = ctx->emit_pos - 1; + ctx->current_block = b; +} + +static void register_jump( emit_ctx *ctx, int jpos, int offs ) { + int target = offs + ctx->op_pos + 1; + int_alloc_store(&ctx->jump_regs, jpos); + int_alloc_store(&ctx->jump_regs, target); + if( offs > 0 ) { + ctx->arrival_points = link_add_sort_unique(ctx, target, ctx->current_block, ctx->arrival_points); + if( ctx->arrival_points->id != ctx->op_pos + 1 && ctx->fun->ops[ctx->op_pos].op != OSwitch ) + split_block(ctx); + } +} + +static ereg emit_load_const( emit_ctx *ctx, uint64 value, hl_type *size_t ) { + einstr *e = emit_instr(ctx, LOAD_IMM); + e->mode = hl_type_mode(size_t); + e->value = value; + return new_value(ctx); +} + +static ereg emit_load_mem( emit_ctx *ctx, ereg v, int offset, hl_type *size_t ) { + einstr *e = emit_instr(ctx, LOAD_ADDR); + e->mode = hl_type_mode(size_t); + e->a = v; + e->b = ENULL; + e->size_offs = offset; + return new_value(ctx); +} + +static void emit_store_reg( emit_ctx *ctx, vreg *to, ereg v ) { + if( to->t->kind == HVOID ) return; + if( IS_NULL(v) ) jit_assert(); + store_block_var(ctx,ctx->current_block,to,v); +} + +static ereg emit_native_call( emit_ctx *ctx, void *native_ptr, ereg args[], int nargs, hl_type *ret ) { + einstr *e = emit_instr(ctx, CALL_PTR); + e->mode = (unsigned char)(ret ? hl_type_mode(ret) : M_NORET); + e->value = (int_val)native_ptr; + store_args(ctx, e, args, nargs); + return ret == NULL || e->mode == M_VOID ? 
ENULL : new_value(ctx); +} + +static ereg emit_dyn_call( emit_ctx *ctx, ereg f, ereg args[], int nargs, hl_type *ret ) { + einstr *e = emit_instr(ctx, CALL_REG); + e->mode = hl_type_mode(ret); + e->a = f; + store_args(ctx, e, args, nargs); + return e->mode == M_VOID ? ENULL : new_value(ctx); +} + +static void emit_test( emit_ctx *ctx, ereg v, hl_op o ) { + emit_gen_ext(ctx, TEST, v, ENULL, 0, o); + patch_instr_mode(ctx, GET_MODE(v)); +} + +static void phi_remove_val( emit_ctx *ctx, tmp_phi *p, ereg v ) { + ereg_remove(&p->vals,v); + jit_debug("%sPHI-REM-DEP %s = %s\n", phi_prefix(ctx), val_str(p->value), val_str(v)); +} + +static void phi_add_val( emit_ctx *ctx, tmp_phi *p, ereg v ) { + if( !p->b ) jit_assert(); + if( IS_NULL(v) ) jit_assert(); + if( p->value.index == v.index ) + return; + if( !ereg_add(p->vals,v) ) + return; + jit_debug("%sPHI-DEP %s = %s\n", phi_prefix(ctx), val_str(p->value), val_str(v)); + if( v.index < 0 ) { + tmp_phi *p2 = GET_PHI(v); + phi_add(p2->ref_phis,p); + } +} + +static ereg optimize_phi_rec( emit_ctx *ctx, tmp_phi *p ) { + + if( p->locked ) jit_assert(); + ereg same = ENULL; + for_iter(ereg,v,p->vals) { + if( v.index == same.index || v.index == p->value.index ) + continue; + if( !IS_NULL(same) ) + return p->value; + same = v; + } + if( IS_NULL(same) ) + return p->value; // sealed (no dep yet) + + if( !phi_count(p->ref_phis) && !p->ref_blocks ) + return same; + + if( p->locked || p->opt ) jit_assert(); + + jit_debug("%sPHI-OPT %s = %s\n", phi_prefix(ctx), val_str(p->value), val_str(same)); + p->opt = true; + ctx->phi_depth++; + linked_inf *l = p->ref_blocks; + while( l ) { + emit_block *b = (emit_block*)l->ptr; + if( vreg_find(b->written_vars,p->r->id).index == p->value.index ) + store_block_var(ctx,b,p->r,same); + l = l->next; + } + for_iter(phi,p2,p->ref_phis) { + phi_remove_val(ctx,p2,p->value); + phi_add_val(ctx,p2,same); + } + p->ref_blocks = NULL; + int count = phi_count(p->ref_phis); + tmp_phi **phis = phi_free(&p->ref_phis); + 
for(int i=0;iphi_depth--; + jit_debug("%sPHI-OPT-DONE %s = %s\n", phi_prefix(ctx), val_str(p->value), val_str(same)); + return optimize_phi_rec(ctx,p); +} + +static ereg emit_load_reg_block( emit_ctx *ctx, emit_block *b, vreg *r ); + +static ereg gather_phis( emit_ctx *ctx, tmp_phi *p ) { + p->locked = true; + for_iter(blocks,b,p->b->preds) { + ereg r = p->r ? emit_load_reg_block(ctx, b, p->r) : p->value; + phi_add_val(ctx, p, r); + } + p->locked = false; + return optimize_phi_rec(ctx, p); +} + +static ereg emit_load_reg_block( emit_ctx *ctx, emit_block *b, vreg *r ) { + ereg v = vreg_find(b->written_vars,r->id); + if( !IS_NULL(v) ) + return v; + if( !b->sealed ) { + tmp_phi *p = alloc_phi(ctx,b,r); + jit_debug("%sPHI-SEALED %s = R%d\n",phi_prefix(ctx),val_str(p->value),r->id); + v = p->value; + } else if( blocks_count(b->preds) == 1 ) + v = emit_load_reg_block(ctx, blocks_get(b->preds,0), r); + else { + tmp_phi *p = alloc_phi(ctx,b,r); + store_block_var(ctx,b,r,p->value); + v = gather_phis(ctx, p); + } + store_block_var(ctx,b,r,v); + return v; +} + +static void emit_walk_blocks_rec( emit_ctx *ctx, emit_block *b, int mark, void (*fun)(emit_ctx*,emit_block*) ) { + if( b->mark == mark ) return; + b->mark = mark; + fun(ctx, b); + for_iter(blocks,n,b->nexts) + emit_walk_blocks_rec(ctx,n,mark,fun); +} + +static void emit_walk_blocks( emit_ctx *ctx, void (*fun)(emit_ctx*,emit_block*) ) { + static int MARK_UID = 0; + int mark = ++MARK_UID; + if( mark == 0 ) mark = ++MARK_UID; + emit_walk_blocks_rec(ctx, ctx->root_block, mark, fun); +} + +static ereg emit_load_reg( emit_ctx *ctx, vreg *r ) { + ereg ref = resolve_ref(ctx, r->id); + if( ref.index >= 0 ) + return LOAD_MEM(ref,0,r->t); + return emit_load_reg_block(ctx, ctx->current_block, r); +} + +static void seal_block( emit_ctx *ctx, emit_block *b ) { + jit_debug(" SEAL #%d\n",b->id); + for_iter(phi,p,b->phis) + gather_phis(ctx, p); + b->sealed = true; +} + +static ereg emit_phi( emit_ctx *ctx, ereg v1, ereg v2 ) { + 
unsigned char mode = GET_MODE(v1); + if( mode != GET_MODE(v2) ) jit_assert(); + tmp_phi *p = alloc_phi(ctx, ctx->current_block, NULL); + p->mode = mode; + phi_add_val(ctx, p, v1); + phi_add_val(ctx, p, v2); + return p->value; +} + +static void emit_call_fun( emit_ctx *ctx, vreg *dst, int findex, int count, int *args_regs ) { + hl_module *m = ctx->mod; + int fid = m->functions_indexes[findex]; + bool isNative = fid >= m->code->nfunctions; + ereg *args = get_tmp_args(ctx, count); + for(int i=0;ifunctions_ptrs[findex], args, count, dst->t)); + else { + einstr *e = emit_instr(ctx, CALL_FUN); + e->mode = hl_type_mode(dst->t); + e->a.index = findex; + store_args(ctx, e, args, count); + STORE(dst, e->mode == M_VOID ? ENULL : new_value(ctx)); + } +} + +static vclosure *alloc_static_closure( emit_ctx *ctx, int fid ) { + hl_module *m = ctx->mod; + vclosure *c = hl_malloc(&m->ctx.alloc,sizeof(vclosure)); + int fidx = m->functions_indexes[fid]; + c->hasValue = 0; + if( fidx >= m->code->nfunctions ) { + // native + c->t = m->code->natives[fidx - m->code->nfunctions].t; + c->fun = m->functions_ptrs[fid]; + c->value = NULL; + } else { + c->t = m->code->functions[fidx].type; + c->fun = (void*)(int_val)fid; + c->value = ctx->closure_list; + ctx->closure_list = c; + } + return c; +} + +static void *get_dynget( hl_type *t ) { + switch( t->kind ) { + case HF32: + return hl_dyn_getf; + case HF64: + return hl_dyn_getd; + case HI64: + case HGUID: + return hl_dyn_geti64; + case HI32: + case HUI16: + case HUI8: + case HBOOL: + return hl_dyn_geti; + default: + return hl_dyn_getp; + } +} + +static void *get_dynset( hl_type *t ) { + switch( t->kind ) { + case HF32: + return hl_dyn_setf; + case HF64: + return hl_dyn_setd; + case HI64: + case HGUID: + return hl_dyn_seti64; + case HI32: + case HUI16: + case HUI8: + case HBOOL: + return hl_dyn_seti; + default: + return hl_dyn_setp; + } +} + +static void *get_dyncast( hl_type *t ) { + switch( t->kind ) { + case HF32: + return hl_dyn_castf; + case 
HF64: + return hl_dyn_castd; + case HI64: + case HGUID: + return hl_dyn_casti64; + case HI32: + case HUI16: + case HUI8: + case HBOOL: + return hl_dyn_casti; + default: + return hl_dyn_castp; + } +} + +static void emit_store_size( emit_ctx *ctx, ereg dst, int dst_offset, ereg src, int src_offset, int total_size ) { + int offset = 0; + while( offset < total_size) { + int remain = total_size - offset; + hl_type *ct = remain >= HL_WSIZE ? &hlt_bytes : (remain >= 4 ? &hlt_i32 : &hlt_ui8); + STORE_MEM(dst, dst_offset+offset, LOAD_MEM(src,src_offset+offset,ct)); + offset += hl_type_size(ct); + } +} + +static ereg emit_conv( emit_ctx *ctx, ereg v, emit_mode mode, bool _unsigned ) { + return emit_gen(ctx, _unsigned ? CONV_UNSIGNED : CONV, v, ENULL, mode); +} + +static bool dyn_need_type( hl_type *t ) { + return !(IS_FLOAT(t) || t->kind == HI64 || t->kind == HGUID); +} + +static ereg emit_dyn_cast( emit_ctx *ctx, ereg v, hl_type *t, hl_type *dt ) { + if( t->kind == HNULL && t->tparam->kind == dt->kind ) { + emit_test(ctx, v, OJNotNull); + int jnot = emit_jump(ctx, false); + ereg v1 = LOAD_CONST(0,dt); + int jend = emit_jump(ctx, true); + patch_jump(ctx, jnot); + ereg v2 = LOAD_MEM(v,0,dt); + patch_jump(ctx, jend); + return emit_phi(ctx, v1, v2); + } + bool need_dyn = dyn_need_type(dt); + ereg st = emit_gen_size(ctx, ALLOC_STACK, 1); + STORE_MEM(st, 0, v); + ereg args[3]; + args[0] = st; + args[1] = LOAD_CONST_PTR(t); + if( need_dyn ) args[2] = LOAD_CONST_PTR(dt); + ereg r = emit_native_call(ctx, get_dyncast(dt), args, need_dyn ? 
3 : 2, dt); + emit_gen_size(ctx, FREE_STACK, 1); + return r; +} + +static void emit_opcode( emit_ctx *ctx, hl_opcode *o ); + +static void remap_phi_reg( emit_ctx *ctx, ereg *r ) { + if( r->index >= 0 || IS_NULL(*r) ) + return; + tmp_phi *p = GET_PHI(*r); + while( p->final_id < 0 ) { + if( p->target.index >= 0 ) { + r->index = p->target.index; + return; + } + p = GET_PHI(p->target); + } + if( p->final_id == 0 ) + return; + r->index = -p->final_id; // new phis +} + +static void emit_write_block( emit_ctx *ctx, emit_block *b ) { + jit_ctx *jit = ctx->jit; + eblock *bl = jit->blocks + b->id; + bl->id = b->id; + bl->start_pos = b->start_pos; + bl->end_pos = b->end_pos; + bl->pred_count = blocks_count(b->preds); + bl->next_count = blocks_count(b->nexts); + bl->preds = (int*)hl_malloc(&jit->falloc,sizeof(int)*bl->pred_count); + bl->nexts = (int*)hl_malloc(&jit->falloc,sizeof(int)*bl->next_count); + for(int i=0;ipred_count;i++) + bl->preds[i++] = blocks_get(b->preds,i)->id; + for(int i=0;inext_count;i++) + bl->nexts[i++] = blocks_get(b->nexts,i)->id; + // write phis + { + for_iter(phi,p,b->phis) + if( p->final_id >= 0 ) + bl->phi_count++; + } + bl->phis = (ephi*)hl_zalloc(&jit->falloc,sizeof(ephi)*bl->phi_count); + int i = 0; + for_iter(phi,p,b->phis) { + if( p->final_id < 0 ) + continue; + ephi *p2 = bl->phis + i++; + if( p->final_id == 0 ) + p2->value = p->value; + else + p2->value.index = -p->final_id; + p2->nvalues = ereg_count(p->vals); + p2->values = (ereg*)hl_malloc(&jit->falloc,sizeof(ereg)*p2->nvalues); + int k = 0; + for_iter(ereg,v,p->vals) { + remap_phi_reg(ctx, &v); + p2->values[k++] = v; + } + } +} + +void hl_emit_flush( jit_ctx *jit ) { + emit_ctx *ctx = jit->emit; + int i = 0; + if( ctx->flushed ) return; + ctx->flushed = true; + while( i < ctx->jump_regs.cur ) { + int pos = ctx->jump_regs.data[i++]; + einstr *e = ctx->instrs + pos; + int target = ctx->jump_regs.data[i++]; + e->size_offs = ctx->pos_map[target] - (pos + 1); + } + ctx->pos_map[ctx->fun->nops] 
= -1; + ctx->current_block->end_pos = ctx->emit_pos - 1; + jit->instrs = ctx->instrs; + jit->instr_count = ctx->emit_pos; + jit->emit_pos_map = ctx->pos_map; + jit->block_count = ctx->current_block->id + 1; + jit->blocks = hl_zalloc(&jit->falloc,sizeof(eblock) * jit->block_count); + for(i=0;iblock_count;i++) + jit->blocks[i].id = -1; + jit->value_count = ctx->values.cur; + jit->values_writes = ctx->values.data; + emit_walk_blocks(ctx,emit_write_block); +} + +static void hl_iter_instr_reg( einstr *e, void *ctx, void (*iter_reg)( void *, ereg * ) ) { + switch( e->op ) { + case CALL_REG: + iter_reg(ctx,&e->a); + case CALL_FUN: + case CALL_PTR: + { + int i; + ereg *args = hl_emit_get_args(ctx, e); + for(i=0;inargs;i++) + iter_reg(ctx, args + i); + } + break; + case LOAD_IMM: + // skip + break; + default: + if( !IS_NULL(e->a) ) { + iter_reg(ctx,&e->a); + if( !IS_NULL(e->b) ) + iter_reg(ctx,&e->b); + } + break; + } +} + +static void hl_emit_clean_phis( emit_ctx *ctx ) { + for(int i=0;iphi_count;i++) { + tmp_phi *p = ctx->phis[i]; + tmp_phi *cur = p; + ereg r; + while( true ) { + cur->opt = false; + r = optimize_phi_rec(ctx,cur); + if( r.index >= 0 || r.index == cur->value.index ) break; + cur = GET_PHI(r); + } + p->target = r; + } + int new_phis = 0; + for(int i=0;iphi_count;i++) { + tmp_phi *p = ctx->phis[i]; + if( p->target.index == p->value.index ) + p->final_id = ++new_phis; + else + p->final_id = -1; + } + for(int i=0;iemit_pos;i++) + hl_iter_instr_reg(ctx->instrs + i, ctx, remap_phi_reg); +} + +void hl_emit_function( jit_ctx *jit ) { + emit_ctx *ctx = jit->emit; + hl_function *f = jit->fun; + int i; + ctx->mod = jit->mod; + ctx->fun = f; + ctx->emit_pos = 0; + ctx->trap_count = 0; + ctx->ref_count = 0; + ctx->phi_count = 0; + ctx->flushed = false; + int_alloc_reset(&ctx->args_data); + int_alloc_reset(&ctx->jump_regs); + int_alloc_reset(&ctx->values); + ctx->root_block = ctx->current_block = alloc_block(ctx); + ctx->current_block->sealed = true; + 
ctx->arrival_points = NULL; + jit_debug("---- begin [%X] ----\n",f->findex); + if( f->nregs > ctx->max_regs ) { + free(ctx->vregs); + ctx->vregs = (vreg*)malloc(sizeof(vreg) * (f->nregs + 1)); + if( ctx->vregs == NULL ) jit_assert(); + for(i=0;inregs;i++) + R(i)->id = i; + ctx->max_regs = f->nregs; + } + + if( f->nops >= ctx->pos_map_size ) { + free(ctx->pos_map); + ctx->pos_map = (int*)malloc(sizeof(int) * (f->nops+1)); + if( ctx->pos_map == NULL ) jit_assert(); + ctx->pos_map_size = f->nops; + } + + for(i=0;inregs;i++) { + vreg *r = R(i); + r->t = f->regs[i]; + } + + for(i=0;itype->fun->nargs;i++) { + hl_type *t = f->type->fun->args[i]; + if( t->kind == HVOID ) continue; + STORE(R(i), emit_gen(ctx, LOAD_ARG, ENULL, ENULL, hl_type_mode(t))); + } + + for(i=f->nops-1;i>=0;i--) { + hl_opcode *o = f->ops + i; + if( o->op == ORef ) { + ereg ref = resolve_ref(ctx, o->p2); + if( ref.index >= 0 ) continue; + if( ctx->ref_count == MAX_REFS ) jit_error("Too many refs"); + ctx->refs[ctx->ref_count].r = emit_gen_size(ctx, ALLOC_STACK, hl_type_size(R(o->p2)->t)); + ctx->refs[ctx->ref_count].reg = o->p2; + ctx->ref_count++; + } + } + + for(int op_pos=0;op_posnops;op_pos++) { + ctx->op_pos = op_pos; + ctx->pos_map[op_pos] = ctx->emit_pos; + if( ctx->arrival_points && ctx->arrival_points->id == op_pos ) + split_block(ctx); + emit_opcode(ctx,f->ops + op_pos); + } + + hl_emit_clean_phis(ctx); + hl_emit_flush(ctx->jit); +} + +void hl_emit_alloc( jit_ctx *jit ) { + emit_ctx *ctx = (emit_ctx*)malloc(sizeof(emit_ctx)); + if( ctx == NULL ) jit_assert(); + memset(ctx,0,sizeof(emit_ctx)); + ctx->jit = jit; + jit->emit = ctx; + if( sizeof(einstr) != 16 ) jit_assert(); +} + +void hl_emit_free( jit_ctx *jit ) { + emit_ctx *ctx = jit->emit; + free(ctx->vregs); + free(ctx->instrs); + free(ctx->pos_map); + int_alloc_free(&ctx->jump_regs); + int_alloc_free(&ctx->args_data); + int_alloc_free(&ctx->values); + free(ctx); + jit->emit = NULL; +} + +static bool seal_block_rec( emit_ctx *ctx, 
emit_block *b, int target ) { + if( b->start_pos < target ) + return false; + if( b->start_pos == target ) { + b->wait_nexts--; + block_add_pred(ctx, b, ctx->current_block); + while( b && b->wait_nexts == 0 && ctx->wait_seal == b ) { + seal_block(ctx,b); + b = b->wait_seal_next; + ctx->wait_seal = b; + } + return true; + } + for_iter(blocks,p,b->preds) + if( p->start_pos < b->start_pos && seal_block_rec(ctx,p,target) ) + return true; + return false; +} + +static void register_block_jump( emit_ctx *ctx, int offs, bool cond ) { + int jidx = emit_jump(ctx, cond); + register_jump(ctx, jidx, offs); + if( offs < 0 ) { + int target = ctx->pos_map[ctx->op_pos + 1 + offs]; + emit_block *b = ctx->current_block; + if( !seal_block_rec(ctx, b, target) ) jit_assert(); + } +} + +static void prepare_loop_block( emit_ctx *ctx ) { + int i, last_jump = -1; + emit_block *b = ctx->current_block; + // gather all backward jumps to know when the block will be finished + for(i=ctx->op_pos+1;ifun->nops;i++) { + hl_opcode *op = &ctx->fun->ops[i]; + int offs = 0; + switch( op->op ) { + case OJFalse: + case OJTrue: + case OJNotNull: + case OJNull: + offs = op->p2; + break; + case OJAlways: + offs = op->p1; + break; + case OJEq: + case OJNotEq: + case OJSLt: + case OJSGte: + case OJSLte: + case OJSGt: + case OJULt: + case OJUGte: + case OJNotLt: + case OJNotGte: + offs = op->p3; + break; + default: + break; + } + if( offs < 0 && i + 1 + offs == ctx->op_pos ) { + jit_debug(" WAIT @%X\n",i); + b->wait_nexts++; + if( b->sealed ) { + b->sealed = false; + b->wait_seal_next = ctx->wait_seal; + ctx->wait_seal = b; + } + last_jump = i; + } + } +} + +static void emit_opcode( emit_ctx *ctx, hl_opcode *o ) { + vreg *dst = R(o->p1); + vreg *ra = R(o->p2); + vreg *rb = R(o->p3); + hl_module *m = ctx->mod; +#ifdef HL_DEBUG + int uid = (ctx->fun->findex << 16) | ctx->op_pos; + __ignore(&uid); +#endif + switch( o->op ) { + case OMov: + case OUnsafeCast: + STORE(dst, 
emit_gen(ctx,MOV,LOAD(ra),ENULL,hl_type_mode(ra->t))); + break; + case OInt: + STORE(dst, LOAD_CONST(m->code->ints[o->p2], dst->t)); + break; + case OBool: + STORE(dst, LOAD_CONST(o->p2, &hlt_bool)); + break; + case ONull: + STORE(dst, LOAD_CONST(0, dst->t)); + break; + case OFloat: + { + union { + float f; + double d; + uint64 i; + } v; + if( dst->t->kind == HF32 ) + v.f = (float)m->code->floats[o->p2]; + else + v.d = m->code->floats[o->p2]; + STORE(dst, LOAD_CONST(v.i, dst->t)); + } + break; + case OString: + STORE(dst, LOAD_CONST_PTR(hl_get_ustring(m->code,o->p2))); + break; + case OBytes: + { + char *b = m->code->version >= 5 ? m->code->bytes + m->code->bytes_pos[o->p2] : m->code->strings[o->p2]; + STORE(dst,LOAD_CONST_PTR(b)); + } + break; + case OGetGlobal: + { + void *addr = m->globals_data + m->globals_indexes[o->p2]; + STORE(dst, LOAD_MEM_PTR(LOAD_CONST_PTR(addr),0)); + } + break; + case OSetGlobal: + { + void *addr = m->globals_data + m->globals_indexes[o->p1]; + STORE_MEM(LOAD_CONST_PTR(addr),0,LOAD(ra)); + } + break; + case OCall0: + emit_call_fun(ctx, dst, o->p2, 0, NULL); + break; + case OCall1: + emit_call_fun(ctx, dst, o->p2, 1, &o->p3); + break; + case OCall2: + { + int args[2] = { o->p3, (int)(int_val)o->extra }; + emit_call_fun(ctx, dst, o->p2, 2, args); + } + break; + case OCall3: + { + int args[3] = { o->p3, o->extra[0], o->extra[1] }; + emit_call_fun(ctx, dst, o->p2, 3, args); + } + break; + case OCall4: + { + int args[4] = { o->p3, o->extra[0], o->extra[1], o->extra[2] }; + emit_call_fun(ctx, dst, o->p2, 4, args); + } + break; + case OCallN: + emit_call_fun(ctx, dst, o->p2, o->p3, o->extra); + break; + case OSub: + case OAdd: + case OMul: + case OSDiv: + case OUDiv: + case OShl: + case OSShr: + case OUShr: + case OAnd: + case OOr: + case OXor: + case OSMod: + case OUMod: + { + ereg va = LOAD(ra); + ereg vb = LOAD(rb); + STORE(dst, emit_gen_ext(ctx, BINOP, va, vb, hl_type_mode(dst->t), o->op)); + } + break; + case ONeg: + case ONot: + 
STORE(dst, emit_gen_ext(ctx, UNOP, LOAD(ra), ENULL, hl_type_mode(dst->t), o->op)); + break; + case OJFalse: + case OJTrue: + case OJNotNull: + case OJNull: + { + emit_test(ctx, LOAD(dst), o->op); + register_block_jump(ctx, o->p2, true); + } + break; + case OJEq: + case OJNotEq: + case OJSLt: + case OJSGte: + case OJSLte: + case OJSGt: + case OJULt: + case OJUGte: + case OJNotLt: + case OJNotGte: + { + emit_gen_ext(ctx, CMP, LOAD(dst), LOAD(ra), 0, o->op); + patch_instr_mode(ctx, hl_type_mode(dst->t)); + register_block_jump(ctx, o->p3, true); + } + break; + case OJAlways: + register_block_jump(ctx, o->p1, false); + break; + case OToDyn: + if( ra->t->kind == HBOOL ) { + ereg arg = LOAD(ra); + STORE(dst, emit_native_call(ctx,hl_alloc_dynbool,&arg,1,&hlt_dyn)); + } else { + ereg arg = LOAD_CONST_PTR(ra->t); + ereg ret = emit_native_call(ctx,hl_alloc_dynamic,&arg,1,&hlt_dyn); + STORE_MEM(ret,HDYN_VALUE,LOAD(ra)); + STORE(dst, ret); + } + break; + case OToSFloat: + case OToInt: + case OToUFloat: + STORE(dst, emit_conv(ctx,LOAD(ra),hl_type_mode(dst->t), o->op == OToUFloat)); + break; + case ORet: + emit_gen(ctx, RET, dst->t->kind == HVOID ? 
ENULL : LOAD(dst), ENULL, M_NORET); + patch_instr_mode(ctx, hl_type_mode(dst->t)); + break; + case OIncr: + case ODecr: + { + if( IS_FLOAT(dst->t) ) { + jit_assert(); + } else { + STORE(dst, emit_gen_ext(ctx,UNOP,LOAD(dst),ENULL,hl_type_mode(dst->t),o->op)); + } + } + break; + case ONew: + { + ereg arg = ENULL; + void *allocFun = NULL; + int nargs = 1; + switch( dst->t->kind ) { + case HOBJ: + case HSTRUCT: + allocFun = hl_alloc_obj; + break; + case HDYNOBJ: + allocFun = hl_alloc_dynobj; + nargs = 0; + break; + case HVIRTUAL: + allocFun = hl_alloc_virtual; + break; + default: + jit_assert(); + } + if( nargs ) arg = LOAD_CONST_PTR(dst->t); + STORE(dst, emit_native_call(ctx,allocFun,&arg,nargs,dst->t)); + } + break; + case OInstanceClosure: + { + ereg args[3]; + args[0] = LOAD_CONST_PTR(m->code->functions[m->functions_indexes[o->p2]].type); + // TODO : WRITE (emit_pos + op_count) to process later and replace address ! + args[1] = LOAD_CONST_PTR(0); + args[2] = LOAD(rb); + STORE(dst, emit_native_call(ctx,hl_alloc_closure_ptr,args,3,dst->t)); + } + break; + case OVirtualClosure: + { + hl_type *t = NULL; + hl_type *ot = ra->t; + while( t == NULL ) { + int i; + for(i=0;iobj->nproto;i++) { + hl_obj_proto *pp = ot->obj->proto + i; + if( pp->pindex == o->p3 ) { + t = m->code->functions[m->functions_indexes[pp->findex]].type; + break; + } + } + ot = ot->obj->super; + } + ereg args[3]; + ereg obj = LOAD(ra); + args[0] = LOAD_CONST_PTR(t); + args[1] = LOAD_OBJ_METHOD(obj,o->p3); + args[2] = obj; + STORE(dst, emit_native_call(ctx,hl_alloc_closure_ptr,args,3,dst->t)); + } + break; + case OCallClosure: + if( ra->t->kind == HDYN ) { + int i; + ereg st = emit_gen_size(ctx, ALLOC_STACK, o->p3); + for(i=0;ip3;i++) { + vreg *r = R(o->extra[i]); + if( !hl_is_dynamic(r->t) ) jit_assert(); + STORE_MEM(st,i*HL_WSIZE,LOAD(r)); + } + ereg args[3]; + args[0] = LOAD(ra); + args[1] = st; + args[2] = LOAD_CONST(o->p3,&hlt_i32); + STORE(dst, 
emit_dyn_cast(ctx,emit_native_call(ctx,hl_dyn_call,args,3,dst->t),ra->t,dst->t)); + emit_gen_size(ctx, FREE_STACK, o->p3); + } else { + ereg r = LOAD(ra); + ereg *args = get_tmp_args(ctx,o->p3+1); + // Code for if( c->hasValue ) c->fun(c->value,args) else c->fun(args) + ereg has = LOAD_MEM(r,HL_WSIZE*2,&hlt_i32); + emit_test(ctx, has, OJNull); + int jidx = emit_jump(ctx, true); + int i; + args[0] = LOAD_MEM_PTR(r,HL_WSIZE * 3); + for(i=0;ip3;i++) + args[i+1] = LOAD(R(o->extra[i])); + ereg v1 = emit_dyn_call(ctx,LOAD_MEM_PTR(r,HL_WSIZE),args,o->p3 + 1,dst->t); + int jend = emit_jump(ctx, false); + patch_jump(ctx, jidx); + for(i=0;ip3;i++) + args[i] = LOAD(R(o->extra[i])); + ereg v2 = emit_dyn_call(ctx,LOAD_MEM_PTR(r,HL_WSIZE),args,o->p3,dst->t); + patch_jump(ctx, jend); + if( dst->t->kind != HVOID ) STORE(dst, emit_phi(ctx,v1,v2)); + } + break; + case OStaticClosure: + { + vclosure *c = alloc_static_closure(ctx,o->p2); + STORE(dst, LOAD_CONST_PTR(c)); + } + break; + case OField: + { + switch( ra->t->kind ) { + case HOBJ: + case HSTRUCT: + { + hl_runtime_obj *rt = hl_get_obj_rt(ra->t); + ereg r = LOAD(ra); + if( dst->t->kind == HSTRUCT ) { + hl_type *ft = hl_obj_field_fetch(ra->t,o->p3)->t; + if( ft->kind == HPACKED ) { + STORE(dst,OFFSET(r, ENULL, 0, rt->fields_indexes[o->p3])); + break; + } + } + STORE(dst, LOAD_MEM(r,rt->fields_indexes[o->p3],dst->t)); + } + break; + case HVIRTUAL: + // code for : if( hl_vfields(o)[f] ) r = *hl_vfields(o)[f]; else r = hl_dyn_get(o,hash(field),vt) + { + ereg obj = LOAD(ra); + ereg field = LOAD_MEM_PTR(obj,sizeof(vvirtual)+HL_WSIZE*o->p3); + emit_test(ctx, field, OJNull); + int jidx = emit_jump(ctx, true); + ereg v1 = LOAD_MEM(field,0,dst->t); + int jend = emit_jump(ctx, false); + patch_jump(ctx, jidx); + bool need_type = dyn_need_type(dst->t); + ereg args[3]; + args[0] = obj; + args[1] = LOAD_CONST(ra->t->virt->fields[o->p3].hashed_name,&hlt_i32); + if( need_type ) args[2] = LOAD_CONST_PTR(dst->t); + ereg v2 = 
emit_native_call(ctx,get_dynget(dst->t),args,need_type?3:2,dst->t); + patch_jump(ctx, jend); + STORE(dst, emit_phi(ctx, v1, v2)); + } + break; + default: + jit_assert(); + break; + } + } + break; + case OSetField: + { + switch( dst->t->kind ) { + case HOBJ: + case HSTRUCT: + { + ereg obj = LOAD(dst); + ereg val = LOAD(rb); + hl_runtime_obj *rt = hl_get_obj_rt(dst->t); + int field_pos = rt->fields_indexes[o->p2]; + if( rb->t->kind == HSTRUCT ) { + hl_type *ft = hl_obj_field_fetch(dst->t,o->p2)->t; + if( ft->kind == HPACKED ) { + emit_store_size(ctx,obj,field_pos,val,0,hl_get_obj_rt(ft->tparam)->size); + break; + } + } + STORE_MEM(obj,field_pos, val); + } + break; + case HVIRTUAL: + // code for : if( hl_vfields(o)[f] ) *hl_vfields(o)[f] = v; else hl_dyn_set(o,hash(field),vt,v) + { + ereg obj = LOAD(dst); + ereg val = LOAD(rb); + ereg field = LOAD_MEM_PTR(obj,sizeof(vvirtual)+HL_WSIZE*o->p2); + emit_test(ctx, field, OJNull); + int jidx = emit_jump(ctx, true); + STORE_MEM(field, 0, val); + int jend = emit_jump(ctx, false); + patch_jump(ctx, jidx); + bool need_type = dyn_need_type(dst->t); + ereg args[4]; + args[0] = obj; + args[1] = LOAD_CONST(dst->t->virt->fields[o->p2].hashed_name,&hlt_i32); + if( need_type ) { + args[2] = LOAD_CONST_PTR(rb->t); + args[3] = val; + } else { + args[2] = val; + } + emit_native_call(ctx,get_dynset(dst->t),args,need_type?4:3,dst->t); + patch_jump(ctx, jend); + } + break; + default: + jit_assert(); + break; + } + } + break; + case OGetThis: + { + vreg *r = R(0); + ereg obj = LOAD(r); + hl_runtime_obj *rt = hl_get_obj_rt(r->t); + int field_pos = rt->fields_indexes[o->p2]; + if( dst->t->kind == HSTRUCT ) { + hl_type *ft = hl_obj_field_fetch(r->t,o->p2)->t; + if( ft->kind == HPACKED ) { + STORE(dst, OFFSET(obj, ENULL, 0, field_pos)); + break; + } + } + STORE(dst, LOAD_MEM(obj, field_pos, dst->t)); + } + break; + case OSetThis: + { + vreg *r = R(0); + ereg obj = LOAD(r); + ereg val = LOAD(ra); + hl_runtime_obj *rt = hl_get_obj_rt(r->t); + int 
field_pos = rt->fields_indexes[o->p1]; + if( ra->t->kind == HSTRUCT ) { + hl_type *ft = hl_obj_field_fetch(r->t,o->p1)->t; + if( ft->kind == HPACKED ) { + emit_store_size(ctx, obj, field_pos, val, 0, hl_get_obj_rt(ft->tparam)->size); + break; + } + } + STORE_MEM(obj,field_pos,val); + } + break; + case OCallThis: + { + int i; + int nargs = o->p3 + 1; + ereg obj = LOAD(R(0)); + ereg *args = get_tmp_args(ctx, nargs); + args[0] = obj; + for(i=1;iextra[i-1])); + ereg fun = LOAD_OBJ_METHOD(obj, o->p2); + STORE(dst, emit_dyn_call(ctx,fun,args,nargs,dst->t)); + } + break; + case OCallMethod: + { + vreg *r = R(o->extra[0]); + ereg obj = LOAD(r); + switch( r->t->kind ) { + case HOBJ: + { + int i; + int nargs = o->p3; + ereg *args = get_tmp_args(ctx, nargs); + for(i=0;iextra[i])); + ereg fun = LOAD_OBJ_METHOD(obj, o->p2); + STORE(dst, emit_dyn_call(ctx,fun,args,nargs,dst->t)); + } + break; + case HVIRTUAL: + // code for : if( hl_vfields(o)[f] ) dst = *hl_vfields(o)[f](o->value,args...); else dst = hl_dyn_call_obj(o->value,field,args,&ret) + { + vreg *_o = R(o->extra[0]); + ereg obj = LOAD(_o); + ereg field = LOAD_MEM_PTR(obj,sizeof(vvirtual)+HL_WSIZE*o->p2); + emit_test(ctx, field, OJNull); + int jidx = emit_jump(ctx, true); + + int nargs = o->p3; + ereg *args = get_tmp_args(ctx, nargs); + int i; + args[0] = LOAD_MEM_PTR(obj,HL_WSIZE); + for(i=1;iextra[i])); + ereg v1 = emit_dyn_call(ctx,LOAD_MEM_PTR(field,0),args,nargs,dst->t); + + int jend = emit_jump(ctx, false); + patch_jump(ctx, jidx); + + nargs = o->p3 - 1; + ereg eargs = emit_gen_size(ctx, ALLOC_STACK, nargs); + for(i=0;iextra[i+1]))); + bool need_dyn = !hl_is_ptr(dst->t) && dst->t->kind != HVOID; + int dyn_size = sizeof(vdynamic)/HL_WSIZE; + ereg edyn = need_dyn ? 
emit_gen_size(ctx, ALLOC_STACK, dyn_size) : LOAD_CONST_PTR(NULL); + + args = get_tmp_args(ctx, 4); + args[0] = LOAD_MEM_PTR(obj,HL_WSIZE); + args[1] = LOAD_CONST(_o->t->virt->fields[o->p2].hashed_name,&hlt_i32); + args[2] = eargs; + args[3] = edyn; + + ereg v2 = emit_native_call(ctx, hl_dyn_call_obj, args, 4, dst->t); + + emit_gen_size(ctx, FREE_STACK, o->p3 + (need_dyn ? dyn_size : 0)); + patch_jump(ctx, jend); + + if( dst->t->kind != HVOID ) STORE(dst, emit_phi(ctx, v1, v2)); + } + break; + default: + jit_assert(); + break; + } + } + break; + case OThrow: + case ORethrow: + { + ereg arg = LOAD(dst); + emit_native_call(ctx, o->op == OThrow ? hl_throw : hl_rethrow, &arg, 1, NULL); + } + break; + case OLabel: + if( ctx->current_block->start_pos != ctx->emit_pos ) + split_block(ctx); + prepare_loop_block(ctx); + break; + case OGetI8: + case OGetI16: + case OGetMem: + { + ereg offs = OFFSET(LOAD(ra),LOAD(rb),1,0); + ereg val = LOAD_MEM(offs, 0, dst->t); + if( o->op != OGetMem ) val = emit_conv(ctx, val, M_I32, false); + STORE(dst, val); + } + break; + case OSetI8: + case OSetI16: + case OSetMem: + { + ereg offs = OFFSET(LOAD(dst), LOAD(ra),1,0); + ereg val = LOAD(rb); + if( o->op != OSetMem ) val = emit_conv(ctx, val, M_I32, false); + STORE_MEM(offs, 0, val); + } + break; + case OType: + STORE(dst, LOAD_CONST_PTR(m->code->types + o->p2)); + break; + case OGetType: + { + ereg r = LOAD(ra); + emit_test(ctx, r, OJNotNull); + int jidx = emit_jump(ctx, true); + ereg v1 = LOAD_CONST_PTR(&hlt_void); + int jend = emit_jump(ctx, false); + patch_jump(ctx, jidx); + ereg v2 = LOAD_MEM_PTR(r,0); + patch_jump(ctx, jend); + STORE(dst, emit_phi(ctx, v1, v2)); + } + break; + case OGetArray: + { + if( ra->t->kind == HABSTRACT ) { + int osize; + bool isPtr = dst->t->kind != HOBJ && dst->t->kind != HSTRUCT; + if( isPtr ) + osize = HL_WSIZE; // a pointer into the carray + else { + hl_runtime_obj *rt = hl_get_obj_rt(dst->t); + osize = rt->size; // a mem offset into it + } + ereg pos = 
OFFSET(LOAD(ra), LOAD(rb), osize, 0); + ereg val = isPtr ? LOAD_MEM_PTR(pos,0) : pos; + STORE(dst, val); + } else { + ereg pos = OFFSET(LOAD(ra), LOAD(rb), hl_type_size(dst->t), sizeof(varray)); + STORE(dst, LOAD_MEM_PTR(pos,0)); + } + } + break; + case OSetArray: + { + if( dst->t->kind == HABSTRACT ) { + int osize; + bool isPtr = rb->t->kind != HOBJ && rb->t->kind != HSTRUCT; + if( isPtr) { + osize = HL_WSIZE; + } else { + hl_runtime_obj *rt = hl_get_obj_rt(rb->t); + osize = rt->size; + } + ereg pos = OFFSET(LOAD(dst), LOAD(ra), osize, 0); + emit_store_size(ctx, pos, 0, LOAD(rb), 0, osize); + } else { + ereg pos = OFFSET(LOAD(dst), LOAD(ra), hl_type_size(dst->t), sizeof(varray)); + STORE_MEM(pos, 0, LOAD(rb)); + } + } + break; + case OArraySize: + STORE(dst, LOAD_MEM(LOAD(ra),HL_WSIZE*2,&hlt_i32)); + break; + case ORef: + { + ereg ref = resolve_ref(ctx, ra->id); + if( IS_NULL(ref) ) jit_assert(); + ereg r = vreg_find(ctx->current_block->written_vars, ra->id); + if( !IS_NULL(r) ) { + STORE_MEM(ref, 0, LOAD(ra)); + vreg_remove(&ctx->current_block->written_vars, ra->id); + } + STORE(dst, ref); + } + break; + case OUnref: + STORE(dst, LOAD_MEM(LOAD(ra),0,dst->t)); + break; + case OSetref: + STORE_MEM(LOAD(dst),0,LOAD(ra)); + break; + case ORefData: + switch( ra->t->kind ) { + case HARRAY: + STORE(dst, OFFSET(LOAD(ra),ENULL,0,sizeof(varray))); + break; + default: + jit_assert(); + } + break; + case ORefOffset: + STORE(dst, OFFSET(LOAD(ra),LOAD(rb), hl_type_size(dst->t->tparam),0)); + break; + case OToVirtual: + { + ereg args[2]; + args[0] = LOAD(ra); + args[1] = LOAD_CONST_PTR(dst->t); + STORE(dst, emit_native_call(ctx,hl_to_virtual,args,2, dst->t)); + } + break; + case OMakeEnum: + { + ereg args[2]; + args[0] = LOAD_CONST_PTR(dst->t); + args[1] = LOAD_CONST(o->p2,&hlt_i32); + ereg en = emit_native_call(ctx, hl_alloc_enum, args, 2, dst->t); + STORE(dst, en); + hl_enum_construct *c = &dst->t->tenum->constructs[o->p2]; + for(int i=0;inparams;i++) + STORE_MEM(en, 
c->offsets[i], LOAD(R(o->extra[i]))); + } + break; + case OEnumAlloc: + { + ereg args[2]; + args[0] = LOAD_CONST_PTR(dst->t); + args[1] = LOAD_CONST(o->p2,&hlt_i32); + STORE(dst, emit_native_call(ctx, hl_alloc_enum, args, 2, dst->t)); + } + break; + case OEnumField: + { + hl_enum_construct *c = &ra->t->tenum->constructs[o->p3]; + int slot = (int)(int_val)o->extra; + STORE(dst, LOAD_MEM(LOAD(ra),c->offsets[slot], dst->t)); + } + break; + case OEnumIndex: + STORE(dst, LOAD_MEM(LOAD(ra),HL_WSIZE,dst->t)); + break; + case OSetEnumField: + { + hl_enum_construct *c = &dst->t->tenum->constructs[0]; + STORE_MEM(LOAD(dst), c->offsets[o->p2], LOAD(rb)); + } + break; + case ONullCheck: + { + emit_test(ctx, LOAD(dst), OJNotNull); + int jok = emit_jump(ctx, true); + + // ----- DETECT FIELD ACCESS ---------------- + hl_function *f = ctx->fun; + hl_opcode *next = f->ops + ctx->op_pos + 1; + bool null_field_access = false; + int hashed_name = 0; + // skip const and operation between nullcheck and access + while( (next < f->ops + f->nops - 1) && (next->op >= OInt && next->op <= ODecr) ) { + next++; + } + if( (next->op == OField && next->p2 == o->p1) || (next->op == OSetField && next->p1 == o->p1) ) { + int fid = next->op == OField ? next->p3 : next->p2; + hl_obj_field *f = NULL; + if( dst->t->kind == HOBJ || dst->t->kind == HSTRUCT ) + f = hl_obj_field_fetch(dst->t, fid); + else if( dst->t->kind == HVIRTUAL ) + f = dst->t->virt->fields + fid; + if( f == NULL ) jit_assert(); + null_field_access = true; + hashed_name = f->hashed_name; + } else if( (next->op >= OCall1 && next->op <= OCallN) && next->p3 == o->p1 ) { + int fid = next->p2 < 0 ? -1 : m->functions_indexes[next->p2]; + hl_function *cf = m->code->functions + fid; + const uchar *name = fun_field_name(cf); + null_field_access = true; + hashed_name = hl_hash_gen(name, true); + } + // ----------------------------------------- + ereg arg = null_field_access ? 
LOAD_CONST(hashed_name,&hlt_i32) : ENULL; + emit_native_call(ctx, null_field_access ? hl_jit_null_field_access : hl_jit_null_access, &arg, null_field_access ? 1 : 0, NULL); + patch_jump(ctx, jok); + } + break; + case OSafeCast: + STORE(dst, emit_dyn_cast(ctx, LOAD(ra), ra->t, dst->t)); + break; + case ODynGet: + { + bool need_type = dyn_need_type(dst->t); + ereg args[3]; + args[0] = LOAD(ra); + args[1] = LOAD_CONST(hl_hash_utf8(m->code->strings[o->p3]),&hlt_i32); + if( need_type ) args[2] = LOAD_CONST_PTR(dst->t); + STORE(dst, emit_native_call(ctx, get_dynget(dst->t), args, need_type ? 3 : 2, dst->t)); + } + break; + case ODynSet: + { + bool need_type = dyn_need_type(dst->t); + ereg args[4]; + args[0] = LOAD(dst); + args[1] = LOAD_CONST(hl_hash_utf8(m->code->strings[o->p2]),&hlt_i32); + if( need_type ) { + args[2] = LOAD_CONST_PTR(rb->t); + args[3] = LOAD(rb); + } else + args[2] = LOAD(rb); + emit_native_call(ctx, get_dynset(rb->t), args, need_type ? 4 : 3, &hlt_void); + } + break; + case OTrap: + { + ereg st = emit_gen_size(ctx, ALLOC_STACK, sizeof(hl_trap_ctx)); + + ereg thread, current_addr; + static hl_thread_info *tinf = NULL; + static hl_trap_ctx *trap = NULL; +# ifndef HL_THREADS + if( tinf == NULL ) tinf = hl_get_thread(); + current_addr = LOAD_CONST_PTR(&tinf->trap_current); +# else + thread = emit_native_call(ctx, hl_get_thread, NULL, 0, &hlt_bytes); + current_addr = OFFSET(thread, ENULL, 0, (int)(int_val)&tinf->trap_current); +# endif + STORE_MEM(st, (int)(int_val)&trap->prev, LOAD_MEM_PTR(current_addr,0)); + STORE_MEM(current_addr, 0, st); + + + /* + trap E,@catch + catch g + catch g2 + ... + @:catch + + // Before haxe 5 + This is a bit hackshish : we want to detect the type of exception filtered by the catch so we check the following + sequence of HL opcodes: + + trap E,@catch + ... + @catch: + global R, _ + call _, ???(R,E) + + ??? 
is expected to be hl.BaseType.check + */ + hl_function *f = ctx->fun; + hl_opcode *cat = f->ops + ctx->op_pos + 1; + hl_opcode *next = f->ops + ctx->op_pos + 1 + o->p2; + hl_opcode *next2 = f->ops + ctx->op_pos + 2 + o->p2; + void *addr = NULL; + if( cat->op == OCatch || (next->op == OGetGlobal && next2->op == OCall2 && next2->p3 == next->p1 && dst->id == (int)(int_val)next2->extra) ) { + int gindex = cat->op == OCatch ? cat->p1 : next->p2; + hl_type *gt = m->code->globals[gindex]; + while( gt->kind == HOBJ && gt->obj->super ) gt = gt->obj->super; + if( gt->kind == HOBJ && gt->obj->nfields && gt->obj->fields[0].t->kind == HTYPE ) + addr = m->globals_data + m->globals_indexes[gindex]; + } + STORE_MEM(st, (int)(int_val)&trap->tcheck, addr ? LOAD_MEM_PTR(LOAD_CONST_PTR(addr),0) : LOAD_CONST_PTR(NULL)); + + void *fun = setjmp; + ereg args[2]; + int nargs = 1; + args[0] = OFFSET(st, ENULL, 0, (int)(int_val)&trap->buf); +#if defined(HL_WIN) && defined(HL_64) + // On Win64 setjmp actually takes two arguments + // the jump buffer and the frame pointer (or the stack pointer if there is no FP) + nargs = 2; + args[1] = emit_gen(ctx, NATIVE_REG, ENULL, ENULL, REG_RBP); +#endif +#ifdef HL_MINGW + fun = _setjmp; +#endif + ereg ret = emit_native_call(ctx, fun, args, nargs, &hlt_i32); + emit_test(ctx, ret, OJNull); + int jskip = emit_jump(ctx, true); + emit_gen_size(ctx, FREE_STACK, sizeof(hl_trap_ctx)); + STORE(dst, tinf ? 
LOAD_CONST_PTR(&tinf->exc_value) : LOAD_MEM_PTR(thread,(int)(int_val)&tinf->exc_value)); + + int jtrap = emit_jump(ctx, false); + register_jump(ctx, jtrap, o->p2); + patch_jump(ctx, jskip); + + if( ctx->trap_count == MAX_TRAPS ) jit_error("Too many try/catch depth"); + ctx->traps[ctx->trap_count++] = st; + } + break; + case OEndTrap: + { + if( ctx->trap_count == 0 ) jit_assert(); + ereg st = ctx->traps[ctx->trap_count - 1]; + + ereg thread, current_addr; + static hl_thread_info *tinf = NULL; + static hl_trap_ctx *trap = NULL; +# ifndef HL_THREADS + if( tinf == NULL ) tinf = hl_get_thread(); + current_addr = LOAD_CONST_PTR(&tinf->trap_current); +# else + thread = emit_native_call(ctx, hl_get_thread, NULL, 0, &hlt_bytes); + current_addr = OFFSET(thread, ENULL, 0, (int)(int_val)&tinf->trap_current); +# endif + + STORE_MEM(current_addr, 0, LOAD_MEM_PTR(st,(int)(int_val)&trap->prev)); + +# ifdef HL_WIN + // erase eip (prevent false positive in exception stack) + { + _JUMP_BUFFER *b = NULL; +# ifdef HL_64 + int offset = (int)(int_val)&(b->Rip); +# else + int offset = (int)(int_val)&(b->Eip); +# endif + STORE_MEM(st, offset, LOAD_CONST_PTR(NULL)); + } +# endif + + emit_gen_size(ctx, FREE_STACK, sizeof(hl_trap_ctx)); + } + break; + case OSwitch: + { + ereg v = LOAD(dst); + int count = o->p2; + emit_gen_ext(ctx, CMP, v, LOAD_CONST(count,&hlt_i32), 0, OJUGte); + patch_instr_mode(ctx, M_I32); + int jdefault = emit_jump(ctx, true); + emit_gen_ext(ctx, JUMP_TABLE, v, ENULL, 0, count); + for(int i=0; iextra[i]); + } + patch_jump(ctx, jdefault); + } + break; + case OGetTID: + STORE(dst, LOAD_MEM(LOAD(ra),0,&hlt_i32)); + break; + case OAssert: + emit_native_call(ctx, hl_jit_assert, NULL, 0, &hlt_void); + break; + case ONop: + break; + case OPrefetch: + { + ereg r = LOAD(dst); + if( o->p2 > 0 ) { + switch( dst->t->kind ) { + case HOBJ: + case HSTRUCT: + { + hl_runtime_obj *rt = hl_get_obj_rt(dst->t); + r = OFFSET(r, ENULL, 0, rt->fields_indexes[o->p2-1]); + } + break; + default: + 
jit_assert(); + break; + } + } + emit_gen(ctx, PREFETCH, r, ENULL, o->p3); + } + break; + case OAsm: + jit_assert(); + break; + case OCatch: + // Only used by OTrap typing + break; + default: + jit_error(hl_op_name(o->op)); + break; + } +} diff --git a/src/jit_old.c b/src/jit_old.c new file mode 100644 index 000000000..7e4e6e88b --- /dev/null +++ b/src/jit_old.c @@ -0,0 +1,4730 @@ +/* + * Copyright (C)2015-2016 Haxe Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#ifdef _MSC_VER +#pragma warning(disable:4820) +#endif +#include +#include +#include "hlsystem.h" + +#ifdef __arm__ +# error "JIT does not support ARM processors, only x86 and x86-64 are supported, please use HashLink/C native compilation instead" +#endif + +#ifdef HL_DEBUG +# define JIT_DEBUG +#endif + +typedef enum { + Eax = 0, + Ecx = 1, + Edx = 2, + Ebx = 3, + Esp = 4, + Ebp = 5, + Esi = 6, + Edi = 7, +#ifdef HL_64 + R8 = 8, + R9 = 9, + R10 = 10, + R11 = 11, + R12 = 12, + R13 = 13, + R14 = 14, + R15 = 15, +#endif + _LAST = 0xFF +} CpuReg; + +typedef enum { + MOV, + LEA, + PUSH, + ADD, + SUB, + IMUL, // only overflow flag changes compared to MUL + DIV, + IDIV, + CDQ, + CDQE, + POP, + RET, + CALL, + AND, + OR, + XOR, + CMP, + TEST, + NOP, + SHL, + SHR, + SAR, + INC, + DEC, + JMP, + // FPU + FSTP, + FSTP32, + FLD, + FLD32, + FLDCW, + // SSE + MOVSD, + MOVSS, + COMISD, + COMISS, + ADDSD, + SUBSD, + MULSD, + DIVSD, + ADDSS, + SUBSS, + MULSS, + DIVSS, + XORPD, + CVTSI2SD, + CVTSI2SS, + CVTSD2SI, + CVTSD2SS, + CVTSS2SD, + CVTSS2SI, + STMXCSR, + LDMXCSR, + // 8-16 bits + MOV8, + CMP8, + TEST8, + PUSH8, + MOV16, + CMP16, + TEST16, + // prefetchs + PREFETCHT0, + PREFETCHT1, + PREFETCHT2, + PREFETCHNTA, + PREFETCHW, + // -- + _CPU_LAST +} CpuOp; + +#define JAlways 0 +#define JOverflow 0x80 +#define JULt 0x82 +#define JUGte 0x83 +#define JEq 0x84 +#define JNeq 0x85 +#define JULte 0x86 +#define JUGt 0x87 +#define JParity 0x8A +#define JNParity 0x8B +#define JSLt 0x8C +#define JSGte 0x8D +#define JSLte 0x8E +#define JSGt 0x8F + +#define JCarry JLt +#define JZero JEq +#define JNotZero JNeq + +#define B(bv) *ctx->buf.b++ = (unsigned char)(bv) +#define W(wv) *ctx->buf.w++ = wv + +#ifdef HL_64 +# define W64(wv) *ctx->buf.w64++ = wv +#else +# define W64(wv) W(wv) +#endif + +static const int SIB_MULT[] = {-1, 0, 1, -1, 2, -1, -1, -1, 3}; + +#define MOD_RM(mod,reg,rm) B(((mod) << 6) | (((reg)&7) << 3) | ((rm)&7)) +#define SIB(mult,rmult,rbase) B((SIB_MULT[mult]<<6) | 
(((rmult)&7)<<3) | ((rbase)&7)) +#define IS_SBYTE(c) ( (c) >= -128 && (c) < 128 ) + +#define AddJump(how,local) { if( (how) == JAlways ) { B(0xE9); } else { B(0x0F); B(how); }; local = BUF_POS(); W(0); } +#define AddJump_small(how,local) { if( (how) == JAlways ) { B(0xEB); } else B(how - 0x10); local = BUF_POS() | 0x40000000; B(0); } +#define XJump(how,local) AddJump(how,local) +#define XJump_small(how,local) AddJump_small(how,local) + +#define MAX_OP_SIZE 256 + +#define BUF_POS() ((int)(ctx->buf.b - ctx->startBuf)) +#define RTYPE(r) r->t->kind + +#ifdef HL_64 +# define RESERVE_ADDRESS 0x8000000000000000 +#else +# define RESERVE_ADDRESS 0x80000000 +#endif + +#if defined(HL_WIN_CALL) && defined(HL_64) +# define IS_WINCALL64 1 +#else +# define IS_WINCALL64 0 +#endif + +typedef struct jlist jlist; +struct jlist { + int pos; + int target; + jlist *next; +}; + +typedef struct vreg vreg; + +typedef enum { + RCPU = 0, + RFPU = 1, + RSTACK = 2, + RCONST = 3, + RADDR = 4, + RMEM = 5, + RUNUSED = 6, + RCPU_CALL = 1 | 8, + RCPU_8BITS = 1 | 16 +} preg_kind; + +typedef struct { + preg_kind kind; + int id; + int lock; + vreg *holds; +} preg; + +struct vreg { + int stackPos; + int size; + hl_type *t; + preg *current; + preg stack; +}; + +#define REG_AT(i) (ctx->pregs + (i)) + +#ifdef HL_64 +# define RCPU_COUNT 16 +# define RFPU_COUNT 16 +# ifdef HL_WIN_CALL +# define CALL_NREGS 4 +# define RCPU_SCRATCH_COUNT 7 +# define RFPU_SCRATCH_COUNT 6 +static const int RCPU_SCRATCH_REGS[] = { Eax, Ecx, Edx, R8, R9, R10, R11 }; +static const CpuReg CALL_REGS[] = { Ecx, Edx, R8, R9 }; +# else +# define CALL_NREGS 6 // TODO : XMM6+XMM7 are FPU reg parameters +# define RCPU_SCRATCH_COUNT 9 +# define RFPU_SCRATCH_COUNT 16 +static const int RCPU_SCRATCH_REGS[] = { Eax, Ecx, Edx, Esi, Edi, R8, R9, R10, R11 }; +static const CpuReg CALL_REGS[] = { Edi, Esi, Edx, Ecx, R8, R9 }; +# endif +#else +# define CALL_NREGS 0 +# define RCPU_COUNT 8 +# define RFPU_COUNT 8 +# define RCPU_SCRATCH_COUNT 3 +# 
define RFPU_SCRATCH_COUNT 8 +static const int RCPU_SCRATCH_REGS[] = { Eax, Ecx, Edx }; +#endif + +#define XMM(i) ((i) + RCPU_COUNT) +#define PXMM(i) REG_AT(XMM(i)) +#define REG_IS_FPU(i) ((i) >= RCPU_COUNT) + +#define PEAX REG_AT(Eax) +#define PESP REG_AT(Esp) +#define PEBP REG_AT(Ebp) + +#define REG_COUNT (RCPU_COUNT + RFPU_COUNT) + +#define ID2(a,b) ((a) | ((b)<<8)) +#define R(id) (ctx->vregs + (id)) +#define ASSERT(i) { printf("JIT ERROR %d (jit.c line %d)\n",i,(int)__LINE__); jit_exit(); } +#define IS_FLOAT(r) ((r)->t->kind == HF64 || (r)->t->kind == HF32) +#define RLOCK(r) if( (r)->lock < ctx->currentPos ) (r)->lock = ctx->currentPos +#define RUNLOCK(r) if( (r)->lock == ctx->currentPos ) (r)->lock = 0 + +#define BREAK() B(0xCC) + +static preg _unused = { RUNUSED, 0, 0, NULL }; +static preg *UNUSED = &_unused; + +struct _jit_ctx { + union { + unsigned char *b; + unsigned int *w; + unsigned long long *w64; + int *i; + double *d; + } buf; + vreg *vregs; + preg pregs[REG_COUNT]; + vreg *savedRegs[REG_COUNT]; + int savedLocks[REG_COUNT]; + int *opsPos; + int maxRegs; + int maxOps; + int bufSize; + int totalRegsSize; + int functionPos; + int allocOffset; + int currentPos; + int nativeArgsCount; + unsigned char *startBuf; + hl_module *m; + hl_function *f; + jlist *jumps; + jlist *calls; + jlist *switchs; + hl_alloc falloc; // cleared per-function + hl_alloc galloc; + vclosure *closure_list; + hl_debug_infos *debug; + int c2hl; + int hl2c; + void *static_functions[8]; + bool static_function_offset; +#ifdef WIN64_UNWIND_TABLES + int unwind_offset; + int nunwind; + PRUNTIME_FUNCTION unwind_table; +#endif +}; + +#ifdef WIN64_UNWIND_TABLES + +typedef enum _UNWIND_OP_CODES +{ + UWOP_PUSH_NONVOL = 0, /* info == register number */ + UWOP_ALLOC_LARGE, /* no info, alloc size in next 2 slots */ + UWOP_ALLOC_SMALL, /* info == size of allocation / 8 - 1 */ + UWOP_SET_FPREG, /* no info, FP = RSP + UNWIND_INFO.FPRegOffset*16 */ + UWOP_SAVE_NONVOL, /* info == register number, offset 
in next slot */ + UWOP_SAVE_NONVOL_FAR, /* info == register number, offset in next 2 slots */ + UWOP_SAVE_XMM128 = 8, /* info == XMM reg number, offset in next slot */ + UWOP_SAVE_XMM128_FAR, /* info == XMM reg number, offset in next 2 slots */ + UWOP_PUSH_MACHFRAME /* info == 0: no error-code, 1: error-code */ +} UNWIND_CODE_OPS; + +void write_uwcode(jit_ctx *ctx, unsigned char offset, UNWIND_CODE_OPS code, unsigned char info) +{ + B(offset); + B((code) | (info) << 4); +} + +void write_unwind_data(jit_ctx *ctx) +{ + // All generated functions use a frame pointer, so the same unwind info can be used for all of them + unsigned char version = 1; + unsigned char flags = 0; + unsigned char CountOfCodes = 2; + unsigned char SizeOfProlog = 4; + unsigned char FrameRegister = 5; // RBP + unsigned char FrameOffset = 0; + B((version) | (flags) << 3); + B(SizeOfProlog); + B(CountOfCodes); + B((FrameRegister) | (FrameOffset) << 4); + write_uwcode(ctx, 4, UWOP_SET_FPREG, 0); + write_uwcode(ctx, 1, UWOP_PUSH_NONVOL, 5); +} +#endif + +#define jit_exit() { hl_debug_break(); exit(-1); } +#define jit_error(msg) _jit_error(ctx,msg,__LINE__) + +#ifndef HL_64 +# ifdef HL_DEBUG +# define error_i64() jit_error("i64-32") +# else +void error_i64() { + printf("The module you are loading is using 64 bit ints that are not supported by the HL32.\nPlease run using HL64 or compile with -D hl-legacy32"); + jit_exit(); +} +# endif +#endif + +static void _jit_error( jit_ctx *ctx, const char *msg, int line ); +static void on_jit_error( const char *msg, int_val line ); + +static preg *pmem( preg *r, CpuReg reg, int offset ) { + r->kind = RMEM; + r->id = 0 | (reg << 4) | (offset << 8); + return r; +} + +static preg *pmem2( preg *r, CpuReg reg, CpuReg reg2, int mult, int offset ) { + r->kind = RMEM; + r->id = mult | (reg << 4) | (reg2 << 8); + r->holds = (void*)(int_val)offset; + return r; +} + +#ifdef HL_64 +static preg *pcodeaddr( preg *r, int offset ) { + r->kind = RMEM; + r->id = 15 | (offset << 
4); + return r; +} +#endif + +static preg *pconst( preg *r, int c ) { + r->kind = RCONST; + r->holds = NULL; + r->id = c; + return r; +} + +static preg *pconst64( preg *r, int_val c ) { +#ifdef HL_64 + if( ((int)c) == c ) + return pconst(r,(int)c); + r->kind = RCONST; + r->id = 0xC064C064; + r->holds = (vreg*)c; + return r; +#else + return pconst(r,(int)c); +#endif +} + +#ifndef HL_64 +// it is not possible to access direct 64 bit address in x86-64 +static preg *paddr( preg *r, void *p ) { + r->kind = RADDR; + r->holds = (vreg*)p; + return r; +} +#endif + +static void save_regs( jit_ctx *ctx ) { + int i; + for(i=0;isavedRegs[i] = ctx->pregs[i].holds; + ctx->savedLocks[i] = ctx->pregs[i].lock; + } +} + +static void restore_regs( jit_ctx *ctx ) { + int i; + for(i=0;imaxRegs;i++) + ctx->vregs[i].current = NULL; + for(i=0;isavedRegs[i]; + preg *p = ctx->pregs + i; + p->holds = r; + p->lock = ctx->savedLocks[i]; + if( r ) r->current = p; + } +} + +static void jit_buf( jit_ctx *ctx ) { + if( BUF_POS() > ctx->bufSize - MAX_OP_SIZE ) { + int nsize = ctx->bufSize * 4 / 3; + unsigned char *nbuf; + int curpos; + if( nsize == 0 ) { + int i; + for(i=0;im->code->nfunctions;i++) + nsize += ctx->m->code->functions[i].nops; + nsize *= 4; + } + if( nsize < ctx->bufSize + MAX_OP_SIZE * 4 ) nsize = ctx->bufSize + MAX_OP_SIZE * 4; + curpos = BUF_POS(); + nbuf = (unsigned char*)malloc(nsize); + if( nbuf == NULL ) ASSERT(nsize); + if( ctx->startBuf ) { + memcpy(nbuf,ctx->startBuf,curpos); + free(ctx->startBuf); + } + ctx->startBuf = nbuf; + ctx->buf.b = nbuf + curpos; + ctx->bufSize = nsize; + } +} + +static const char *KNAMES[] = { "cpu","fpu","stack","const","addr","mem","unused" }; +#define ERRIF(c) if( c ) { printf("%s(%s,%s)\n",f?f->name:"???",KNAMES[a->kind], KNAMES[b->kind]); ASSERT(0); } + +typedef struct { + const char *name; // single operand + int r_mem; // r32 / r/m32 r32 + int mem_r; // r/m32 / r32 r/m32 + int r_const; // r32 / imm32 imm32 + int r_i8; // r32 / imm8 imm8 + 
int mem_const; // r/m32 / imm32 N/A +} opform; + +#define FLAG_LONGOP 0x80000000 +#define FLAG_16B 0x40000000 +#define FLAG_8B 0x20000000 +#define FLAG_DUAL 0x10000000 + +#define RM(op,id) ((op) | (((id)+1)<<8)) +#define GET_RM(op) (((op) >> ((op) < 0 ? 24 : 8)) & 15) +#define SBYTE(op) ((op) << 16) +#define LONG_OP(op) ((op) | FLAG_LONGOP) +#define OP16(op) LONG_OP((op) | FLAG_16B) +#define LONG_RM(op,id) LONG_OP(op | (((id) + 1) << 24)) + +static opform OP_FORMS[_CPU_LAST] = { + { "MOV", 0x8B, 0x89, 0xB8, 0, RM(0xC7,0) }, + { "LEA", 0x8D }, + { "PUSH", 0x50, RM(0xFF,6), 0x68, 0x6A }, + { "ADD", 0x03, 0x01, RM(0x81,0), RM(0x83,0) }, + { "SUB", 0x2B, 0x29, RM(0x81,5), RM(0x83,5) }, + { "IMUL", LONG_OP(0x0FAF), 0, 0x69 | FLAG_DUAL, 0x6B | FLAG_DUAL }, + { "DIV", RM(0xF7,6), RM(0xF7,6) }, + { "IDIV", RM(0xF7,7), RM(0xF7,7) }, + { "CDQ", 0x99 }, + { "CDQE", 0x98 }, + { "POP", 0x58, RM(0x8F,0) }, + { "RET", 0xC3 }, + { "CALL", RM(0xFF,2), RM(0xFF,2), 0xE8 }, + { "AND", 0x23, 0x21, RM(0x81,4), RM(0x83,4) }, + { "OR", 0x0B, 0x09, RM(0x81,1), RM(0x83,1) }, + { "XOR", 0x33, 0x31, RM(0x81,6), RM(0x83,6) }, + { "CMP", 0x3B, 0x39, RM(0x81,7), RM(0x83,7) }, + { "TEST", 0x85, 0x85/*SWP?*/, RM(0xF7,0) }, + { "NOP", 0x90 }, + { "SHL", RM(0xD3,4), 0, 0, RM(0xC1,4) }, + { "SHR", RM(0xD3,5), 0, 0, RM(0xC1,5) }, + { "SAR", RM(0xD3,7), 0, 0, RM(0xC1,7) }, + { "INC", IS_64 ? RM(0xFF,0) : 0x40, RM(0xFF,0) }, + { "DEC", IS_64 ? 
RM(0xFF,1) : 0x48, RM(0xFF,1) }, + { "JMP", RM(0xFF,4) }, + // FPU + { "FSTP", 0, RM(0xDD,3) }, + { "FSTP32", 0, RM(0xD9,3) }, + { "FLD", 0, RM(0xDD,0) }, + { "FLD32", 0, RM(0xD9,0) }, + { "FLDCW", 0, RM(0xD9, 5) }, + // SSE + { "MOVSD", 0xF20F10, 0xF20F11 }, + { "MOVSS", 0xF30F10, 0xF30F11 }, + { "COMISD", 0x660F2F }, + { "COMISS", LONG_OP(0x0F2F) }, + { "ADDSD", 0xF20F58 }, + { "SUBSD", 0xF20F5C }, + { "MULSD", 0xF20F59 }, + { "DIVSD", 0xF20F5E }, + { "ADDSS", 0xF30F58 }, + { "SUBSS", 0xF30F5C }, + { "MULSS", 0xF30F59 }, + { "DIVSS", 0xF30F5E }, + { "XORPD", 0x660F57 }, + { "CVTSI2SD", 0xF20F2A }, + { "CVTSI2SS", 0xF30F2A }, + { "CVTSD2SI", 0xF20F2D }, + { "CVTSD2SS", 0xF20F5A }, + { "CVTSS2SD", 0xF30F5A }, + { "CVTSS2SI", 0xF30F2D }, + { "STMXCSR", 0, LONG_RM(0x0FAE,3) }, + { "LDMXCSR", 0, LONG_RM(0x0FAE,2) }, + // 8 bits, + { "MOV8", 0x8A, 0x88, 0, 0xB0, RM(0xC6,0) }, + { "CMP8", 0x3A, 0x38, 0, RM(0x80,7) }, + { "TEST8", 0x84, 0x84, RM(0xF6,0) }, + { "PUSH8", 0, 0, 0x6A | FLAG_8B }, + { "MOV16", OP16(0x8B), OP16(0x89), OP16(0xB8) }, + { "CMP16", OP16(0x3B), OP16(0x39) }, + { "TEST16", OP16(0x85) }, + // prefetchs + { "PREFETCHT0", 0, LONG_RM(0x0F18,1) }, + { "PREFETCHT1", 0, LONG_RM(0x0F18,2) }, + { "PREFETCHT2", 0, LONG_RM(0x0F18,3) }, + { "PREFETCHNTA", 0, LONG_RM(0x0F18,0) }, + { "PREFETCHW", 0, LONG_RM(0x0F0D,1) }, +}; + +#ifdef HL_64 +# define REX() if( r64 ) B(r64 | 0x40) +#else +# define REX() +#endif + +#define OP(b) \ + if( (b) & 0xFF0000 ) { \ + B((b)>>16); \ + if( r64 ) B(r64 | 0x40); /* also in 32 bits mode */ \ + B((b)>>8); \ + B(b); \ + } else { \ + if( (b) & FLAG_16B ) { \ + B(0x66); \ + REX(); \ + } else {\ + REX(); \ + if( (b) & FLAG_LONGOP ) B((b)>>8); \ + }\ + B(b); \ + } + +static bool is_reg8( preg *a ) { + return a->kind == RSTACK || a->kind == RMEM || a->kind == RCONST || (a->kind == RCPU && a->id != Esi && a->id != Edi); +} + +static void op( jit_ctx *ctx, CpuOp o, preg *a, preg *b, bool mode64 ) { + opform *f = &OP_FORMS[o]; + int r64 = 
mode64 && (o != PUSH && o != POP && o != CALL && o != PUSH8 && o < PREFETCHT0) ? 8 : 0; + switch( o ) { + case CMP8: + case TEST8: + case MOV8: + if( !is_reg8(a) || !is_reg8(b) ) + ASSERT(0); + break; + default: + break; + } + switch( ID2(a->kind,b->kind) ) { + case ID2(RUNUSED,RUNUSED): + ERRIF(f->r_mem == 0); + OP(f->r_mem); + break; + case ID2(RCPU,RCPU): + case ID2(RFPU,RFPU): + ERRIF( f->r_mem == 0 ); + if( a->id > 7 ) r64 |= 4; + if( b->id > 7 ) r64 |= 1; + OP(f->r_mem); + MOD_RM(3,a->id,b->id); + break; + case ID2(RCPU,RFPU): + case ID2(RFPU,RCPU): + ERRIF( (f->r_mem>>16) == 0 ); + if( a->id > 7 ) r64 |= 4; + if( b->id > 7 ) r64 |= 1; + OP(f->r_mem); + MOD_RM(3,a->id,b->id); + break; + case ID2(RCPU,RUNUSED): + ERRIF( f->r_mem == 0 ); + if( a->id > 7 ) r64 |= 1; + if( GET_RM(f->r_mem) > 0 ) { + OP(f->r_mem); + MOD_RM(3, GET_RM(f->r_mem)-1, a->id); + } else + OP(f->r_mem + (a->id&7)); + break; + case ID2(RSTACK,RUNUSED): + ERRIF( f->mem_r == 0 || GET_RM(f->mem_r) == 0 ); + { + int stackPos = R(a->id)->stackPos; + OP(f->mem_r); + if( IS_SBYTE(stackPos) ) { + MOD_RM(1,GET_RM(f->mem_r)-1,Ebp); + B(stackPos); + } else { + MOD_RM(2,GET_RM(f->mem_r)-1,Ebp); + W(stackPos); + } + } + break; + case ID2(RCPU,RCONST): + ERRIF( f->r_const == 0 && f->r_i8 == 0 ); + if( a->id > 7 ) r64 |= 1; + { + int_val cval = b->holds ? 
(int_val)b->holds : b->id; + // short byte form + if( f->r_i8 && IS_SBYTE(cval) ) { + if( (f->r_i8&FLAG_DUAL) && a->id > 7 ) r64 |= 4; + OP(f->r_i8); + if( (f->r_i8&FLAG_DUAL) ) MOD_RM(3,a->id,a->id); else MOD_RM(3,GET_RM(f->r_i8)-1,a->id); + B((int)cval); + } else if( GET_RM(f->r_const) > 0 || (f->r_const&FLAG_DUAL) ) { + if( (f->r_i8&FLAG_DUAL) && a->id > 7 ) r64 |= 4; + OP(f->r_const&0xFF); + if( (f->r_i8&FLAG_DUAL) ) MOD_RM(3,a->id,a->id); else MOD_RM(3,GET_RM(f->r_const)-1,a->id); + if( mode64 && IS_64 && o == MOV ) W64(cval); else W((int)cval); + } else { + ERRIF( f->r_const == 0); + OP((f->r_const&0xFF) + (a->id&7)); + if( mode64 && IS_64 && o == MOV ) W64(cval); else W((int)cval); + } + } + break; + case ID2(RSTACK,RCPU): + case ID2(RSTACK,RFPU): + ERRIF( f->mem_r == 0 ); + if( b->id > 7 ) r64 |= 4; + { + int stackPos = R(a->id)->stackPos; + OP(f->mem_r); + if( IS_SBYTE(stackPos) ) { + MOD_RM(1,b->id,Ebp); + B(stackPos); + } else { + MOD_RM(2,b->id,Ebp); + W(stackPos); + } + } + break; + case ID2(RCPU,RSTACK): + case ID2(RFPU,RSTACK): + ERRIF( f->r_mem == 0 ); + if( a->id > 7 ) r64 |= 4; + { + int stackPos = R(b->id)->stackPos; + OP(f->r_mem); + if( IS_SBYTE(stackPos) ) { + MOD_RM(1,a->id,Ebp); + B(stackPos); + } else { + MOD_RM(2,a->id,Ebp); + W(stackPos); + } + } + break; + case ID2(RCONST,RUNUSED): + ERRIF( f->r_const == 0 ); + { + int_val cval = a->holds ? (int_val)a->holds : a->id; + OP(f->r_const); + if( f->r_const & FLAG_8B ) B((int)cval); else W((int)cval); + } + break; + case ID2(RMEM,RUNUSED): + ERRIF( f->mem_r == 0 ); + { + int mult = a->id & 0xF; + int regOrOffs = mult == 15 ? 
a->id >> 4 : a->id >> 8; + CpuReg reg = (a->id >> 4) & 0xF; + if( mult == 15 ) { + ERRIF(1); + } else if( mult == 0 ) { + if( reg > 7 ) r64 |= 1; + OP(f->mem_r); + if( regOrOffs == 0 && (reg&7) != Ebp ) { + MOD_RM(0,GET_RM(f->mem_r)-1,reg); + if( (reg&7) == Esp ) B(0x24); + } else if( IS_SBYTE(regOrOffs) ) { + MOD_RM(1,GET_RM(f->mem_r)-1,reg); + if( (reg&7) == Esp ) B(0x24); + B(regOrOffs); + } else { + MOD_RM(2,GET_RM(f->mem_r)-1,reg); + if( (reg&7) == Esp ) B(0x24); + W(regOrOffs); + } + } else { + // [eax + ebx * M] + ERRIF(1); + } + } + break; + case ID2(RCPU, RMEM): + case ID2(RFPU, RMEM): + ERRIF( f->r_mem == 0 ); + { + int mult = b->id & 0xF; + int regOrOffs = mult == 15 ? b->id >> 4 : b->id >> 8; + CpuReg reg = (b->id >> 4) & 0xF; + if( mult == 15 ) { + int pos; + if( a->id > 7 ) r64 |= 4; + OP(f->r_mem); + MOD_RM(0,a->id,5); + if( IS_64 ) { + // offset wrt current code + pos = BUF_POS() + 4; + W(regOrOffs - pos); + } else { + ERRIF(1); + } + } else if( mult == 0 ) { + if( a->id > 7 ) r64 |= 4; + if( reg > 7 ) r64 |= 1; + OP(f->r_mem); + if( regOrOffs == 0 && (reg&7) != Ebp ) { + MOD_RM(0,a->id,reg); + if( (reg&7) == Esp ) B(0x24); + } else if( IS_SBYTE(regOrOffs) ) { + MOD_RM(1,a->id,reg); + if( (reg&7) == Esp ) B(0x24); + B(regOrOffs); + } else { + MOD_RM(2,a->id,reg); + if( (reg&7) == Esp ) B(0x24); + W(regOrOffs); + } + } else { + int offset = (int)(int_val)b->holds; + if( a->id > 7 ) r64 |= 4; + if( reg > 7 ) r64 |= 1; + if( regOrOffs > 7 ) r64 |= 2; + OP(f->r_mem); + MOD_RM(offset == 0 ? 0 : IS_SBYTE(offset) ? 
1 : 2,a->id,4); + SIB(mult,regOrOffs,reg); + if( offset ) { + if( IS_SBYTE(offset) ) B(offset); else W(offset); + } + } + } + break; +# ifndef HL_64 + case ID2(RFPU,RADDR): +# endif + case ID2(RCPU,RADDR): + ERRIF( f->r_mem == 0 ); + if( a->id > 7 ) r64 |= 4; + OP(f->r_mem); + MOD_RM(0,a->id,5); + if( IS_64 ) + W64((int_val)b->holds); + else + W((int)(int_val)b->holds); + break; +# ifndef HL_64 + case ID2(RADDR,RFPU): +# endif + case ID2(RADDR,RCPU): + ERRIF( f->mem_r == 0 ); + if( b->id > 7 ) r64 |= 4; + OP(f->mem_r); + MOD_RM(0,b->id,5); + if( IS_64 ) + W64((int_val)a->holds); + else + W((int)(int_val)a->holds); + break; + case ID2(RMEM, RCPU): + case ID2(RMEM, RFPU): + ERRIF( f->mem_r == 0 ); + { + int mult = a->id & 0xF; + int regOrOffs = mult == 15 ? a->id >> 4 : a->id >> 8; + CpuReg reg = (a->id >> 4) & 0xF; + if( mult == 15 ) { + int pos; + if( b->id > 7 ) r64 |= 4; + OP(f->mem_r); + MOD_RM(0,b->id,5); + if( IS_64 ) { + // offset wrt current code + pos = BUF_POS() + 4; + W(regOrOffs - pos); + } else { + ERRIF(1); + } + } else if( mult == 0 ) { + if( b->id > 7 ) r64 |= 4; + if( reg > 7 ) r64 |= 1; + OP(f->mem_r); + if( regOrOffs == 0 && (reg&7) != Ebp ) { + MOD_RM(0,b->id,reg); + if( (reg&7) == Esp ) B(0x24); + } else if( IS_SBYTE(regOrOffs) ) { + MOD_RM(1,b->id,reg); + if( (reg&7) == Esp ) B(0x24); + B(regOrOffs); + } else { + MOD_RM(2,b->id,reg); + if( (reg&7) == Esp ) B(0x24); + W(regOrOffs); + } + } else { + int offset = (int)(int_val)a->holds; + if( b->id > 7 ) r64 |= 4; + if( reg > 7 ) r64 |= 1; + if( regOrOffs > 7 ) r64 |= 2; + OP(f->mem_r); + MOD_RM(offset == 0 ? 0 : IS_SBYTE(offset) ? 
1 : 2,b->id,4); + SIB(mult,regOrOffs,reg); + if( offset ) { + if( IS_SBYTE(offset) ) B(offset); else W(offset); + } + } + } + break; + default: + ERRIF(1); + } + if( ctx->debug && ctx->f && o == CALL ) { + preg p; + op(ctx,MOV,pmem(&p,Esp,-HL_WSIZE),PEBP,true); // erase EIP (clean stack report) + } +} + +static void op32( jit_ctx *ctx, CpuOp o, preg *a, preg *b ) { + op(ctx,o,a,b,false); +} + +static void op64( jit_ctx *ctx, CpuOp o, preg *a, preg *b ) { +#ifndef HL_64 + op(ctx,o,a,b,false); +#else + op(ctx,o,a,b,true); +#endif +} + +static void patch_jump( jit_ctx *ctx, int p ) { + if( p == 0 ) return; + if( p & 0x40000000 ) { + int d; + p &= 0x3FFFFFFF; + d = BUF_POS() - (p + 1); + if( d < -128 || d >= 128 ) ASSERT(d); + *(char*)(ctx->startBuf + p) = (char)d; + } else { + *(int*)(ctx->startBuf + p) = BUF_POS() - (p + 4); + } +} + +static void patch_jump_to( jit_ctx *ctx, int p, int target ) { + if( p == 0 ) return; + if( p & 0x40000000 ) { + int d; + p &= 0x3FFFFFFF; + d = target - (p + 1); + if( d < -128 || d >= 128 ) ASSERT(d); + *(char*)(ctx->startBuf + p) = (char)d; + } else { + *(int*)(ctx->startBuf + p) = target - (p + 4); + } +} + +static int stack_size( hl_type *t ) { + switch( t->kind ) { + case HUI8: + case HUI16: + case HBOOL: +# ifdef HL_64 + case HI32: + case HF32: +# endif + return sizeof(int_val); + case HI64: + default: + return hl_type_size(t); + } +} + +static int call_reg_index( int reg ) { +# ifdef HL_64 + int i; + for(i=0;ikind == RFPU ) + return p->id < CALL_NREGS; + for(i=0;ikind == RCPU && p->id == CALL_REGS[i] ) + return true; + return false; +# else + return false; +# endif +} + +static preg *alloc_reg( jit_ctx *ctx, preg_kind k ) { + int i; + preg *p; + switch( k ) { + case RCPU: + case RCPU_CALL: + case RCPU_8BITS: + { + int off = ctx->allocOffset++; + const int count = RCPU_SCRATCH_COUNT; + for(i=0;ipregs + r; + if( p->lock >= ctx->currentPos ) continue; + if( k == RCPU_CALL && is_call_reg(p) ) continue; + if( k == RCPU_8BITS && 
!is_reg8(p) ) continue; + if( p->holds == NULL ) { + RLOCK(p); + return p; + } + } + for(i=0;ipregs + RCPU_SCRATCH_REGS[(i + off)%count]; + if( p->lock >= ctx->currentPos ) continue; + if( k == RCPU_CALL && is_call_reg(p) ) continue; + if( k == RCPU_8BITS && !is_reg8(p) ) continue; + if( p->holds ) { + RLOCK(p); + p->holds->current = NULL; + p->holds = NULL; + return p; + } + } + } + break; + case RFPU: + { + int off = ctx->allocOffset++; + const int count = RFPU_SCRATCH_COUNT; + for(i=0;ilock >= ctx->currentPos ) continue; + if( p->holds == NULL ) { + RLOCK(p); + return p; + } + } + for(i=0;ilock >= ctx->currentPos ) continue; + if( p->holds ) { + RLOCK(p); + p->holds->current = NULL; + p->holds = NULL; + return p; + } + } + } + break; + default: + ASSERT(k); + } + ASSERT(0); // out of registers ! + return NULL; +} + +static preg *fetch( vreg *r ) { + if( r->current ) + return r->current; + return &r->stack; +} + +static void scratch( preg *r ) { + if( r && r->holds ) { + r->holds->current = NULL; + r->holds = NULL; + r->lock = 0; + } +} + +static preg *copy( jit_ctx *ctx, preg *to, preg *from, int size ); + +static void load( jit_ctx *ctx, preg *r, vreg *v ) { + preg *from = fetch(v); + if( from == r || v->size == 0 ) return; + if( r->holds ) r->holds->current = NULL; + if( v->current ) { + v->current->holds = NULL; + from = r; + } + r->holds = v; + v->current = r; + copy(ctx,r,from,v->size); +} + +static preg *alloc_fpu( jit_ctx *ctx, vreg *r, bool andLoad ) { + preg *p = fetch(r); + if( p->kind != RFPU ) { + if( !IS_FLOAT(r) && (IS_64 || r->t->kind != HI64) ) ASSERT(r->t->kind); + p = alloc_reg(ctx, RFPU); + if( andLoad ) + load(ctx,p,r); + else { + if( r->current ) + r->current->holds = NULL; + r->current = p; + p->holds = r; + } + } else + RLOCK(p); + return p; +} + +static void reg_bind( vreg *r, preg *p ) { + if( r->current ) + r->current->holds = NULL; + r->current = p; + p->holds = r; +} + +static preg *alloc_cpu( jit_ctx *ctx, vreg *r, bool andLoad ) { + 
preg *p = fetch(r); + if( p->kind != RCPU ) { +# ifndef HL_64 + if( r->t->kind == HI64 ) return alloc_fpu(ctx,r,andLoad); + if( r->size > 4 ) ASSERT(r->size); +# endif + p = alloc_reg(ctx, RCPU); + if( andLoad ) + load(ctx,p,r); + else + reg_bind(r,p); + } else + RLOCK(p); + return p; +} + +// allocate a register that is not a call parameter +static preg *alloc_cpu_call( jit_ctx *ctx, vreg *r ) { + preg *p = fetch(r); + if( p->kind != RCPU ) { +# ifndef HL_64 + if( r->t->kind == HI64 ) return alloc_fpu(ctx,r,true); + if( r->size > 4 ) ASSERT(r->size); +# endif + p = alloc_reg(ctx, RCPU_CALL); + load(ctx,p,r); + } else if( is_call_reg(p) ) { + preg *p2 = alloc_reg(ctx, RCPU_CALL); + op64(ctx,MOV,p2,p); + scratch(p); + reg_bind(r,p2); + return p2; + } else + RLOCK(p); + return p; +} + +static preg *fetch32( jit_ctx *ctx, vreg *r ) { + if( r->current ) + return r->current; + // make sure that the register is correctly erased + if( r->size < 4 ) { + preg *p = alloc_cpu(ctx, r, true); + RUNLOCK(p); + return p; + } + return fetch(r); +} + +// make sure higher bits are zeroes +static preg *alloc_cpu64( jit_ctx *ctx, vreg *r, bool andLoad ) { +# ifndef HL_64 + return alloc_cpu(ctx,r,andLoad); +# else + preg *p = fetch(r); + if( !andLoad ) ASSERT(0); + if( p->kind != RCPU ) { + p = alloc_reg(ctx, RCPU); + op64(ctx,XOR,p,p); + load(ctx,p,r); + } else { + // remove higher bits + preg tmp; + op64(ctx,SHL,p,pconst(&tmp,32)); + op64(ctx,SHR,p,pconst(&tmp,32)); + RLOCK(p); + } + return p; +# endif +} + +// make sure the register can be used with 8 bits access +static preg *alloc_cpu8( jit_ctx *ctx, vreg *r, bool andLoad ) { + preg *p = fetch(r); + if( p->kind != RCPU ) { + p = alloc_reg(ctx, RCPU_8BITS); + load(ctx,p,r); + } else if( !is_reg8(p) ) { + preg *p2 = alloc_reg(ctx, RCPU_8BITS); + op64(ctx,MOV,p2,p); + scratch(p); + reg_bind(r,p2); + return p2; + } else + RLOCK(p); + return p; +} + +static preg *copy( jit_ctx *ctx, preg *to, preg *from, int size ) { + if( size == 0 || 
to == from ) return to; + switch( ID2(to->kind,from->kind) ) { + case ID2(RMEM,RCPU): + case ID2(RSTACK,RCPU): + case ID2(RCPU,RSTACK): + case ID2(RCPU,RMEM): + case ID2(RCPU,RCPU): +# ifndef HL_64 + case ID2(RCPU,RADDR): + case ID2(RADDR,RCPU): +# endif + switch( size ) { + case 1: + if( to->kind == RCPU ) { + op64(ctx,XOR,to,to); + if( !is_reg8(to) ) { + preg p; + op32(ctx,MOV16,to,from); + op32(ctx,SHL,to,pconst(&p,24)); + op32(ctx,SHR,to,pconst(&p,24)); + break; + } + } + if( !is_reg8(from) ) { + preg *r = alloc_reg(ctx, RCPU_CALL); + op32(ctx, MOV, r, from); + RUNLOCK(r); + op32(ctx,MOV8,to,r); + return from; + } + op32(ctx,MOV8,to,from); + break; + case 2: + if( to->kind == RCPU ) + op64(ctx,XOR,to,to); + op32(ctx,MOV16,to,from); + break; + case 4: + op32(ctx,MOV,to,from); + break; + case 8: + if( IS_64 ) { + op64(ctx,MOV,to,from); + break; + } + default: + ASSERT(size); + } + return to->kind == RCPU ? to : from; + case ID2(RFPU,RFPU): + case ID2(RMEM,RFPU): + case ID2(RSTACK,RFPU): + case ID2(RFPU,RMEM): + case ID2(RFPU,RSTACK): + switch( size ) { + case 8: + op64(ctx,MOVSD,to,from); + break; + case 4: + op32(ctx,MOVSS,to,from); + break; + default: + ASSERT(size); + } + return to->kind == RFPU ? to : from; + case ID2(RMEM,RSTACK): + { + vreg *rfrom = R(from->id); + if( IS_FLOAT(rfrom) ) + return copy(ctx,to,alloc_fpu(ctx,rfrom,true),size); + return copy(ctx,to,alloc_cpu(ctx,rfrom,true),size); + } + case ID2(RMEM,RMEM): + case ID2(RSTACK,RMEM): + case ID2(RSTACK,RSTACK): +# ifndef HL_64 + case ID2(RMEM,RADDR): + case ID2(RSTACK,RADDR): + case ID2(RADDR,RSTACK): +# endif + { + preg *tmp; + if( (!IS_64 && size == 8) || (to->kind == RSTACK && IS_FLOAT(R(to->id))) || (from->kind == RSTACK && IS_FLOAT(R(from->id))) ) { + tmp = alloc_reg(ctx, RFPU); + op64(ctx,size == 8 ? 
MOVSD : MOVSS,tmp,from); + } else { + tmp = alloc_reg(ctx, RCPU); + copy(ctx,tmp,from,size); + } + return copy(ctx,to,tmp,size); + } +# ifdef HL_64 + case ID2(RCPU,RADDR): + case ID2(RMEM,RADDR): + case ID2(RSTACK,RADDR): + { + preg p; + preg *tmp = alloc_reg(ctx, RCPU); + op64(ctx,MOV,tmp,pconst64(&p,(int_val)from->holds)); + return copy(ctx,to,pmem(&p,tmp->id,0),size); + } + case ID2(RADDR,RCPU): + case ID2(RADDR,RMEM): + case ID2(RADDR,RSTACK): + { + preg p; + preg *tmp = alloc_reg(ctx, RCPU); + op64(ctx,MOV,tmp,pconst64(&p,(int_val)to->holds)); + return copy(ctx,pmem(&p,tmp->id,0),from,size); + } +# endif + default: + break; + } + printf("copy(%s,%s)\n",KNAMES[to->kind], KNAMES[from->kind]); + ASSERT(0); + return NULL; +} + +static void store( jit_ctx *ctx, vreg *r, preg *v, bool bind ) { + if( r->current && r->current != v ) { + r->current->holds = NULL; + r->current = NULL; + } + v = copy(ctx,&r->stack,v,r->size); + if( IS_FLOAT(r) != (v->kind == RFPU) ) + ASSERT(0); + if( bind && r->current != v && (v->kind == RCPU || v->kind == RFPU) ) { + scratch(v); + r->current = v; + v->holds = r; + } +} + +static void store_result( jit_ctx *ctx, vreg *r ) { +# ifndef HL_64 + switch( r->t->kind ) { + case HF64: + scratch(r->current); + op64(ctx,FSTP,&r->stack,UNUSED); + break; + case HF32: + scratch(r->current); + op64(ctx,FSTP32,&r->stack,UNUSED); + break; + case HI64: + scratch(r->current); + error_i64(); + break; + default: +# endif + store(ctx,r,IS_FLOAT(r) ? 
REG_AT(XMM(0)) : PEAX,true); +# ifndef HL_64 + break; + } +# endif +} + +static void op_mov( jit_ctx *ctx, vreg *to, vreg *from ) { + preg *r = fetch(from); +# ifndef HL_64 + if( to->t->kind == HI64 ) { + error_i64(); + return; + } +# endif + if( from->t->kind == HF32 && r->kind != RFPU ) + r = alloc_fpu(ctx,from,true); + store(ctx, to, r, true); +} + +static void copy_to( jit_ctx *ctx, vreg *to, preg *from ) { + store(ctx,to,from,true); +} + +static void copy_from( jit_ctx *ctx, preg *to, vreg *from ) { + copy(ctx,to,fetch(from),from->size); +} + +static void store_const( jit_ctx *ctx, vreg *r, int c ) { + preg p; + if( c == 0 ) + op(ctx,XOR,alloc_cpu(ctx,r,false),alloc_cpu(ctx,r,false),r->size == 8); + else if( r->size == 8 ) + op64(ctx,MOV,alloc_cpu(ctx,r,false),pconst64(&p,c)); + else + op32(ctx,MOV,alloc_cpu(ctx,r,false),pconst(&p,c)); + store(ctx,r,r->current,false); +} + +static void discard_regs( jit_ctx *ctx, bool native_call ) { + int i; + for(i=0;ipregs + RCPU_SCRATCH_REGS[i]; + if( r->holds ) { + r->holds->current = NULL; + r->holds = NULL; + } + } + for(i=0;ipregs + XMM(i); + if( r->holds ) { + r->holds->current = NULL; + r->holds = NULL; + } + } +} + +static int pad_before_call( jit_ctx *ctx, int size ) { + int total = size + ctx->totalRegsSize + HL_WSIZE * 2; // EIP+EBP + if( total & 15 ) { + int pad = 16 - (total & 15); + preg p; + if( pad ) op64(ctx,SUB,PESP,pconst(&p,pad)); + size += pad; + } + return size; +} + +static void push_reg( jit_ctx *ctx, vreg *r ) { + preg p; + switch( stack_size(r->t) ) { + case 1: + op64(ctx,SUB,PESP,pconst(&p,1)); + op32(ctx,MOV8,pmem(&p,Esp,0),alloc_cpu8(ctx,r,true)); + break; + case 2: + op64(ctx,SUB,PESP,pconst(&p,2)); + op32(ctx,MOV16,pmem(&p,Esp,0),alloc_cpu(ctx,r,true)); + break; + case 4: + if( r->size < 4 ) + alloc_cpu(ctx,r,true); // force fetch (higher bits set to 0) + if( !IS_64 ) { + if( r->current != NULL && r->current->kind == RFPU ) scratch(r->current); + op32(ctx,PUSH,fetch(r),UNUSED); + } else { + // 
pseudo push32 (not available) + op64(ctx,SUB,PESP,pconst(&p,4)); + op32(ctx,MOV,pmem(&p,Esp,0),alloc_cpu(ctx,r,true)); + } + break; + case 8: + if( fetch(r)->kind == RFPU ) { + op64(ctx,SUB,PESP,pconst(&p,8)); + op64(ctx,MOVSD,pmem(&p,Esp,0),fetch(r)); + } else if( IS_64 ) + op64(ctx,PUSH,fetch(r),UNUSED); + else if( r->stack.kind == RSTACK ) { + scratch(r->current); + r->stackPos += 4; + op32(ctx,PUSH,&r->stack,UNUSED); + r->stackPos -= 4; + op32(ctx,PUSH,&r->stack,UNUSED); + } else + ASSERT(0); + break; + default: + ASSERT(r->size); + } +} + +static int begin_native_call( jit_ctx *ctx, int nargs ) { + ctx->nativeArgsCount = nargs; + return pad_before_call(ctx, nargs > CALL_NREGS ? (nargs - CALL_NREGS) * HL_WSIZE : 0); +} + +static preg *alloc_native_arg( jit_ctx *ctx ) { +# ifdef HL_64 + int rid = ctx->nativeArgsCount - 1; + preg *r = rid < CALL_NREGS ? REG_AT(CALL_REGS[rid]) : alloc_reg(ctx,RCPU_CALL); + scratch(r); + return r; +# else + return alloc_reg(ctx, RCPU); +# endif +} + +static void set_native_arg( jit_ctx *ctx, preg *r ) { + if( r->kind == RSTACK ) { + vreg *v = ctx->vregs + r->id; + if( v->size < 4 ) + r = fetch32(ctx, v); + } +# ifdef HL_64 + if( r->kind == RFPU ) ASSERT(0); + int rid = --ctx->nativeArgsCount; + preg *target; + if( rid >= CALL_NREGS ) { + op64(ctx,PUSH,r,UNUSED); + return; + } + target = REG_AT(CALL_REGS[rid]); + if( target != r ) { + op64(ctx, MOV, target, r); + scratch(target); + } +# else + op32(ctx,PUSH,r,UNUSED); +# endif +} + +static void set_native_arg_fpu( jit_ctx *ctx, preg *r, bool isf32 ) { +# ifdef HL_64 + if( r->kind == RCPU ) ASSERT(0); + // can only be used if last argument !! + ctx->nativeArgsCount--; + preg *target = REG_AT(XMM(IS_WINCALL64 ? ctx->nativeArgsCount : 0)); + if( target != r ) { + op64(ctx, isf32 ? 
MOVSS : MOVSD, target, r); + scratch(target); + } +# else + op32(ctx,PUSH,r,UNUSED); +# endif +} + +typedef struct { + int nextCpu; + int nextFpu; + int mapped[REG_COUNT]; +} call_regs; + +static int select_call_reg( call_regs *regs, hl_type *t, int id ) { +# ifndef HL_64 + return -1; +#else + bool isFloat = t->kind == HF32 || t->kind == HF64; +# ifdef HL_WIN_CALL + int index = regs->nextCpu++; +# else + int index = isFloat ? regs->nextFpu++ : regs->nextCpu++; +# endif + if( index >= CALL_NREGS ) + return -1; + int reg = isFloat ? XMM(index) : CALL_REGS[index]; + regs->mapped[reg] = id + 1; + return reg; +#endif +} + +static int mapped_reg( call_regs *regs, int id ) { +# ifndef HL_64 + return -1; +#else + int i; + for(i=0;imapped[r] == id + 1 ) return r; + r = XMM(i); + if( regs->mapped[r] == id + 1 ) return r; + } + return -1; +#endif +} + +static int prepare_call_args( jit_ctx *ctx, int count, int *args, vreg *vregs, int extraSize ) { + int i; + int size = extraSize, paddedSize; + call_regs ctmp = {0}; + for(i=0;it, i); + if( cr >= 0 ) { + preg *c = REG_AT(cr); + preg *cur = fetch(r); + if( cur != c ) { + copy(ctx,c,cur,r->size); + scratch(c); + } + RLOCK(c); + continue; + } + size += stack_size(r->t); + } + paddedSize = pad_before_call(ctx,size); + for(i=0;i= 0 ) continue; + push_reg(ctx,r); + if( r->current ) RUNLOCK(r->current); + } + return paddedSize; +} + +static void op_call( jit_ctx *ctx, preg *r, int size ) { + preg p; +# ifdef JIT_DEBUG + if( IS_64 && size >= 0 ) { + int jchk; + op32(ctx,TEST,PESP,pconst(&p,15)); + XJump(JZero,jchk); + BREAK(); // unaligned ESP + patch_jump(ctx, jchk); + } +# endif + if( IS_WINCALL64 ) { + // MSVC requires 32bytes of free space here + op64(ctx,SUB,PESP,pconst(&p,32)); + if( size >= 0 ) size += 32; + } + op32(ctx, CALL, r, UNUSED); + if( size > 0 ) op64(ctx,ADD,PESP,pconst(&p,size)); +} + +static void call_native( jit_ctx *ctx, void *nativeFun, int size ) { + bool isExc = nativeFun == hl_assert || nativeFun == hl_throw 
|| nativeFun == on_jit_error; + preg p; + // native function, already resolved + op64(ctx,MOV,PEAX,pconst64(&p,(int_val)nativeFun)); + op_call(ctx,PEAX, isExc ? -1 : size); + if( isExc ) + return; + discard_regs(ctx, true); +} + +static void op_call_fun( jit_ctx *ctx, vreg *dst, int findex, int count, int *args ) { + int fid = findex < 0 ? -1 : ctx->m->functions_indexes[findex]; + bool isNative = fid >= ctx->m->code->nfunctions; + int size = prepare_call_args(ctx,count,args,ctx->vregs,0); + preg p; + if( fid < 0 ) { + ASSERT(fid); + } else if( isNative ) { + call_native(ctx,ctx->m->functions_ptrs[findex],size); + } else { + int cpos = BUF_POS() + (IS_WINCALL64 ? 4 : 0); +# ifdef JIT_DEBUG + if( IS_64 ) cpos += 13; // ESP CHECK +# endif + if( ctx->m->functions_ptrs[findex] ) { + // already compiled + op_call(ctx,pconst(&p,(int)(int_val)ctx->m->functions_ptrs[findex] - (cpos + 5)), size); + } else if( ctx->m->code->functions + fid == ctx->f ) { + // our current function + op_call(ctx,pconst(&p, ctx->functionPos - (cpos + 5)), size); + } else { + // stage for later + jlist *j = (jlist*)hl_malloc(&ctx->galloc,sizeof(jlist)); + j->pos = cpos; + j->target = findex; + j->next = ctx->calls; + ctx->calls = j; + op_call(ctx,pconst(&p,0), size); + } + discard_regs(ctx, false); + } + if( dst ) + store_result(ctx,dst); +} + +static void op_enter( jit_ctx *ctx ) { + preg p; + op64(ctx, PUSH, PEBP, UNUSED); + op64(ctx, MOV, PEBP, PESP); + if( ctx->totalRegsSize ) op64(ctx, SUB, PESP, pconst(&p,ctx->totalRegsSize)); +} + +static void op_ret( jit_ctx *ctx, vreg *r ) { + preg p; + switch( r->t->kind ) { + case HF32: +# ifdef HL_64 + op64(ctx, MOVSS, PXMM(0), fetch(r)); +# else + op64(ctx,FLD32,&r->stack,UNUSED); +# endif + break; + case HF64: +# ifdef HL_64 + op64(ctx, MOVSD, PXMM(0), fetch(r)); +# else + op64(ctx,FLD,&r->stack,UNUSED); +# endif + break; + default: + if( r->size < 4 && !r->current ) + fetch32(ctx, r); + if( r->current != PEAX ) + op64(ctx,MOV,PEAX,fetch(r)); + 
break; + } + if( ctx->totalRegsSize ) op64(ctx, ADD, PESP, pconst(&p, ctx->totalRegsSize)); +# ifdef JIT_DEBUG + { + int jeq; + op64(ctx, CMP, PESP, PEBP); + XJump_small(JEq,jeq); + jit_error("invalid ESP"); + patch_jump(ctx,jeq); + } +# endif + op64(ctx, POP, PEBP, UNUSED); + op64(ctx, RET, UNUSED, UNUSED); +} + +static void call_native_consts( jit_ctx *ctx, void *nativeFun, int_val *args, int nargs ) { + int size = pad_before_call(ctx, IS_64 ? 0 : HL_WSIZE*nargs); + preg p; + int i; +# ifdef HL_64 + for(i=0;i=0;i--) + op32(ctx, PUSH, pconst64(&p, args[i]), UNUSED); +# endif + call_native(ctx, nativeFun, size); +} + +static void on_jit_error( const char *msg, int_val line ) { + char buf[256]; + int iline = (int)line; + sprintf(buf,"%s (line %d)",msg,iline); +#ifdef HL_WIN_DESKTOP + MessageBoxA(NULL,buf,"JIT ERROR",MB_OK); +#else + printf("JIT ERROR : %s\n",buf); +#endif + hl_debug_break(); + hl_throw(NULL); +} + +static void _jit_error( jit_ctx *ctx, const char *msg, int line ) { + int_val args[2] = { (int_val)msg, (int_val)line }; + call_native_consts(ctx,on_jit_error,args,2); +} + + +static preg *op_binop( jit_ctx *ctx, vreg *dst, vreg *a, vreg *b, hl_op bop ) { + preg *pa = fetch(a), *pb = fetch(b), *out = NULL; + CpuOp o; + if( IS_FLOAT(a) ) { + bool isf32 = a->t->kind == HF32; + switch( bop ) { + case OAdd: o = isf32 ? ADDSS : ADDSD; break; + case OSub: o = isf32 ? SUBSS : SUBSD; break; + case OMul: o = isf32 ? MULSS : MULSD; break; + case OSDiv: o = isf32 ? DIVSS : DIVSD; break; + case OJSLt: + case OJSGte: + case OJSLte: + case OJSGt: + case OJEq: + case OJNotEq: + case OJNotLt: + case OJNotGte: + o = isf32 ? 
COMISS : COMISD; + break; + case OSMod: + { + int args[] = { a->stack.id, b->stack.id }; + int size = prepare_call_args(ctx,2,args,ctx->vregs,0); + void *mod_fun; + if( isf32 ) mod_fun = fmodf; else mod_fun = fmod; + call_native(ctx,mod_fun,size); + store_result(ctx,dst); + return fetch(dst); + } + default: + printf("%s\n", hl_op_name(bop)); + ASSERT(bop); + } + } else { + bool is64 = a->t->kind == HI64; +# ifndef HL_64 + if( is64 ) { + error_i64(); + return fetch(a); + } +# endif + switch( bop ) { + case OAdd: o = ADD; break; + case OSub: o = SUB; break; + case OMul: o = IMUL; break; + case OAnd: o = AND; break; + case OOr: o = OR; break; + case OXor: o = XOR; break; + case OShl: + case OUShr: + case OSShr: + if( !b->current || b->current->kind != RCPU || b->current->id != Ecx ) { + scratch(REG_AT(Ecx)); + op(ctx,MOV,REG_AT(Ecx),pb,is64); + RLOCK(REG_AT(Ecx)); + pa = fetch(a); + } else + RLOCK(b->current); + if( pa->kind != RCPU ) { + pa = alloc_reg(ctx, RCPU); + op(ctx,MOV,pa,fetch(a), is64); + } + op(ctx,bop == OShl ? SHL : (bop == OUShr ? SHR : SAR), pa, UNUSED,is64); + if( dst ) store(ctx, dst, pa, true); + return pa; + case OSDiv: + case OUDiv: + case OSMod: + case OUMod: + { + preg *out = bop == OSMod || bop == OUMod ? REG_AT(Edx) : PEAX; + preg *r = pb; + preg p; + int jz, jz1 = 0, jend; + if( pa->kind == RCPU && pa->id == Eax ) RLOCK(pa); + // ensure b in CPU reg and not in Eax/Edx (for UI8/UI16) + if( pb->kind != RCPU || (pb->id == Eax || pb->id == Edx) ) { + scratch(REG_AT(Ecx)); + scratch(pb); + load(ctx,REG_AT(Ecx),b); + r = REG_AT(Ecx); + } + // integer div 0 => 0 + op(ctx,TEST,r,r,is64); + XJump_small(JZero, jz); + // Prevent MIN/-1 overflow exception + // OSMod: r = (b == 0 || b == -1) ? 0 : a % b + // OSDiv: r = (b == 0 || b == -1) ? 
a * b : a / b + if( bop == OSMod || bop == OSDiv ) { + op(ctx, CMP, r, pconst(&p,-1), is64); + XJump_small(JEq, jz1); + } + pa = fetch(a); + if( pa->kind != RCPU || pa->id != Eax ) { + scratch(PEAX); + scratch(pa); + load(ctx,PEAX,a); + } + scratch(REG_AT(Edx)); + scratch(REG_AT(Eax)); + if( bop == OUDiv || bop == OUMod ) + op(ctx, XOR, REG_AT(Edx), REG_AT(Edx), is64); + else + op(ctx, CDQ, UNUSED, UNUSED, is64); // sign-extend Eax into Eax:Edx + op(ctx, bop == OUDiv || bop == OUMod ? DIV : IDIV, r, UNUSED, is64); + XJump_small(JAlways, jend); + patch_jump(ctx, jz); + patch_jump(ctx, jz1); + if( bop != OSDiv ) { + op(ctx, XOR, out, out, is64); + } else { + load(ctx, out, a); + op(ctx, IMUL, out, r, is64); + } + patch_jump(ctx, jend); + if( dst ) store(ctx, dst, out, true); + return out; + } + case OJSLt: + case OJSGte: + case OJSLte: + case OJSGt: + case OJULt: + case OJUGte: + case OJEq: + case OJNotEq: + switch( a->t->kind ) { + case HUI8: + case HBOOL: + o = CMP8; + break; + case HUI16: + o = CMP16; + break; + default: + o = CMP; + break; + } + break; + default: + printf("%s\n", hl_op_name(bop)); + ASSERT(bop); + } + } + switch( RTYPE(a) ) { + case HI32: + case HUI8: + case HUI16: + case HBOOL: +# ifndef HL_64 + case HDYNOBJ: + case HVIRTUAL: + case HOBJ: + case HSTRUCT: + case HFUN: + case HMETHOD: + case HBYTES: + case HNULL: + case HENUM: + case HDYN: + case HTYPE: + case HABSTRACT: + case HARRAY: +# endif + switch( ID2(pa->kind, pb->kind) ) { + case ID2(RCPU,RCPU): + case ID2(RCPU,RSTACK): + op32(ctx, o, pa, pb); + scratch(pa); + out = pa; + break; + case ID2(RSTACK,RCPU): + if( dst == a && o != IMUL ) { + op32(ctx, o, pa, pb); + dst = NULL; + out = pa; + } else { + alloc_cpu(ctx,a, true); + return op_binop(ctx,dst,a,b,bop); + } + break; + case ID2(RSTACK,RSTACK): + alloc_cpu(ctx, a, true); + return op_binop(ctx, dst, a, b, bop); + default: + printf("%s(%d,%d)\n", hl_op_name(bop), pa->kind, pb->kind); + ASSERT(ID2(pa->kind, pb->kind)); + } + if( dst ) 
store(ctx, dst, out, true); + return out; +# ifdef HL_64 + case HOBJ: + case HSTRUCT: + case HDYNOBJ: + case HVIRTUAL: + case HFUN: + case HMETHOD: + case HBYTES: + case HNULL: + case HENUM: + case HDYN: + case HTYPE: + case HABSTRACT: + case HARRAY: + case HI64: + case HGUID: + switch( ID2(pa->kind, pb->kind) ) { + case ID2(RCPU,RCPU): + case ID2(RCPU,RSTACK): + op64(ctx, o, pa, pb); + scratch(pa); + out = pa; + break; + case ID2(RSTACK,RCPU): + if( dst == a && OP_FORMS[o].mem_r ) { + op64(ctx, o, pa, pb); + dst = NULL; + out = pa; + } else { + alloc_cpu(ctx,a, true); + return op_binop(ctx,dst,a,b,bop); + } + break; + case ID2(RSTACK,RSTACK): + alloc_cpu(ctx, a, true); + return op_binop(ctx, dst, a, b, bop); + default: + printf("%s(%d,%d)\n", hl_op_name(bop), pa->kind, pb->kind); + ASSERT(ID2(pa->kind, pb->kind)); + } + if( dst ) store(ctx, dst, out, true); + return out; +# endif + case HF64: + case HF32: + pa = alloc_fpu(ctx, a, true); + pb = alloc_fpu(ctx, b, true); + switch( ID2(pa->kind, pb->kind) ) { + case ID2(RFPU,RFPU): + op64(ctx,o,pa,pb); + if( (o == COMISD || o == COMISS) && bop != OJSGt ) { + int jnotnan; + XJump_small(JNParity,jnotnan); + switch( bop ) { + case OJSLt: + case OJNotLt: + { + preg *r = alloc_reg(ctx,RCPU); + // set CF=0, ZF=1 + op64(ctx,XOR,r,r); + RUNLOCK(r); + break; + } + case OJSGte: + case OJNotGte: + { + preg *r = alloc_reg(ctx,RCPU); + // set ZF=0, CF=1 + op64(ctx,XOR,r,r); + op64(ctx,CMP,r,PESP); + RUNLOCK(r); + break; + } + break; + case OJNotEq: + case OJEq: + // set ZF=0, CF=? 
+ case OJSLte: + // set ZF=0, CF=0 + op64(ctx,TEST,PESP,PESP); + break; + default: + ASSERT(bop); + } + patch_jump(ctx,jnotnan); + } + scratch(pa); + out = pa; + break; + default: + printf("%s(%d,%d)\n", hl_op_name(bop), pa->kind, pb->kind); + ASSERT(ID2(pa->kind, pb->kind)); + } + if( dst ) store(ctx, dst, out, true); + return out; + default: + ASSERT(RTYPE(a)); + } + return NULL; +} + +static int do_jump( jit_ctx *ctx, hl_op op, bool isFloat ) { + int j; + switch( op ) { + case OJAlways: + XJump(JAlways,j); + break; + case OJSGte: + XJump(isFloat ? JUGte : JSGte,j); + break; + case OJSGt: + XJump(isFloat ? JUGt : JSGt,j); + break; + case OJUGte: + XJump(JUGte,j); + break; + case OJSLt: + XJump(isFloat ? JULt : JSLt,j); + break; + case OJSLte: + XJump(isFloat ? JULte : JSLte,j); + break; + case OJULt: + XJump(JULt,j); + break; + case OJEq: + XJump(JEq,j); + break; + case OJNotEq: + XJump(JNeq,j); + break; + case OJNotLt: + XJump(JUGte,j); + break; + case OJNotGte: + XJump(JULt,j); + break; + default: + j = 0; + printf("Unknown JUMP %d\n",op); + break; + } + return j; +} + +static void register_jump( jit_ctx *ctx, int pos, int target ) { + jlist *j = (jlist*)hl_malloc(&ctx->falloc, sizeof(jlist)); + j->pos = pos; + j->target = target; + j->next = ctx->jumps; + ctx->jumps = j; + if( target != 0 && ctx->opsPos[target] == 0 ) + ctx->opsPos[target] = -1; +} + +#define HDYN_VALUE 8 + +static void dyn_value_compare( jit_ctx *ctx, preg *a, preg *b, hl_type *t ) { + preg p; + switch( t->kind ) { + case HUI8: + case HBOOL: + op32(ctx,MOV8,a,pmem(&p,a->id,HDYN_VALUE)); + op32(ctx,MOV8,b,pmem(&p,b->id,HDYN_VALUE)); + op64(ctx,CMP8,a,b); + break; + case HUI16: + op32(ctx,MOV16,a,pmem(&p,a->id,HDYN_VALUE)); + op32(ctx,MOV16,b,pmem(&p,b->id,HDYN_VALUE)); + op64(ctx,CMP16,a,b); + break; + case HI32: + op32(ctx,MOV,a,pmem(&p,a->id,HDYN_VALUE)); + op32(ctx,MOV,b,pmem(&p,b->id,HDYN_VALUE)); + op64(ctx,CMP,a,b); + break; + case HF32: + { + preg *fa = alloc_reg(ctx, RFPU); + preg *fb 
= alloc_reg(ctx, RFPU); + op64(ctx,MOVSS,fa,pmem(&p,a->id,HDYN_VALUE)); + op64(ctx,MOVSS,fb,pmem(&p,b->id,HDYN_VALUE)); + op64(ctx,COMISD,fa,fb); + } + break; + case HF64: + { + preg *fa = alloc_reg(ctx, RFPU); + preg *fb = alloc_reg(ctx, RFPU); + op64(ctx,MOVSD,fa,pmem(&p,a->id,HDYN_VALUE)); + op64(ctx,MOVSD,fb,pmem(&p,b->id,HDYN_VALUE)); + op64(ctx,COMISD,fa,fb); + } + break; + case HI64: + default: + // ptr comparison + op64(ctx,MOV,a,pmem(&p,a->id,HDYN_VALUE)); + op64(ctx,MOV,b,pmem(&p,b->id,HDYN_VALUE)); + op64(ctx,CMP,a,b); + break; + } +} + +static void op_jump( jit_ctx *ctx, vreg *a, vreg *b, hl_opcode *op, int targetPos ) { + if( a->t->kind == HDYN || b->t->kind == HDYN || a->t->kind == HFUN || b->t->kind == HFUN ) { + int args[] = { a->stack.id, b->stack.id }; + int size = prepare_call_args(ctx,2,args,ctx->vregs,0); + call_native(ctx,hl_dyn_compare,size); + if( op->op == OJSGt || op->op == OJSGte ) { + preg p; + int jinvalid; + op32(ctx,CMP,PEAX,pconst(&p,hl_invalid_comparison)); + XJump_small(JEq,jinvalid); + op32(ctx,TEST,PEAX,PEAX); + register_jump(ctx,do_jump(ctx,op->op, IS_FLOAT(a)),targetPos); + patch_jump(ctx,jinvalid); + return; + } + op32(ctx,TEST,PEAX,PEAX); + } else switch( a->t->kind ) { + case HTYPE: + { + int args[] = { a->stack.id, b->stack.id }; + int size = prepare_call_args(ctx,2,args,ctx->vregs,0); + preg p; + call_native(ctx,hl_same_type,size); + op64(ctx,CMP8,PEAX,pconst(&p,1)); + } + break; + case HNULL: + { + preg *pa = hl_type_size(a->t->tparam) == 1 ? alloc_cpu8(ctx,a,true) : alloc_cpu(ctx,a,true); + preg *pb = hl_type_size(b->t->tparam) == 1 ? 
alloc_cpu8(ctx,b,true) : alloc_cpu(ctx,b,true); + if( op->op == OJEq ) { + // if( a == b || (a && b && a->v == b->v) ) goto + int ja, jb; + // if( a != b && (!a || !b || a->v != b->v) ) goto + op64(ctx,CMP,pa,pb); + register_jump(ctx,do_jump(ctx,OJEq,false),targetPos); + op64(ctx,TEST,pa,pa); + XJump_small(JZero,ja); + op64(ctx,TEST,pb,pb); + XJump_small(JZero,jb); + dyn_value_compare(ctx,pa,pb,a->t->tparam); + register_jump(ctx,do_jump(ctx,OJEq,false),targetPos); + scratch(pa); + scratch(pb); + patch_jump(ctx,ja); + patch_jump(ctx,jb); + } else if( op->op == OJNotEq ) { + int jeq, jcmp; + // if( a != b && (!a || !b || a->v != b->v) ) goto + op64(ctx,CMP,pa,pb); + XJump_small(JEq,jeq); + op64(ctx,TEST,pa,pa); + register_jump(ctx,do_jump(ctx,OJEq,false),targetPos); + op64(ctx,TEST,pb,pb); + register_jump(ctx,do_jump(ctx,OJEq,false),targetPos); + dyn_value_compare(ctx,pa,pb,a->t->tparam); + XJump_small(JZero,jcmp); + scratch(pa); + scratch(pb); + register_jump(ctx,do_jump(ctx,OJNotEq,false),targetPos); + patch_jump(ctx,jcmp); + patch_jump(ctx,jeq); + } else + ASSERT(op->op); + return; + } + case HVIRTUAL: + { + preg p; + preg *pa = alloc_cpu(ctx,a,true); + preg *pb = alloc_cpu(ctx,b,true); + int ja,jb,jav,jbv,jvalue; + if( b->t->kind == HOBJ ) { + if( op->op == OJEq ) { + // if( a ? (b && a->value == b) : (b == NULL) ) goto + op64(ctx,TEST,pa,pa); + XJump_small(JZero,ja); + op64(ctx,TEST,pb,pb); + XJump_small(JZero,jb); + op64(ctx,MOV,pa,pmem(&p,pa->id,HL_WSIZE)); + op64(ctx,CMP,pa,pb); + XJump_small(JAlways,jvalue); + patch_jump(ctx,ja); + op64(ctx,TEST,pb,pb); + patch_jump(ctx,jvalue); + register_jump(ctx,do_jump(ctx,OJEq,false),targetPos); + patch_jump(ctx,jb); + } else if( op->op == OJNotEq ) { + // if( a ? 
(b == NULL || a->value != b) : (b != NULL) ) goto + op64(ctx,TEST,pa,pa); + XJump_small(JZero,ja); + op64(ctx,TEST,pb,pb); + register_jump(ctx,do_jump(ctx,OJEq,false),targetPos); + op64(ctx,MOV,pa,pmem(&p,pa->id,HL_WSIZE)); + op64(ctx,CMP,pa,pb); + XJump_small(JAlways,jvalue); + patch_jump(ctx,ja); + op64(ctx,TEST,pb,pb); + patch_jump(ctx,jvalue); + register_jump(ctx,do_jump(ctx,OJNotEq,false),targetPos); + } else + ASSERT(op->op); + scratch(pa); + return; + } + op64(ctx,CMP,pa,pb); + if( op->op == OJEq ) { + // if( a == b || (a && b && a->value && b->value && a->value == b->value) ) goto + register_jump(ctx,do_jump(ctx,OJEq, false),targetPos); + op64(ctx,TEST,pa,pa); + XJump_small(JZero,ja); + op64(ctx,TEST,pb,pb); + XJump_small(JZero,jb); + op64(ctx,MOV,pa,pmem(&p,pa->id,HL_WSIZE)); + op64(ctx,TEST,pa,pa); + XJump_small(JZero,jav); + op64(ctx,MOV,pb,pmem(&p,pb->id,HL_WSIZE)); + op64(ctx,TEST,pb,pb); + XJump_small(JZero,jbv); + op64(ctx,CMP,pa,pb); + XJump_small(JNeq,jvalue); + register_jump(ctx,do_jump(ctx,OJEq, false),targetPos); + patch_jump(ctx,ja); + patch_jump(ctx,jb); + patch_jump(ctx,jav); + patch_jump(ctx,jbv); + patch_jump(ctx,jvalue); + } else if( op->op == OJNotEq ) { + int jnext; + // if( a != b && (!a || !b || !a->value || !b->value || a->value != b->value) ) goto + XJump_small(JEq,jnext); + op64(ctx,TEST,pa,pa); + XJump_small(JZero,ja); + op64(ctx,TEST,pb,pb); + XJump_small(JZero,jb); + op64(ctx,MOV,pa,pmem(&p,pa->id,HL_WSIZE)); + op64(ctx,TEST,pa,pa); + XJump_small(JZero,jav); + op64(ctx,MOV,pb,pmem(&p,pb->id,HL_WSIZE)); + op64(ctx,TEST,pb,pb); + XJump_small(JZero,jbv); + op64(ctx,CMP,pa,pb); + XJump_small(JEq,jvalue); + patch_jump(ctx,ja); + patch_jump(ctx,jb); + patch_jump(ctx,jav); + patch_jump(ctx,jbv); + register_jump(ctx,do_jump(ctx,OJAlways, false),targetPos); + patch_jump(ctx,jnext); + patch_jump(ctx,jvalue); + } else + ASSERT(op->op); + scratch(pa); + scratch(pb); + return; + } + break; + case HOBJ: + case HSTRUCT: + if( b->t->kind == 
HVIRTUAL ) { + op_jump(ctx,b,a,op,targetPos); // inverse + return; + } + if( hl_get_obj_rt(a->t)->compareFun ) { + preg *pa = alloc_cpu(ctx,a,true); + preg *pb = alloc_cpu(ctx,b,true); + preg p; + int jeq, ja, jb, jcmp; + int args[] = { a->stack.id, b->stack.id }; + switch( op->op ) { + case OJEq: + // if( a == b || (a && b && cmp(a,b) == 0) ) goto + op64(ctx,CMP,pa,pb); + XJump_small(JEq,jeq); + op64(ctx,TEST,pa,pa); + XJump_small(JZero,ja); + op64(ctx,TEST,pb,pb); + XJump_small(JZero,jb); + op_call_fun(ctx,NULL,(int)(int_val)a->t->obj->rt->compareFun,2,args); + op32(ctx,TEST,PEAX,PEAX); + XJump_small(JNotZero,jcmp); + patch_jump(ctx,jeq); + register_jump(ctx,do_jump(ctx,OJAlways,false),targetPos); + patch_jump(ctx,ja); + patch_jump(ctx,jb); + patch_jump(ctx,jcmp); + break; + case OJNotEq: + // if( a != b && (!a || !b || cmp(a,b) != 0) ) goto + op64(ctx,CMP,pa,pb); + XJump_small(JEq,jeq); + op64(ctx,TEST,pa,pa); + register_jump(ctx,do_jump(ctx,OJEq,false),targetPos); + op64(ctx,TEST,pb,pb); + register_jump(ctx,do_jump(ctx,OJEq,false),targetPos); + + op_call_fun(ctx,NULL,(int)(int_val)a->t->obj->rt->compareFun,2,args); + op32(ctx,TEST,PEAX,PEAX); + XJump_small(JZero,jcmp); + + register_jump(ctx,do_jump(ctx,OJNotEq,false),targetPos); + patch_jump(ctx,jcmp); + patch_jump(ctx,jeq); + break; + default: + // if( a && b && cmp(a,b) ?? 
0 ) goto + op64(ctx,TEST,pa,pa); + XJump_small(JZero,ja); + op64(ctx,TEST,pb,pb); + XJump_small(JZero,jb); + op_call_fun(ctx,NULL,(int)(int_val)a->t->obj->rt->compareFun,2,args); + op32(ctx,CMP,PEAX,pconst(&p,0)); + register_jump(ctx,do_jump(ctx,op->op,false),targetPos); + patch_jump(ctx,ja); + patch_jump(ctx,jb); + break; + } + return; + } + // fallthrough + default: + // make sure we have valid 8 bits registers + if( a->size == 1 ) alloc_cpu8(ctx,a,true); + if( b->size == 1 ) alloc_cpu8(ctx,b,true); + op_binop(ctx,NULL,a,b,op->op); + break; + } + register_jump(ctx,do_jump(ctx,op->op, IS_FLOAT(a)),targetPos); +} + +jit_ctx *hl_jit_alloc() { + int i; + jit_ctx *ctx = (jit_ctx*)malloc(sizeof(jit_ctx)); + if( ctx == NULL ) return NULL; + memset(ctx,0,sizeof(jit_ctx)); + hl_alloc_init(&ctx->falloc); + hl_alloc_init(&ctx->galloc); + for(i=0;iid = i; + r->kind = RCPU; + } + for(i=0;iid = i; + r->kind = RFPU; + } + return ctx; +} + +void hl_jit_free( jit_ctx *ctx, h_bool can_reset ) { + free(ctx->vregs); + free(ctx->opsPos); + free(ctx->startBuf); + ctx->maxRegs = 0; + ctx->vregs = NULL; + ctx->maxOps = 0; + ctx->opsPos = NULL; + ctx->startBuf = NULL; + ctx->bufSize = 0; + ctx->buf.b = NULL; + ctx->calls = NULL; + ctx->switchs = NULL; + ctx->closure_list = NULL; + hl_free(&ctx->falloc); + hl_free(&ctx->galloc); + if( !can_reset ) free(ctx); +} + +static void jit_nops( jit_ctx *ctx ) { + while( BUF_POS() & 15 ) + op32(ctx, NOP, UNUSED, UNUSED); +} + +#define MAX_ARGS 16 + +static void *call_jit_c2hl = NULL; +static void *call_jit_hl2c = NULL; + +static void *callback_c2hl( void *_f, hl_type *t, void **args, vdynamic *ret ) { + /* + prepare stack and regs according to prepare_call_args, but by reading runtime type information + from the function type. The stack and regs will be setup by the trampoline function. 
+ */ + void **f = (void**)_f; + unsigned char stack[MAX_ARGS * 8]; + call_regs cregs = {0}; + if( t->fun->nargs > MAX_ARGS ) + hl_error("Too many arguments for dynamic call"); + int i, size = 0, pad = 0, pos = 0; + for(i=0;ifun->nargs;i++) { + hl_type *at = t->fun->args[i]; + int creg = select_call_reg(&cregs,at,i); + if( creg >= 0 ) + continue; + size += stack_size(at); + } + pad = (-size) & 15; + size += pad; + pos = 0; + for(i=0;ifun->nargs;i++) { + // RTL + hl_type *at = t->fun->args[i]; + void *v = args[i]; + int creg = mapped_reg(&cregs,i); + void *store; + if( creg >= 0 ) { + if( REG_IS_FPU(creg) ) { + store = stack + size + CALL_NREGS * HL_WSIZE + (creg - XMM(0)) * sizeof(double); + } else { + store = stack + size + call_reg_index(creg) * HL_WSIZE; + } + switch( at->kind ) { + case HBOOL: + case HUI8: + *(int_val*)store = *(unsigned char*)v; + break; + case HUI16: + *(int_val*)store = *(unsigned short*)v; + break; + case HI32: + *(int_val*)store = *(int*)v; + break; + case HF32: + *(void**)store = 0; + *(float*)store = *(float*)v; + break; + case HF64: + *(double*)store = *(double*)v; + break; + case HI64: + case HGUID: + *(int64*)store = *(int64*)v; + break; + default: + *(void**)store = v; + break; + } + } else { + int tsize = stack_size(at); + store = stack + pos; + pos += tsize; + switch( at->kind ) { + case HBOOL: + case HUI8: + *(int*)store = *(unsigned char*)v; + break; + case HUI16: + *(int*)store = *(unsigned short*)v; + break; + case HI32: + case HF32: + *(int*)store = *(int*)v; + break; + case HF64: + *(double*)store = *(double*)v; + break; + case HI64: + case HGUID: + *(int64*)store = *(int64*)v; + break; + default: + *(void**)store = v; + break; + } + } + } + pos += pad; + pos >>= IS_64 ? 
3 : 2; + switch( t->fun->ret->kind ) { + case HUI8: + case HUI16: + case HI32: + case HBOOL: + ret->v.i = ((int (*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)&stack + pos, &stack); + return &ret->v.i; + case HI64: + case HGUID: + ret->v.i64 = ((int64 (*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)&stack + pos, &stack); + return &ret->v.i64; + case HF32: + ret->v.f = ((float (*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)&stack + pos, &stack); + return &ret->v.f; + case HF64: + ret->v.d = ((double (*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)&stack + pos, &stack); + return &ret->v.d; + default: + return ((void *(*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)&stack + pos, &stack); + } +} + +static void jit_c2hl( jit_ctx *ctx ) { + // create the function that will be called by callback_c2hl + // it will make sure to prepare the stack/regs according to native calling conventions + int jeq, jloop, jstart; + preg *fptr, *stack, *stend; + preg p; + + op64(ctx,PUSH,PEBP,UNUSED); + op64(ctx,MOV,PEBP,PESP); + +# ifdef HL_64 + + fptr = REG_AT(R10); + stack = PEAX; + stend = REG_AT(R11); + op64(ctx, MOV, fptr, REG_AT(CALL_REGS[0])); + op64(ctx, MOV, stack, REG_AT(CALL_REGS[1])); + op64(ctx, MOV, stend, REG_AT(CALL_REGS[2])); + + // set native call regs + int i; + for(i=0;iid,i*HL_WSIZE)); + for(i=0;iid,(i+CALL_NREGS)*HL_WSIZE)); + +# else + + // make sure the stack is aligned on 16 bytes + // the amount of push we will do afterwards is guaranteed to be a multiple of 16bytes by hl_callback +# ifdef HL_VCC + // VCC does not guarantee us an aligned stack... 
+ op64(ctx,MOV,PEAX,PESP); + op64(ctx,AND,PEAX,pconst(&p,15)); + op64(ctx,SUB,PESP,PEAX); +# else + op64(ctx,SUB,PESP,pconst(&p,8)); +# endif + + // mov arguments to regs + fptr = REG_AT(Eax); + stack = REG_AT(Edx); + stend = REG_AT(Ecx); + op64(ctx,MOV,fptr,pmem(&p,Ebp,HL_WSIZE*2)); + op64(ctx,MOV,stack,pmem(&p,Ebp,HL_WSIZE*3)); + op64(ctx,MOV,stend,pmem(&p,Ebp,HL_WSIZE*4)); + +# endif + + // push stack args + jstart = BUF_POS(); + op64(ctx,CMP,stack,stend); + XJump(JEq,jeq); + op64(ctx,SUB,stack,pconst(&p,HL_WSIZE)); + op64(ctx,PUSH,pmem(&p,stack->id,0),UNUSED); + XJump(JAlways,jloop); + patch_jump(ctx,jeq); + patch_jump_to(ctx, jloop, jstart); + + op_call(ctx,fptr,0); + + // cleanup and ret + op64(ctx,MOV,PESP,PEBP); + op64(ctx,POP,PEBP, UNUSED); + op64(ctx,RET,UNUSED,UNUSED); +} + +static vdynamic *jit_wrapper_call( vclosure_wrapper *c, char *stack_args, void **regs ) { + vdynamic *args[MAX_ARGS]; + int i; + int nargs = c->cl.t->fun->nargs; + call_regs cregs = {0}; + if( nargs > MAX_ARGS ) + hl_error("Too many arguments for wrapped call"); + cregs.nextCpu++; // skip fptr in HL64 - was passed as arg0 + for(i=0;icl.t->fun->args[i]; + int creg = select_call_reg(&cregs,t,i); + if( creg < 0 ) { + args[i] = hl_is_dynamic(t) ? 
*(vdynamic**)stack_args : hl_make_dyn(stack_args,t); + stack_args += stack_size(t); + } else if( hl_is_dynamic(t) ) { + args[i] = *(vdynamic**)(regs + call_reg_index(creg)); + } else if( t->kind == HF32 || t->kind == HF64 ) { + args[i] = hl_make_dyn(regs + CALL_NREGS + creg - XMM(0),&hlt_f64); + } else { + args[i] = hl_make_dyn(regs + call_reg_index(creg),t); + } + } + return hl_dyn_call(c->wrappedFun,args,nargs); +} + +static void *jit_wrapper_ptr( vclosure_wrapper *c, char *stack_args, void **regs ) { + vdynamic *ret = jit_wrapper_call(c, stack_args, regs); + hl_type *tret = c->cl.t->fun->ret; + switch( tret->kind ) { + case HVOID: + return NULL; + case HUI8: + case HUI16: + case HI32: + case HBOOL: + return (void*)(int_val)hl_dyn_casti(&ret,&hlt_dyn,tret); + case HI64: + case HGUID: + return (void*)(int_val)hl_dyn_casti64(&ret,&hlt_dyn); + default: + return hl_dyn_castp(&ret,&hlt_dyn,tret); + } +} + +static double jit_wrapper_d( vclosure_wrapper *c, char *stack_args, void **regs ) { + vdynamic *ret = jit_wrapper_call(c, stack_args, regs); + return hl_dyn_castd(&ret,&hlt_dyn); +} + +static void jit_hl2c( jit_ctx *ctx ) { + // create a function that is called with a vclosure_wrapper* and native args + // and pack and pass the args to callback_hl2c + preg p; + int jfloat1, jfloat2, jexit; + hl_type_fun *ft = NULL; + int size; +# ifdef HL_64 + preg *cl = REG_AT(CALL_REGS[0]); + preg *tmp = REG_AT(CALL_REGS[1]); +# else + preg *cl = REG_AT(Ecx); + preg *tmp = REG_AT(Edx); +# endif + + op64(ctx,PUSH,PEBP,UNUSED); + op64(ctx,MOV,PEBP,PESP); + +# ifdef HL_64 + // push registers + int i; + op64(ctx,SUB,PESP,pconst(&p,CALL_NREGS*8)); + for(i=0;it->fun->ret->kind ) { + // case HF32: case HF64: return jit_wrapper_d(arg0,&args); + // default: return jit_wrapper_ptr(arg0,&args); + // } + if( !IS_64 ) + op64(ctx,MOV,cl,pmem(&p,Ebp,HL_WSIZE*2)); // load arg0 + op64(ctx,MOV,tmp,pmem(&p,cl->id,0)); // ->t + op64(ctx,MOV,tmp,pmem(&p,tmp->id,HL_WSIZE)); // ->fun + 
op64(ctx,MOV,tmp,pmem(&p,tmp->id,(int)(int_val)&ft->ret)); // ->ret + op32(ctx,MOV,tmp,pmem(&p,tmp->id,0)); // -> kind + + op32(ctx,CMP,tmp,pconst(&p,HF64)); + XJump_small(JEq,jfloat1); + op32(ctx,CMP,tmp,pconst(&p,HF32)); + XJump_small(JEq,jfloat2); + + // 64 bits : ESP + EIP (+WIN64PAD) + // 32 bits : ESP + EIP + PARAM0 + int args_pos = IS_64 ? ((IS_WINCALL64 ? 32 : 0) + HL_WSIZE * 2) : (HL_WSIZE*3); + + size = begin_native_call(ctx,3); + op64(ctx, LEA, tmp, pmem(&p,Ebp,-HL_WSIZE*CALL_NREGS*2)); + set_native_arg(ctx, tmp); + op64(ctx, LEA, tmp, pmem(&p,Ebp,args_pos)); + set_native_arg(ctx, tmp); + set_native_arg(ctx, cl); + call_native(ctx, jit_wrapper_ptr, size); + XJump_small(JAlways, jexit); + + patch_jump(ctx,jfloat1); + patch_jump(ctx,jfloat2); + size = begin_native_call(ctx,3); + op64(ctx, LEA, tmp, pmem(&p,Ebp,-HL_WSIZE*CALL_NREGS*2)); + set_native_arg(ctx, tmp); + op64(ctx, LEA, tmp, pmem(&p,Ebp,args_pos)); + set_native_arg(ctx, tmp); + set_native_arg(ctx, cl); + call_native(ctx, jit_wrapper_d, size); + + patch_jump(ctx,jexit); + op64(ctx,MOV,PESP,PEBP); + op64(ctx,POP,PEBP, UNUSED); + op64(ctx,RET,UNUSED,UNUSED); +} + +static void jit_fail( uchar *msg ) { + if( msg == NULL ) { + hl_debug_break(); + msg = USTR("assert"); + } + vdynamic *d = hl_alloc_dynamic(&hlt_bytes); + d->v.ptr = msg; + hl_throw(d); +} + +static void jit_null_access( jit_ctx *ctx ) { + op64(ctx,PUSH,PEBP,UNUSED); + op64(ctx,MOV,PEBP,PESP); + int_val arg = (int_val)USTR("Null access"); + call_native_consts(ctx, jit_fail, &arg, 1); +} + +static void jit_null_fail( int fhash ) { + vbyte *field = hl_field_name(fhash); + hl_buffer *b = hl_alloc_buffer(); + hl_buffer_str(b, USTR("Null access .")); + hl_buffer_str(b, (uchar*)field); + vdynamic *d = hl_alloc_dynamic(&hlt_bytes); + d->v.ptr = hl_buffer_content(b,NULL); + hl_throw(d); +} + +static void jit_null_field_access( jit_ctx *ctx ) { + preg p; + op64(ctx,PUSH,PEBP,UNUSED); + op64(ctx,MOV,PEBP,PESP); + int size = begin_native_call(ctx, 
1); + int args_pos = (IS_WINCALL64 ? 32 : 0) + HL_WSIZE*2; + set_native_arg(ctx, pmem(&p,Ebp,args_pos)); + call_native(ctx,jit_null_fail,size); +} + +static void jit_assert( jit_ctx *ctx ) { + op64(ctx,PUSH,PEBP,UNUSED); + op64(ctx,MOV,PEBP,PESP); + int_val arg = 0; + call_native_consts(ctx, jit_fail, &arg, 1); +} + +static int jit_build( jit_ctx *ctx, void (*fbuild)( jit_ctx *) ) { + int pos; + jit_buf(ctx); + jit_nops(ctx); + pos = BUF_POS(); + fbuild(ctx); + int endPos = BUF_POS(); + jit_nops(ctx); +#ifdef WIN64_UNWIND_TABLES + int fid = ctx->nunwind++; + ctx->unwind_table[fid].BeginAddress = pos; + ctx->unwind_table[fid].EndAddress = endPos; + ctx->unwind_table[fid].UnwindData = ctx->unwind_offset; +#endif + return pos; +} + +static void hl_jit_init_module( jit_ctx *ctx, hl_module *m ) { + int i; + ctx->m = m; + if( m->code->hasdebug ) { + ctx->debug = (hl_debug_infos*)malloc(sizeof(hl_debug_infos) * m->code->nfunctions); + memset(ctx->debug, -1, sizeof(hl_debug_infos) * m->code->nfunctions); + } + for(i=0;icode->nfloats;i++) { + jit_buf(ctx); + *ctx->buf.d++ = m->code->floats[i]; + } +#ifdef WIN64_UNWIND_TABLES + jit_buf(ctx); + ctx->unwind_offset = BUF_POS(); + write_unwind_data(ctx); + + ctx->unwind_table = malloc(sizeof(RUNTIME_FUNCTION) * (m->code->nfunctions + 10)); + memset(ctx->unwind_table, 0, sizeof(RUNTIME_FUNCTION) * (m->code->nfunctions + 10)); +#endif +} + +void hl_jit_init( jit_ctx *ctx, hl_module *m ) { + hl_jit_init_module(ctx,m); + ctx->c2hl = jit_build(ctx, jit_c2hl); + ctx->hl2c = jit_build(ctx, jit_hl2c); + ctx->static_functions[0] = (void*)(int_val)jit_build(ctx,jit_null_access); + ctx->static_functions[1] = (void*)(int_val)jit_build(ctx,jit_assert); + ctx->static_functions[2] = (void*)(int_val)jit_build(ctx,jit_null_field_access); +} + +void hl_jit_reset( jit_ctx *ctx, hl_module *m ) { + ctx->debug = NULL; + hl_jit_init_module(ctx,m); +} + +static void *get_dyncast( hl_type *t ) { + switch( t->kind ) { + case HF32: + return hl_dyn_castf; 
+	case HF64:
+		return hl_dyn_castd;
+	case HI64:
+	case HGUID:
+		return hl_dyn_casti64;
+	case HI32:
+	case HUI16:
+	case HUI8:
+	case HBOOL:
+		return hl_dyn_casti;
+	default:
+		return hl_dyn_castp;
+	}
+}
+
+// Selects the hl_dyn_set* runtime function matching the kind of `t`:
+// f32, f64, 64-bit integer/GUID, 32-bit-or-smaller integer/bool, and the
+// pointer variant for every other kind.
+static void *get_dynset( hl_type *t ) {
+	switch( t->kind ) {
+	case HF32:
+		return hl_dyn_setf;
+	case HF64:
+		return hl_dyn_setd;
+	case HI64:
+	case HGUID:
+		return hl_dyn_seti64;
+	case HI32:
+	case HUI16:
+	case HUI8:
+	case HBOOL:
+		return hl_dyn_seti;
+	default:
+		return hl_dyn_setp;
+	}
+}
+
+// Selects the hl_dyn_get* runtime function matching the kind of `t`, using
+// the same kind grouping as get_dynset.
+static void *get_dynget( hl_type *t ) {
+	switch( t->kind ) {
+	case HF32:
+		return hl_dyn_getf;
+	case HF64:
+		return hl_dyn_getd;
+	case HI64:
+	case HGUID:
+		return hl_dyn_geti64;
+	case HI32:
+	case HUI16:
+	case HUI8:
+	case HBOOL:
+		return hl_dyn_geti;
+	default:
+		return hl_dyn_getp;
+	}
+}
+
+// Native helper that converts an unsigned 32-bit value to double through the
+// implicit C conversion; the OToUFloat emitter calls it.
+static double uint_to_double( unsigned int v ) {
+	return v;
+}
+
+// Allocates a closure without bound value for function `fid`, using the
+// module allocator.
+//  - native target (index >= nfunctions): type comes from the natives table
+//    and `fun` is the already known pointer from functions_ptrs
+//  - HL function: type comes from the functions table, `fun` temporarily holds
+//    `fid` as an integer and the closure is pushed on ctx->closure_list,
+//    chained through its `value` field (presumably so the address can be
+//    patched once known - the patching code is not in this part of the file)
+static vclosure *alloc_static_closure( jit_ctx *ctx, int fid ) {
+	hl_module *m = ctx->m;
+	vclosure *c = hl_malloc(&m->ctx.alloc,sizeof(vclosure));
+	int fidx = m->functions_indexes[fid];
+	c->hasValue = 0;
+	if( fidx >= m->code->nfunctions ) {
+		// native
+		c->t = m->code->natives[fidx - m->code->nfunctions].t;
+		c->fun = m->functions_ptrs[fid];
+		c->value = NULL;
+	} else {
+		c->t = m->code->functions[fidx].type;
+		c->fun = (void*)(int_val)fid;
+		c->value = ctx->closure_list;
+		ctx->closure_list = c;
+	}
+	return c;
+}
+
+// Emits the code that casts virtual register `v` to the type of `dst` and
+// stores the result in `dst`.
+// Fast path: `v` is an HNULL whose parameter kind equals the kind of `dst`;
+// the value is then read inline from the pointed object, or zero is produced
+// when the pointer is null.
+// Otherwise a call to the hl_dyn_cast* function chosen by get_dyncast is
+// emitted, receiving the address of the stack slot of `v`.
+static void make_dyn_cast( jit_ctx *ctx, vreg *dst, vreg *v ) {
+	int size;
+	preg p;
+	preg *tmp;
+	if( v->t->kind == HNULL && v->t->tparam->kind == dst->t->kind ) {
+		int jnull, jend;
+		preg *out;
+		switch( dst->t->kind ) {
+		case HUI8:
+		case HUI16:
+		case HI32:
+		case HBOOL:
+		case HI64:
+		case HGUID:
+			// tmp = v ? *(v + 8) : 0
+			// NOTE(review): the literal 8 is presumably the value offset that other
+			// emitters in this file spell HDYN_VALUE - confirm
+			tmp = alloc_cpu(ctx, v, true);
+			op64(ctx, TEST, tmp, tmp);
+			XJump_small(JZero, jnull);
+			op64(ctx, MOV, tmp, pmem(&p,tmp->id,8));
+			XJump_small(JAlways, jend);
+			patch_jump(ctx, jnull);
+			op64(ctx, XOR, tmp, tmp);
+			patch_jump(ctx, jend);
+			store(ctx, dst, tmp, true);
+			return;
+		case HF32:
+		case HF64:
+			// same test as above, but the value is loaded into an FPU register
+			// (MOVSS for f32, MOVSD for f64) and the null case clears it with XORPD
+			tmp = alloc_cpu(ctx, v, true);
+			out = alloc_fpu(ctx, dst, false);
+			op64(ctx, TEST, tmp, tmp);
+			XJump_small(JZero, jnull);
+			op64(ctx, dst->t->kind == HF32 ? MOVSS : MOVSD, out, pmem(&p,tmp->id,8));
+			XJump_small(JAlways, jend);
+			patch_jump(ctx, jnull);
+			op64(ctx, XORPD, out, out);
+			patch_jump(ctx, jend);
+			store(ctx, dst, out, true);
+			return;
+		default:
+			// other kinds go through the native call below
+			break;
+		}
+	}
+	switch( dst->t->kind ) {
+	case HF32:
+	case HF64:
+	case HI64:
+	case HGUID:
+		// these cast functions are called with two arguments: the source type is
+		// set here, the slot address below
+		size = begin_native_call(ctx, 2);
+		set_native_arg(ctx, pconst64(&p,(int_val)v->t));
+		break;
+	default:
+		// the remaining cast functions additionally receive the destination type
+		size = begin_native_call(ctx, 3);
+		set_native_arg(ctx, pconst64(&p,(int_val)dst->t));
+		set_native_arg(ctx, pconst64(&p,(int_val)v->t));
+		break;
+	}
+	// last argument set: address of the stack slot of v, computed as Ebp + stackPos
+	tmp = alloc_native_arg(ctx);
+	op64(ctx,MOV,tmp,REG_AT(Ebp));
+	if( v->stackPos >= 0 )
+		op64(ctx,ADD,tmp,pconst(&p,v->stackPos));
+	else
+		op64(ctx,SUB,tmp,pconst(&p,-v->stackPos));
+	set_native_arg(ctx,tmp);
+	call_native(ctx,get_dyncast(dst->t),size);
+	store_result(ctx, dst);
+}
+
+int hl_jit_function( jit_ctx *ctx, hl_module *m, hl_function *f ) { + int i, size = 0, opCount; + int codePos = BUF_POS(); + int nargs = f->type->fun->nargs; + unsigned short *debug16 = NULL; + int *debug32 = NULL; + call_regs cregs = {0}; + hl_thread_info *tinf = NULL; + preg p; + ctx->f = f; + ctx->allocOffset = 0; + if( f->nregs > ctx->maxRegs ) { + free(ctx->vregs); + ctx->vregs = (vreg*)malloc(sizeof(vreg) * (f->nregs + 1)); + if( ctx->vregs == NULL ) { + ctx->maxRegs = 0; + return -1; + } + ctx->maxRegs = f->nregs; + } + if( f->nops > ctx->maxOps ) { + free(ctx->opsPos); + ctx->opsPos = (int*)malloc(sizeof(int) * (f->nops + 1)); + if( ctx->opsPos == NULL ) { + ctx->maxOps = 0; + return -1; + } + ctx->maxOps = f->nops; + } + memset(ctx->opsPos,0,(f->nops+1)*sizeof(int)); + for(i=0;inregs;i++) { + vreg *r = R(i); + r->t = f->regs[i]; + r->size = hl_type_size(r->t); + r->current = NULL; + r->stack.holds = NULL; + r->stack.id = i; +
r->stack.kind = RSTACK; + } + size = 0; + int argsSize = 0; + for(i=0;it,i); + if( creg < 0 || IS_WINCALL64 ) { + // use existing stack storage + r->stackPos = argsSize + HL_WSIZE * 2; + argsSize += stack_size(r->t); + } else { + // make room in local vars + size += r->size; + size += hl_pad_size(size,r->t); + r->stackPos = -size; + } + } + for(i=nargs;inregs;i++) { + vreg *r = R(i); + size += r->size; + size += hl_pad_size(size,r->t); // align local vars + r->stackPos = -size; + } +# ifdef HL_64 + size += (-size) & 15; // align on 16 bytes +# else + size += hl_pad_size(size,&hlt_dyn); // align on word size +# endif + ctx->totalRegsSize = size; + jit_buf(ctx); + ctx->functionPos = BUF_POS(); + // make sure currentPos is > 0 before any reg allocations happen + // otherwise `alloc_reg` thinks that all registers are locked + ctx->currentPos = 1; + op_enter(ctx); +# ifdef HL_64 + { + // store in local var + for(i=0;isize); + p->holds = r; + r->current = p; + } + } +# endif + if( ctx->m->code->hasdebug ) { + debug16 = (unsigned short*)malloc(sizeof(unsigned short) * (f->nops + 1)); + debug16[0] = (unsigned short)(BUF_POS() - codePos); + } + ctx->opsPos[0] = BUF_POS(); + + for(opCount=0;opCountnops;opCount++) { + int jump; + hl_opcode *o = f->ops + opCount; + vreg *dst = R(o->p1); + vreg *ra = R(o->p2); + vreg *rb = R(o->p3); + ctx->currentPos = opCount + 1; + jit_buf(ctx); +# ifdef JIT_DEBUG + if( opCount == 0 || f->ops[opCount-1].op != OAsm ) { + int uid = opCount + (f->findex<<16); + op32(ctx, PUSH, pconst(&p,uid), UNUSED); + op64(ctx, ADD, PESP, pconst(&p,HL_WSIZE)); + } +# endif + // emit code + switch( o->op ) { + case OMov: + case OUnsafeCast: + op_mov(ctx, dst, ra); + break; + case OInt: + store_const(ctx, dst, m->code->ints[o->p2]); + break; + case OBool: + store_const(ctx, dst, o->p2); + break; + case OGetGlobal: + { + void *addr = m->globals_data + m->globals_indexes[o->p2]; +# ifdef HL_64 + preg *tmp = alloc_reg(ctx, RCPU); + op64(ctx, MOV, tmp, 
pconst64(&p,(int_val)addr)); + copy_to(ctx, dst, pmem(&p,tmp->id,0)); +# else + copy_to(ctx, dst, paddr(&p,addr)); +# endif + } + break; + case OSetGlobal: + { + void *addr = m->globals_data + m->globals_indexes[o->p1]; +# ifdef HL_64 + preg *tmp = alloc_reg(ctx, RCPU); + op64(ctx, MOV, tmp, pconst64(&p,(int_val)addr)); + copy_from(ctx, pmem(&p,tmp->id,0), ra); +# else + copy_from(ctx, paddr(&p,addr), ra); +# endif + } + break; + case OCall3: + { + int args[3] = { o->p3, o->extra[0], o->extra[1] }; + op_call_fun(ctx, dst, o->p2, 3, args); + } + break; + case OCall4: + { + int args[4] = { o->p3, o->extra[0], o->extra[1], o->extra[2] }; + op_call_fun(ctx, dst, o->p2, 4, args); + } + break; + case OCallN: + op_call_fun(ctx, dst, o->p2, o->p3, o->extra); + break; + case OCall0: + op_call_fun(ctx, dst, o->p2, 0, NULL); + break; + case OCall1: + op_call_fun(ctx, dst, o->p2, 1, &o->p3); + break; + case OCall2: + { + int args[2] = { o->p3, (int)(int_val)o->extra }; + op_call_fun(ctx, dst, o->p2, 2, args); + } + break; + case OSub: + case OAdd: + case OMul: + case OSDiv: + case OUDiv: + case OShl: + case OSShr: + case OUShr: + case OAnd: + case OOr: + case OXor: + case OSMod: + case OUMod: + op_binop(ctx, dst, ra, rb, o->op); + break; + case ONeg: + { + if( IS_FLOAT(ra) ) { + preg *pa = alloc_reg(ctx,RFPU); + preg *pb = alloc_fpu(ctx,ra,true); + op64(ctx,XORPD,pa,pa); + op64(ctx,ra->t->kind == HF32 ? 
SUBSS : SUBSD,pa,pb); + store(ctx,dst,pa,true); + } else if( ra->t->kind == HI64 ) { +# ifdef HL_64 + preg *pa = alloc_reg(ctx,RCPU); + preg *pb = alloc_cpu(ctx,ra,true); + op64(ctx,XOR,pa,pa); + op64(ctx,SUB,pa,pb); + store(ctx,dst,pa,true); +# else + error_i64(); +# endif + } else { + preg *pa = alloc_reg(ctx,RCPU); + preg *pb = alloc_cpu(ctx,ra,true); + op32(ctx,XOR,pa,pa); + op32(ctx,SUB,pa,pb); + store(ctx,dst,pa,true); + } + } + break; + case ONot: + { + preg *v = alloc_cpu(ctx,ra,true); + op32(ctx,XOR,v,pconst(&p,1)); + store(ctx,dst,v,true); + } + break; + case OJFalse: + case OJTrue: + case OJNotNull: + case OJNull: + { + preg *r = dst->t->kind == HBOOL ? alloc_cpu8(ctx, dst, true) : alloc_cpu(ctx, dst, true); + op64(ctx, dst->t->kind == HBOOL ? TEST8 : TEST, r, r); + XJump( o->op == OJFalse || o->op == OJNull ? JZero : JNotZero,jump); + register_jump(ctx,jump,(opCount + 1) + o->p2); + } + break; + case OJEq: + case OJNotEq: + case OJSLt: + case OJSGte: + case OJSLte: + case OJSGt: + case OJULt: + case OJUGte: + case OJNotLt: + case OJNotGte: + op_jump(ctx,dst,ra,o,(opCount + 1) + o->p3); + break; + case OJAlways: + jump = do_jump(ctx,o->op,false); + register_jump(ctx,jump,(opCount + 1) + o->p1); + break; + case OToDyn: + if( ra->t->kind == HBOOL ) { + int size = begin_native_call(ctx, 1); + set_native_arg(ctx, fetch(ra)); + call_native(ctx, hl_alloc_dynbool, size); + store(ctx, dst, PEAX, true); + } else { + int_val rt = (int_val)ra->t; + int jskip = 0; + if( hl_is_ptr(ra->t) ) { + int jnz; + preg *a = alloc_cpu(ctx,ra,true); + op64(ctx,TEST,a,a); + XJump_small(JNotZero,jnz); + op64(ctx,XOR,PEAX,PEAX); // will replace the result of alloc_dynamic at jump land + XJump_small(JAlways,jskip); + patch_jump(ctx,jnz); + } + call_native_consts(ctx, hl_alloc_dynamic, &rt, 1); + // copy value to dynamic + if( (IS_FLOAT(ra) || ra->size == 8) && !IS_64 ) { + preg *tmp = REG_AT(RCPU_SCRATCH_REGS[1]); + op64(ctx,MOV,tmp,&ra->stack); + 
op32(ctx,MOV,pmem(&p,Eax,HDYN_VALUE),tmp); + if( ra->t->kind == HF64 ) { + ra->stackPos += 4; + op64(ctx,MOV,tmp,&ra->stack); + op32(ctx,MOV,pmem(&p,Eax,HDYN_VALUE+4),tmp); + ra->stackPos -= 4; + } + } else { + preg *tmp = REG_AT(RCPU_SCRATCH_REGS[1]); + copy_from(ctx,tmp,ra); + op64(ctx,MOV,pmem(&p,Eax,HDYN_VALUE),tmp); + } + if( hl_is_ptr(ra->t) ) patch_jump(ctx,jskip); + store(ctx, dst, PEAX, true); + } + break; + case OToSFloat: + if( ra == dst ) break; + if (ra->t->kind == HI32 || ra->t->kind == HUI16 || ra->t->kind == HUI8) { + preg* r = alloc_cpu(ctx, ra, true); + preg* w = alloc_fpu(ctx, dst, false); + op32(ctx, dst->t->kind == HF64 ? CVTSI2SD : CVTSI2SS, w, r); + store(ctx, dst, w, true); + } else if (ra->t->kind == HI64 ) { + preg* r = alloc_cpu(ctx, ra, true); + preg* w = alloc_fpu(ctx, dst, false); + op64(ctx, dst->t->kind == HF64 ? CVTSI2SD : CVTSI2SS, w, r); + store(ctx, dst, w, true); + } else if( ra->t->kind == HF64 && dst->t->kind == HF32 ) { + preg *r = alloc_fpu(ctx,ra,true); + preg *w = alloc_fpu(ctx,dst,false); + op32(ctx,CVTSD2SS,w,r); + store(ctx, dst, w, true); + } else if( ra->t->kind == HF32 && dst->t->kind == HF64 ) { + preg *r = alloc_fpu(ctx,ra,true); + preg *w = alloc_fpu(ctx,dst,false); + op32(ctx,CVTSS2SD,w,r); + store(ctx, dst, w, true); + } else + ASSERT(0); + break; + case OToUFloat: + { + int size; + size = prepare_call_args(ctx,1,&o->p2,ctx->vregs,0); + call_native(ctx,uint_to_double,size); + store_result(ctx,dst); + } + break; + case OToInt: + if( ra == dst ) break; + if( ra->t->kind == HF64 ) { + preg *r = alloc_fpu(ctx,ra,true); + preg *w = alloc_cpu(ctx,dst,false); + preg *tmp = alloc_reg(ctx,RCPU); + op32(ctx,STMXCSR,pmem(&p,Esp,-4),UNUSED); + op32(ctx,MOV,tmp,&p); + op32(ctx,OR,tmp,pconst(&p,0x6000)); // set round towards 0 + op32(ctx,MOV,pmem(&p,Esp,-8),tmp); + op32(ctx,LDMXCSR,&p,UNUSED); + op32(ctx,CVTSD2SI,w,r); + op32(ctx,LDMXCSR,pmem(&p,Esp,-4),UNUSED); + store(ctx, dst, w, true); + } else if (ra->t->kind == HF32) { 
+ preg *r = alloc_fpu(ctx, ra, true); + preg *w = alloc_cpu(ctx, dst, false); + preg *tmp = alloc_reg(ctx, RCPU); + op32(ctx, STMXCSR, pmem(&p, Esp, -4), UNUSED); + op32(ctx, MOV, tmp, &p); + op32(ctx, OR, tmp, pconst(&p, 0x6000)); // set round towards 0 + op32(ctx, MOV, pmem(&p, Esp, -8), tmp); + op32(ctx, LDMXCSR, &p, UNUSED); + op32(ctx, CVTSS2SI, w, r); + op32(ctx, LDMXCSR, pmem(&p, Esp, -4), UNUSED); + store(ctx, dst, w, true); + } else if( (dst->t->kind == HI64 || dst->t->kind == HGUID) && ra->t->kind == HI32 ) { + if( ra->current != PEAX ) { + op32(ctx, MOV, PEAX, fetch(ra)); + scratch(PEAX); + } +# ifdef HL_64 + op64(ctx, CDQE, UNUSED, UNUSED); // sign-extend Eax into Rax + store(ctx, dst, PEAX, true); +# else + op32(ctx, CDQ, UNUSED, UNUSED); // sign-extend Eax into Eax:Edx + scratch(REG_AT(Edx)); + op32(ctx, MOV, fetch(dst), PEAX); + dst->stackPos += 4; + op32(ctx, MOV, fetch(dst), REG_AT(Edx)); + dst->stackPos -= 4; + } else if( dst->t->kind == HI32 && ra->t->kind == HI64 ) { + error_i64(); +# endif + } else { + preg *r = alloc_cpu(ctx,dst,false); + copy_from(ctx, r, ra); + store(ctx, dst, r, true); + } + break; + case ORet: + op_ret(ctx, dst); + break; + case OIncr: + { + if( IS_FLOAT(dst) ) { + ASSERT(0); + } else { + preg *v = fetch32(ctx,dst); + op32(ctx,INC,v,UNUSED); + if( v->kind != RSTACK ) store(ctx, dst, v, false); + } + } + break; + case ODecr: + { + if( IS_FLOAT(dst) ) { + ASSERT(0); + } else { + preg *v = fetch32(ctx,dst); + op32(ctx,DEC,v,UNUSED); + if( v->kind != RSTACK ) store(ctx, dst, v, false); + } + } + break; + case OFloat: + { + if( m->code->floats[o->p2] == 0 ) { + preg *f = alloc_fpu(ctx,dst,false); + op64(ctx,XORPD,f,f); + } else switch( dst->t->kind ) { + case HF64: + case HF32: +# ifdef HL_64 + op64(ctx,dst->t->kind == HF32 ? CVTSD2SS : MOVSD,alloc_fpu(ctx,dst,false),pcodeaddr(&p,o->p2 * 8)); +# else + op64(ctx,dst->t->kind == HF32 ? 
MOVSS : MOVSD,alloc_fpu(ctx,dst,false),paddr(&p,m->code->floats + o->p2)); +# endif + break; + default: + ASSERT(dst->t->kind); + } + store(ctx,dst,dst->current,false); + } + break; + case OString: + op64(ctx,MOV,alloc_cpu(ctx, dst, false),pconst64(&p,(int_val)hl_get_ustring(m->code,o->p2))); + store(ctx,dst,dst->current,false); + break; + case OBytes: + { + char *b = m->code->version >= 5 ? m->code->bytes + m->code->bytes_pos[o->p2] : m->code->strings[o->p2]; + op64(ctx,MOV,alloc_cpu(ctx,dst,false),pconst64(&p,(int_val)b)); + store(ctx,dst,dst->current,false); + } + break; + case ONull: + { + op64(ctx,XOR,alloc_cpu(ctx, dst, false),alloc_cpu(ctx, dst, false)); + store(ctx,dst,dst->current,false); + } + break; + case ONew: + { + int_val args[] = { (int_val)dst->t }; + void *allocFun; + int nargs = 1; + switch( dst->t->kind ) { + case HOBJ: + case HSTRUCT: + allocFun = hl_alloc_obj; + break; + case HDYNOBJ: + allocFun = hl_alloc_dynobj; + nargs = 0; + break; + case HVIRTUAL: + allocFun = hl_alloc_virtual; + break; + default: + ASSERT(dst->t->kind); + } + call_native_consts(ctx, allocFun, args, nargs); + store(ctx, dst, PEAX, true); + } + break; + case OInstanceClosure: + { + preg *r = alloc_cpu(ctx, rb, true); + jlist *j = (jlist*)hl_malloc(&ctx->galloc,sizeof(jlist)); + int size = begin_native_call(ctx,3); + set_native_arg(ctx,r); + + j->pos = BUF_POS(); + j->target = o->p2; + j->next = ctx->calls; + ctx->calls = j; + + set_native_arg(ctx,pconst64(&p,RESERVE_ADDRESS)); + set_native_arg(ctx,pconst64(&p,(int_val)m->code->functions[m->functions_indexes[o->p2]].type)); + call_native(ctx,hl_alloc_closure_ptr,size); + store(ctx,dst,PEAX,true); + } + break; + case OVirtualClosure: + { + int size, i; + preg *r = alloc_cpu_call(ctx, ra); + hl_type *t = NULL; + hl_type *ot = ra->t; + while( t == NULL ) { + for(i=0;iobj->nproto;i++) { + hl_obj_proto *pp = ot->obj->proto + i; + if( pp->pindex == o->p3 ) { + t = m->code->functions[m->functions_indexes[pp->findex]].type; + 
break; + } + } + ot = ot->obj->super; + } + size = begin_native_call(ctx,3); + set_native_arg(ctx,r); + // read r->type->vobj_proto[i] for function address + op64(ctx,MOV,r,pmem(&p,r->id,0)); + op64(ctx,MOV,r,pmem(&p,r->id,HL_WSIZE*2)); + op64(ctx,MOV,r,pmem(&p,r->id,HL_WSIZE*o->p3)); + set_native_arg(ctx,r); + op64(ctx,MOV,r,pconst64(&p,(int_val)t)); + set_native_arg(ctx,r); + call_native(ctx,hl_alloc_closure_ptr,size); + store(ctx,dst,PEAX,true); + } + break; + case OCallClosure: + if( ra->t->kind == HDYN ) { + // ASM for { + // vdynamic *args[] = {args}; + // vdynamic *ret = hl_dyn_call(closure,args,nargs); + // dst = hl_dyncast(ret,t_dynamic,t_dst); + // } + int offset = o->p3 * HL_WSIZE; + preg *r = alloc_reg(ctx, RCPU_CALL); + if( offset & 15 ) offset += 16 - (offset & 15); + op64(ctx,SUB,PESP,pconst(&p,offset)); + op64(ctx,MOV,r,PESP); + for(i=0;ip3;i++) { + vreg *a = R(o->extra[i]); + if( !hl_is_dynamic(a->t) ) ASSERT(0); + preg *v = alloc_cpu(ctx,a,true); + op64(ctx,MOV,pmem(&p,r->id,i * HL_WSIZE),v); + RUNLOCK(v); + } +# ifdef HL_64 + int size = begin_native_call(ctx, 3) + offset; + set_native_arg(ctx, pconst(&p,o->p3)); + set_native_arg(ctx, r); + set_native_arg(ctx, fetch(ra)); +# else + int size = pad_before_call(ctx,HL_WSIZE*2 + sizeof(int) + offset); + op64(ctx,PUSH,pconst(&p,o->p3),UNUSED); + op64(ctx,PUSH,r,UNUSED); + op64(ctx,PUSH,alloc_cpu(ctx,ra,true),UNUSED); +# endif + call_native(ctx,hl_dyn_call,size); + if( dst->t->kind != HVOID ) { + store(ctx,dst,PEAX,true); + make_dyn_cast(ctx,dst,dst); + } + } else { + int jhasvalue, jend, size; + // ASM for if( c->hasValue ) c->fun(value,args) else c->fun(args) + preg *r = alloc_cpu(ctx,ra,true); + preg *tmp = alloc_reg(ctx, RCPU); + op32(ctx,MOV,tmp,pmem(&p,r->id,HL_WSIZE*2)); + op32(ctx,TEST,tmp,tmp); + scratch(tmp); + XJump_small(JNotZero,jhasvalue); + save_regs(ctx); + size = prepare_call_args(ctx,o->p3,o->extra,ctx->vregs,0); + preg *rr = r; + if( rr->holds != ra ) rr = alloc_cpu(ctx, ra, true); + 
op_call(ctx, pmem(&p,rr->id,HL_WSIZE), size); + XJump_small(JAlways,jend); + patch_jump(ctx,jhasvalue); + restore_regs(ctx); +# ifdef HL_64 + { + int regids[64]; + preg *pc = REG_AT(CALL_REGS[0]); + vreg *sc = R(f->nregs); // scratch register that we temporary rebind + if( o->p3 >= 63 ) jit_error("assert"); + memcpy(regids + 1, o->extra, o->p3 * sizeof(int)); + regids[0] = f->nregs; + sc->size = HL_WSIZE; + sc->t = &hlt_dyn; + op64(ctx, MOV, pc, pmem(&p,r->id,HL_WSIZE*3)); + scratch(pc); + sc->current = pc; + pc->holds = sc; + size = prepare_call_args(ctx,o->p3 + 1,regids,ctx->vregs,0); + if( r->holds != ra ) r = alloc_cpu(ctx, ra, true); + } +# else + size = prepare_call_args(ctx,o->p3,o->extra,ctx->vregs,HL_WSIZE); + if( r->holds != ra ) r = alloc_cpu(ctx, ra, true); + op64(ctx, PUSH,pmem(&p,r->id,HL_WSIZE*3),UNUSED); // push closure value +# endif + op_call(ctx, pmem(&p,r->id,HL_WSIZE), size); + discard_regs(ctx,false); + patch_jump(ctx,jend); + store_result(ctx, dst); + } + break; + case OStaticClosure: + { + vclosure *c = alloc_static_closure(ctx,o->p2); + preg *r = alloc_reg(ctx, RCPU); + op64(ctx, MOV, r, pconst64(&p,(int_val)c)); + store(ctx,dst,r,true); + } + break; + case OField: + { +# ifndef HL_64 + if( dst->t->kind == HI64 ) { + error_i64(); + break; + } +# endif + switch( ra->t->kind ) { + case HOBJ: + case HSTRUCT: + { + hl_runtime_obj *rt = hl_get_obj_rt(ra->t); + preg *rr = alloc_cpu(ctx,ra, true); + if( dst->t->kind == HSTRUCT ) { + hl_type *ft = hl_obj_field_fetch(ra->t,o->p3)->t; + if( ft->kind == HPACKED ) { + preg *r = alloc_reg(ctx,RCPU); + op64(ctx,LEA,r,pmem(&p,(CpuReg)rr->id,rt->fields_indexes[o->p3])); + store(ctx,dst,r,true); + break; + } + } + copy_to(ctx,dst,pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p3])); + } + break; + case HVIRTUAL: + // ASM for --> if( hl_vfields(o)[f] ) r = *hl_vfields(o)[f]; else r = hl_dyn_get(o,hash(field),vt) + { + int jhasfield, jend, size; + bool need_type = !(IS_FLOAT(dst) || dst->t->kind == HI64); + 
preg *v = alloc_cpu_call(ctx,ra); + preg *r = alloc_reg(ctx,RCPU); + op64(ctx,MOV,r,pmem(&p,v->id,sizeof(vvirtual)+HL_WSIZE*o->p3)); + op64(ctx,TEST,r,r); + XJump_small(JNotZero,jhasfield); + size = begin_native_call(ctx, need_type ? 3 : 2); + if( need_type ) set_native_arg(ctx,pconst64(&p,(int_val)dst->t)); + set_native_arg(ctx,pconst64(&p,(int_val)ra->t->virt->fields[o->p3].hashed_name)); + set_native_arg(ctx,v); + call_native(ctx,get_dynget(dst->t),size); + store_result(ctx,dst); + XJump_small(JAlways,jend); + patch_jump(ctx,jhasfield); + copy_to(ctx, dst, pmem(&p,(CpuReg)r->id,0)); + patch_jump(ctx,jend); + scratch(dst->current); + } + break; + default: + ASSERT(ra->t->kind); + break; + } + } + break; + case OSetField: + { + switch( dst->t->kind ) { + case HOBJ: + case HSTRUCT: + { + hl_runtime_obj *rt = hl_get_obj_rt(dst->t); + preg *rr = alloc_cpu(ctx, dst, true); + if( rb->t->kind == HSTRUCT ) { + hl_type *ft = hl_obj_field_fetch(dst->t,o->p2)->t; + if( ft->kind == HPACKED ) { + hl_runtime_obj *frt = hl_get_obj_rt(ft->tparam); + preg *prb = alloc_cpu(ctx, rb, true); + preg *tmp = alloc_reg(ctx, RCPU_CALL); + int offset = 0; + while( offset < frt->size ) { + int remain = frt->size - offset; + int copy_size = remain >= HL_WSIZE ? HL_WSIZE : (remain >= 4 ? 4 : (remain >= 2 ? 
2 : 1)); + copy(ctx, tmp, pmem(&p, (CpuReg)prb->id, offset), copy_size); + copy(ctx, pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p2]+offset), tmp, copy_size); + offset += copy_size; + } + break; + } + } + copy_from(ctx, pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p2]), rb); + } + break; + case HVIRTUAL: + // ASM for --> if( hl_vfields(o)[f] ) *hl_vfields(o)[f] = v; else hl_dyn_set(o,hash(field),vt,v) + { + int jhasfield, jend; + preg *obj = alloc_cpu_call(ctx,dst); + preg *r = alloc_reg(ctx,RCPU); + op64(ctx,MOV,r,pmem(&p,obj->id,sizeof(vvirtual)+HL_WSIZE*o->p2)); + op64(ctx,TEST,r,r); + XJump_small(JNotZero,jhasfield); +# ifdef HL_64 + switch( rb->t->kind ) { + case HF64: + case HF32: + size = begin_native_call(ctx,3); + set_native_arg_fpu(ctx, fetch(rb), rb->t->kind == HF32); + break; + case HI64: + case HGUID: + size = begin_native_call(ctx,3); + set_native_arg(ctx, fetch(rb)); + break; + default: + size = begin_native_call(ctx, 4); + set_native_arg(ctx, fetch(rb)); + set_native_arg(ctx, pconst64(&p,(int_val)rb->t)); + break; + } + set_native_arg(ctx,pconst(&p,dst->t->virt->fields[o->p2].hashed_name)); + set_native_arg(ctx,obj); +# else + switch( rb->t->kind ) { + case HF64: + case HI64: + case HGUID: + size = pad_before_call(ctx,HL_WSIZE*2 + sizeof(double)); + push_reg(ctx,rb); + break; + case HF32: + size = pad_before_call(ctx,HL_WSIZE*2 + sizeof(float)); + push_reg(ctx,rb); + break; + default: + size = pad_before_call(ctx,HL_WSIZE*4); + op64(ctx,PUSH,fetch32(ctx,rb),UNUSED); + op64(ctx,MOV,r,pconst64(&p,(int_val)rb->t)); + op64(ctx,PUSH,r,UNUSED); + break; + } + op32(ctx,MOV,r,pconst(&p,dst->t->virt->fields[o->p2].hashed_name)); + op64(ctx,PUSH,r,UNUSED); + op64(ctx,PUSH,obj,UNUSED); +# endif + call_native(ctx,get_dynset(rb->t),size); + XJump_small(JAlways,jend); + patch_jump(ctx,jhasfield); + copy_from(ctx, pmem(&p,(CpuReg)r->id,0), rb); + patch_jump(ctx,jend); + scratch(rb->current); + } + break; + default: + ASSERT(dst->t->kind); + break; + } + } + 
break; + case OGetThis: + { + vreg *r = R(0); + hl_runtime_obj *rt = hl_get_obj_rt(r->t); + preg *rr = alloc_cpu(ctx,r, true); + if( dst->t->kind == HSTRUCT ) { + hl_type *ft = hl_obj_field_fetch(r->t,o->p2)->t; + if( ft->kind == HPACKED ) { + preg *r = alloc_reg(ctx,RCPU); + op64(ctx,LEA,r,pmem(&p,(CpuReg)rr->id,rt->fields_indexes[o->p2])); + store(ctx,dst,r,true); + break; + } + } + copy_to(ctx,dst,pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p2])); + } + break; + case OSetThis: + { + vreg *r = R(0); + hl_runtime_obj *rt = hl_get_obj_rt(r->t); + preg *rr = alloc_cpu(ctx, r, true); + if( ra->t->kind == HSTRUCT ) { + hl_type *ft = hl_obj_field_fetch(r->t,o->p1)->t; + if( ft->kind == HPACKED ) { + hl_runtime_obj *frt = hl_get_obj_rt(ft->tparam); + preg *pra = alloc_cpu(ctx, ra, true); + preg *tmp = alloc_reg(ctx, RCPU_CALL); + int offset = 0; + while( offset < frt->size ) { + int remain = frt->size - offset; + int copy_size = remain >= HL_WSIZE ? HL_WSIZE : (remain >= 4 ? 4 : (remain >= 2 ? 
2 : 1)); + copy(ctx, tmp, pmem(&p, (CpuReg)pra->id, offset), copy_size); + copy(ctx, pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p1]+offset), tmp, copy_size); + offset += copy_size; + } + break; + } + } + copy_from(ctx, pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p1]), ra); + } + break; + case OCallThis: + { + int nargs = o->p3 + 1; + int *args = (int*)hl_malloc(&ctx->falloc,sizeof(int) * nargs); + int size; + preg *r = alloc_cpu(ctx, R(0), true); + preg *tmp; + tmp = alloc_reg(ctx, RCPU_CALL); + op64(ctx,MOV,tmp,pmem(&p,r->id,0)); // read type + op64(ctx,MOV,tmp,pmem(&p,tmp->id,HL_WSIZE*2)); // read proto + args[0] = 0; + for(i=1;iextra[i-1]; + size = prepare_call_args(ctx,nargs,args,ctx->vregs,0); + op_call(ctx,pmem(&p,tmp->id,o->p2*HL_WSIZE),size); + discard_regs(ctx, false); + store_result(ctx, dst); + } + break; + case OCallMethod: + switch( R(o->extra[0])->t->kind ) { + case HOBJ: { + int size; + preg *r = alloc_cpu(ctx, R(o->extra[0]), true); + preg *tmp; + tmp = alloc_reg(ctx, RCPU_CALL); + op64(ctx,MOV,tmp,pmem(&p,r->id,0)); // read type + op64(ctx,MOV,tmp,pmem(&p,tmp->id,HL_WSIZE*2)); // read proto + size = prepare_call_args(ctx,o->p3,o->extra,ctx->vregs,0); + op_call(ctx,pmem(&p,tmp->id,o->p2*HL_WSIZE),size); + discard_regs(ctx, false); + store_result(ctx, dst); + break; + } + case HVIRTUAL: + // ASM for --> if( hl_vfields(o)[f] ) dst = *hl_vfields(o)[f](o->value,args...); else dst = hl_dyn_call_obj(o->value,field,args,&ret) + { + int size; + int paramsSize; + int jhasfield, jend; + bool need_dyn; + bool obj_in_args = false; + vreg *obj = R(o->extra[0]); + preg *v = alloc_cpu_call(ctx,obj); + preg *r = alloc_reg(ctx,RCPU_CALL); + op64(ctx,MOV,r,pmem(&p,v->id,sizeof(vvirtual)+HL_WSIZE*o->p2)); + op64(ctx,TEST,r,r); + save_regs(ctx); + + if( o->p3 < 6 ) { + XJump_small(JNotZero,jhasfield); + } else { + XJump(JNotZero,jhasfield); + } + + need_dyn = !hl_is_ptr(dst->t) && dst->t->kind != HVOID; + paramsSize = (o->p3 - 1) * HL_WSIZE; + if( need_dyn ) 
paramsSize += sizeof(vdynamic); + if( paramsSize & 15 ) paramsSize += 16 - (paramsSize&15); + op64(ctx,SUB,PESP,pconst(&p,paramsSize)); + op64(ctx,MOV,r,PESP); + + for(i=0;ip3-1;i++) { + vreg *a = R(o->extra[i+1]); + if( hl_is_ptr(a->t) ) { + op64(ctx,MOV,pmem(&p,r->id,i*HL_WSIZE),alloc_cpu(ctx,a,true)); + if( a->current != v ) { + RUNLOCK(a->current); + } else + obj_in_args = true; + } else { + preg *r2 = alloc_reg(ctx,RCPU); + op64(ctx,LEA,r2,&a->stack); + op64(ctx,MOV,pmem(&p,r->id,i*HL_WSIZE),r2); + if( r2 != v ) RUNLOCK(r2); + } + } + + jit_buf(ctx); + + if( !need_dyn ) { + size = begin_native_call(ctx, 5); + set_native_arg(ctx, pconst(&p,0)); + } else { + preg *rtmp = alloc_reg(ctx,RCPU); + op64(ctx,LEA,rtmp,pmem(&p,Esp,paramsSize - sizeof(vdynamic))); + size = begin_native_call(ctx, 5); + set_native_arg(ctx,rtmp); + if( !IS_64 ) RUNLOCK(rtmp); + } + set_native_arg(ctx,r); + set_native_arg(ctx,pconst(&p,obj->t->virt->fields[o->p2].hashed_name)); // fid + set_native_arg(ctx,pconst64(&p,(int_val)obj->t->virt->fields[o->p2].t)); // ftype + set_native_arg(ctx,pmem(&p,v->id,HL_WSIZE)); // o->value + call_native(ctx,hl_dyn_call_obj,size + paramsSize); + if( need_dyn ) { + preg *r = IS_FLOAT(dst) ? 
REG_AT(XMM(0)) : PEAX; + copy(ctx,r,pmem(&p,Esp,HDYN_VALUE - (int)sizeof(vdynamic)),dst->size); + store(ctx, dst, r, false); + } else + store(ctx, dst, PEAX, false); + + XJump_small(JAlways,jend); + patch_jump(ctx,jhasfield); + restore_regs(ctx); + + if( !obj_in_args ) { + // o = o->value hack + if( v->holds ) v->holds->current = NULL; + obj->current = v; + v->holds = obj; + op64(ctx,MOV,v,pmem(&p,v->id,HL_WSIZE)); + size = prepare_call_args(ctx,o->p3,o->extra,ctx->vregs,0); + } else { + // keep o->value in R(f->nregs) + int regids[64]; + preg *pc = alloc_reg(ctx,RCPU_CALL); + vreg *sc = R(f->nregs); // scratch register that we temporary rebind + if( o->p3 >= 63 ) jit_error("assert"); + memcpy(regids, o->extra, o->p3 * sizeof(int)); + regids[0] = f->nregs; + sc->size = HL_WSIZE; + sc->t = &hlt_dyn; + op64(ctx, MOV, pc, pmem(&p,v->id,HL_WSIZE)); + scratch(pc); + sc->current = pc; + pc->holds = sc; + size = prepare_call_args(ctx,o->p3,regids,ctx->vregs,0); + } + + op_call(ctx,r,size); + discard_regs(ctx, false); + store_result(ctx, dst); + patch_jump(ctx,jend); + } + break; + default: + ASSERT(0); + break; + } + break; + case ORethrow: + { + int size = prepare_call_args(ctx,1,&o->p1,ctx->vregs,0); + call_native(ctx,hl_rethrow,size); + } + break; + case OThrow: + { + int size = prepare_call_args(ctx,1,&o->p1,ctx->vregs,0); + call_native(ctx,hl_throw,size); + } + break; + case OLabel: + // NOP for now + discard_regs(ctx,false); + break; + case OGetI8: + case OGetI16: + { + preg *base = alloc_cpu(ctx, ra, true); + preg *offset = alloc_cpu64(ctx, rb, true); + preg *r = alloc_reg(ctx,o->op == OGetI8 ? RCPU_8BITS : RCPU); + op64(ctx,XOR,r,r); + op32(ctx, o->op == OGetI8 ? 
MOV8 : MOV16,r,pmem2(&p,base->id,offset->id,1,0)); + store(ctx, dst, r, true); + } + break; + case OGetMem: + { + #ifndef HL_64 + if (dst->t->kind == HI64) { + error_i64(); + } + #endif + preg *base = alloc_cpu(ctx, ra, true); + preg *offset = alloc_cpu64(ctx, rb, true); + store(ctx, dst, pmem2(&p,base->id,offset->id,1,0), false); + } + break; + case OSetI8: + { + preg *base = alloc_cpu(ctx, dst, true); + preg *offset = alloc_cpu64(ctx, ra, true); + preg *value = alloc_cpu8(ctx, rb, true); + op32(ctx,MOV8,pmem2(&p,base->id,offset->id,1,0),value); + } + break; + case OSetI16: + { + preg *base = alloc_cpu(ctx, dst, true); + preg *offset = alloc_cpu64(ctx, ra, true); + preg *value = alloc_cpu(ctx, rb, true); + op32(ctx,MOV16,pmem2(&p,base->id,offset->id,1,0),value); + } + break; + case OSetMem: + { + preg *base = alloc_cpu(ctx, dst, true); + preg *offset = alloc_cpu64(ctx, ra, true); + preg *value; + switch( rb->t->kind ) { + case HI32: + value = alloc_cpu(ctx, rb, true); + op32(ctx,MOV,pmem2(&p,base->id,offset->id,1,0),value); + break; + case HF32: + value = alloc_fpu(ctx, rb, true); + op32(ctx,MOVSS,pmem2(&p,base->id,offset->id,1,0),value); + break; + case HF64: + value = alloc_fpu(ctx, rb, true); + op32(ctx,MOVSD,pmem2(&p,base->id,offset->id,1,0),value); + break; + case HI64: + case HGUID: + value = alloc_cpu(ctx, rb, true); + op64(ctx,MOV,pmem2(&p,base->id,offset->id,1,0),value); + break; + default: + ASSERT(rb->t->kind); + break; + } + } + break; + case OType: + { + op64(ctx,MOV,alloc_cpu(ctx, dst, false),pconst64(&p,(int_val)(m->code->types + o->p2))); + store(ctx,dst,dst->current,false); + } + break; + case OGetType: + { + int jnext, jend; + preg *r = alloc_cpu(ctx, ra, true); + preg *tmp = alloc_reg(ctx, RCPU); + op64(ctx,TEST,r,r); + XJump_small(JNotZero,jnext); + op64(ctx,MOV, tmp, pconst64(&p,(int_val)&hlt_void)); + XJump_small(JAlways,jend); + patch_jump(ctx,jnext); + op64(ctx, MOV, tmp, pmem(&p,r->id,0)); + patch_jump(ctx,jend); + store(ctx,dst,tmp,true); 
+ } + break; + case OGetArray: + { + preg *rdst = IS_FLOAT(dst) ? alloc_fpu(ctx,dst,false) : alloc_cpu(ctx,dst,false); + if( ra->t->kind == HABSTRACT ) { + int osize; + bool isRead = dst->t->kind != HOBJ && dst->t->kind != HSTRUCT; + if( isRead ) + osize = sizeof(void*); + else { + hl_runtime_obj *rt = hl_get_obj_rt(dst->t); + osize = rt->size; + } + preg *idx = alloc_cpu64(ctx, rb, true); + op64(ctx, IMUL, idx, pconst(&p,osize)); + op64(ctx, isRead?MOV:LEA, rdst, pmem2(&p,alloc_cpu(ctx,ra, true)->id,idx->id,1,0)); + store(ctx,dst,dst->current,false); + scratch(idx); + } else { + copy(ctx, rdst, pmem2(&p,alloc_cpu(ctx,ra,true)->id,alloc_cpu64(ctx,rb,true)->id,hl_type_size(dst->t),sizeof(varray)), dst->size); + store(ctx,dst,dst->current,false); + } + } + break; + case OSetArray: + { + if( dst->t->kind == HABSTRACT ) { + int osize; + bool isWrite = rb->t->kind != HOBJ && rb->t->kind != HSTRUCT; + if( isWrite ) { + osize = sizeof(void*); + } else { + hl_runtime_obj *rt = hl_get_obj_rt(rb->t); + osize = rt->size; + } + preg *pdst = alloc_cpu(ctx,dst,true); + preg *pra = alloc_cpu64(ctx,ra,true); + op64(ctx, IMUL, pra, pconst(&p,osize)); + op64(ctx, ADD, pdst, pra); + scratch(pra); + preg *prb = alloc_cpu(ctx,rb,true); + preg *tmp = alloc_reg(ctx, RCPU_CALL); + int offset = 0; + while( offset < osize ) { + int remain = osize - offset; + int copy_size = remain >= HL_WSIZE ? HL_WSIZE : (remain >= 4 ? 4 : (remain >= 2 ? 2 : 1)); + copy(ctx, tmp, pmem(&p, prb->id, offset), copy_size); + copy(ctx, pmem(&p, pdst->id, offset), tmp, copy_size); + offset += copy_size; + } + scratch(pdst); + } else { + preg *rrb = IS_FLOAT(rb) ? alloc_fpu(ctx,rb,true) : alloc_cpu(ctx,rb,true); + copy(ctx, pmem2(&p,alloc_cpu(ctx,dst,true)->id,alloc_cpu64(ctx,ra,true)->id,hl_type_size(rb->t),sizeof(varray)), rrb, rb->size); + } + } + break; + case OArraySize: + { + op32(ctx,MOV,alloc_cpu(ctx,dst,false),pmem(&p,alloc_cpu(ctx,ra,true)->id,ra->t->kind == HABSTRACT ? 
HL_WSIZE + 4 : HL_WSIZE*2)); + store(ctx,dst,dst->current,false); + } + break; + case ORef: + { + scratch(ra->current); + op64(ctx,MOV,alloc_cpu(ctx,dst,false),REG_AT(Ebp)); + if( ra->stackPos < 0 ) + op64(ctx,SUB,dst->current,pconst(&p,-ra->stackPos)); + else + op64(ctx,ADD,dst->current,pconst(&p,ra->stackPos)); + store(ctx,dst,dst->current,false); + } + break; + case OUnref: + copy_to(ctx,dst,pmem(&p,alloc_cpu(ctx,ra,true)->id,0)); + break; + case OSetref: + copy_from(ctx,pmem(&p,alloc_cpu(ctx,dst,true)->id,0),ra); + break; + case ORefData: + switch( ra->t->kind ) { + case HARRAY: + { + preg *r = fetch(ra); + preg *d = alloc_cpu(ctx,dst,false); + op64(ctx,MOV,d,r); + op64(ctx,ADD,d,pconst(&p,sizeof(varray))); + store(ctx,dst,dst->current,false); + } + break; + default: + ASSERT(ra->t->kind); + } + break; + case ORefOffset: + { + preg *d = alloc_cpu(ctx,rb,true); + preg *r2 = alloc_cpu(ctx,dst,false); + preg *r = fetch(ra); + int size = hl_type_size(dst->t->tparam); + op64(ctx,MOV,r2,r); + switch( size ) { + case 1: + break; + case 2: + op64(ctx,SHL,d,pconst(&p,1)); + break; + case 4: + op64(ctx,SHL,d,pconst(&p,2)); + break; + case 8: + op64(ctx,SHL,d,pconst(&p,3)); + break; + default: + op64(ctx,IMUL,d,pconst(&p,size)); + break; + } + op64(ctx,ADD,r2,d); + scratch(d); + store(ctx,dst,dst->current,false); + } + break; + case OToVirtual: + { +# ifdef HL_64 + int size = pad_before_call(ctx, 0); + op64(ctx,MOV,REG_AT(CALL_REGS[1]),fetch(ra)); + op64(ctx,MOV,REG_AT(CALL_REGS[0]),pconst64(&p,(int_val)dst->t)); +# else + int size = pad_before_call(ctx, HL_WSIZE*2); + op32(ctx,PUSH,fetch(ra),UNUSED); + op32(ctx,PUSH,pconst(&p,(int)(int_val)dst->t),UNUSED); +# endif + if( ra->t->kind == HOBJ ) hl_get_obj_rt(ra->t); // ensure it's initialized + call_native(ctx,hl_to_virtual,size); + store(ctx,dst,PEAX,true); + } + break; + case OMakeEnum: + { + hl_enum_construct *c = &dst->t->tenum->constructs[o->p2]; + int_val args[] = { (int_val)dst->t, o->p2 }; + int i; + 
call_native_consts(ctx, hl_alloc_enum, args, 2); + RLOCK(PEAX); + for(i=0;inparams;i++) { + preg *r = fetch(R(o->extra[i])); + copy(ctx, pmem(&p,Eax,c->offsets[i]),r, R(o->extra[i])->size); + RUNLOCK(fetch(R(o->extra[i]))); + if ((i & 15) == 0) jit_buf(ctx); + } + store(ctx, dst, PEAX, true); + } + break; + case OEnumAlloc: + { + int_val args[] = { (int_val)dst->t, o->p2 }; + call_native_consts(ctx, hl_alloc_enum, args, 2); + store(ctx, dst, PEAX, true); + } + break; + case OEnumField: + { + hl_enum_construct *c = &ra->t->tenum->constructs[o->p3]; + preg *r = alloc_cpu(ctx,ra,true); + copy_to(ctx,dst,pmem(&p,r->id,c->offsets[(int)(int_val)o->extra])); + } + break; + case OSetEnumField: + { + hl_enum_construct *c = &dst->t->tenum->constructs[0]; + preg *r = alloc_cpu(ctx,dst,true); + switch( rb->t->kind ) { + case HF64: + { + preg *d = alloc_fpu(ctx,rb,true); + copy(ctx,pmem(&p,r->id,c->offsets[o->p2]),d,8); + break; + } + default: + copy(ctx,pmem(&p,r->id,c->offsets[o->p2]),alloc_cpu(ctx,rb,true),hl_type_size(c->params[o->p2])); + break; + } + } + break; + case ONullCheck: + { + int jz; + preg *r = alloc_cpu(ctx,dst,true); + op64(ctx,TEST,r,r); + XJump_small(JNotZero,jz); + + hl_opcode *next = f->ops + opCount + 1; + bool null_field_access = false; + int hashed_name = 0; + // skip const and operation between nullcheck and access + while( (next < f->ops + f->nops - 1) && (next->op >= OInt && next->op <= ODecr) ) { + next++; + } + if( (next->op == OField && next->p2 == o->p1) || (next->op == OSetField && next->p1 == o->p1) ) { + int fid = next->op == OField ? 
next->p3 : next->p2; + hl_obj_field *f = NULL; + if( dst->t->kind == HOBJ || dst->t->kind == HSTRUCT ) + f = hl_obj_field_fetch(dst->t, fid); + else if( dst->t->kind == HVIRTUAL ) + f = dst->t->virt->fields + fid; + if( f == NULL ) ASSERT(dst->t->kind); + null_field_access = true; + hashed_name = f->hashed_name; + } else if( (next->op >= OCall1 && next->op <= OCallN) && next->p3 == o->p1 ) { + int fid = next->p2 < 0 ? -1 : ctx->m->functions_indexes[next->p2]; + hl_function *cf = ctx->m->code->functions + fid; + const uchar *name = fun_field_name(cf); + null_field_access = true; + hashed_name = hl_hash_gen(name, true); + } + + if( null_field_access ) { + pad_before_call(ctx, HL_WSIZE); + if( hashed_name >= 0 && hashed_name < 256 ) + op64(ctx,PUSH8,pconst(&p,hashed_name),UNUSED); + else + op32(ctx,PUSH,pconst(&p,hashed_name),UNUSED); + } else { + pad_before_call(ctx, 0); + } + + jlist *j = (jlist*)hl_malloc(&ctx->galloc,sizeof(jlist)); + j->pos = BUF_POS(); + j->target = null_field_access ? 
-3 : -1; + j->next = ctx->calls; + ctx->calls = j; + + op64(ctx,MOV,PEAX,pconst64(&p,RESERVE_ADDRESS)); + op_call(ctx,PEAX,-1); + patch_jump(ctx,jz); + } + break; + case OSafeCast: + make_dyn_cast(ctx, dst, ra); + break; + case ODynGet: + { + int size; +# ifdef HL_64 + if( IS_FLOAT(dst) || dst->t->kind == HI64 ) { + size = begin_native_call(ctx,2); + } else { + size = begin_native_call(ctx,3); + set_native_arg(ctx,pconst64(&p,(int_val)dst->t)); + } + set_native_arg(ctx,pconst64(&p,(int_val)hl_hash_utf8(m->code->strings[o->p3]))); + set_native_arg(ctx,fetch(ra)); +# else + preg *r; + r = alloc_reg(ctx,RCPU); + if( IS_FLOAT(dst) || dst->t->kind == HI64 ) { + size = pad_before_call(ctx,HL_WSIZE*2); + } else { + size = pad_before_call(ctx,HL_WSIZE*3); + op64(ctx,MOV,r,pconst64(&p,(int_val)dst->t)); + op64(ctx,PUSH,r,UNUSED); + } + op64(ctx,MOV,r,pconst64(&p,(int_val)hl_hash_utf8(m->code->strings[o->p3]))); + op64(ctx,PUSH,r,UNUSED); + op64(ctx,PUSH,fetch(ra),UNUSED); +# endif + call_native(ctx,get_dynget(dst->t),size); + store_result(ctx,dst); + } + break; + case ODynSet: + { + int size; +# ifdef HL_64 + switch( rb->t->kind ) { + case HF32: + case HF64: + size = begin_native_call(ctx, 3); + set_native_arg_fpu(ctx,fetch(rb),rb->t->kind == HF32); + set_native_arg(ctx,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true))); + set_native_arg(ctx,fetch(dst)); + call_native(ctx,get_dynset(rb->t),size); + break; + case HI64: + case HGUID: + size = begin_native_call(ctx, 3); + set_native_arg(ctx,fetch(rb)); + set_native_arg(ctx,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true))); + set_native_arg(ctx,fetch(dst)); + call_native(ctx,get_dynset(rb->t),size); + break; + default: + size = begin_native_call(ctx,4); + set_native_arg(ctx,fetch(rb)); + set_native_arg(ctx,pconst64(&p,(int_val)rb->t)); + set_native_arg(ctx,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true))); + set_native_arg(ctx,fetch(dst)); + call_native(ctx,get_dynset(rb->t),size); + break; + 
} +# else + switch( rb->t->kind ) { + case HF32: + size = pad_before_call(ctx, HL_WSIZE*2 + sizeof(float)); + push_reg(ctx,rb); + op32(ctx,PUSH,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)),UNUSED); + op32(ctx,PUSH,fetch(dst),UNUSED); + call_native(ctx,get_dynset(rb->t),size); + break; + case HF64: + case HI64: + case HGUID: + size = pad_before_call(ctx, HL_WSIZE*2 + sizeof(double)); + push_reg(ctx,rb); + op32(ctx,PUSH,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)),UNUSED); + op32(ctx,PUSH,fetch(dst),UNUSED); + call_native(ctx,get_dynset(rb->t),size); + break; + default: + size = pad_before_call(ctx, HL_WSIZE*4); + op32(ctx,PUSH,fetch32(ctx,rb),UNUSED); + op32(ctx,PUSH,pconst64(&p,(int_val)rb->t),UNUSED); + op32(ctx,PUSH,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)),UNUSED); + op32(ctx,PUSH,fetch(dst),UNUSED); + call_native(ctx,get_dynset(rb->t),size); + break; + } +# endif + } + break; + case OTrap: + { + int size, jenter, jtrap; + int offset = 0; + int trap_size = (sizeof(hl_trap_ctx) + 15) & 0xFFF0; + hl_trap_ctx *t = NULL; +# ifndef HL_THREADS + if( tinf == NULL ) tinf = hl_get_thread(); // single thread +# endif + +# ifdef HL_64 + preg *trap = REG_AT(CALL_REGS[0]); +# else + preg *trap = PEAX; +# endif + RLOCK(trap); + + preg *treg = alloc_reg(ctx, RCPU); + if( !tinf ) { + call_native(ctx, hl_get_thread, 0); + op64(ctx,MOV,treg,PEAX); + offset = (int)(int_val)&tinf->trap_current; + } else { + offset = 0; + op64(ctx,MOV,treg,pconst64(&p,(int_val)&tinf->trap_current)); + } + op64(ctx,MOV,trap,pmem(&p,treg->id,offset)); + op64(ctx,SUB,PESP,pconst(&p,trap_size)); + op64(ctx,MOV,pmem(&p,Esp,(int)(int_val)&t->prev),trap); + op64(ctx,MOV,trap,PESP); + op64(ctx,MOV,pmem(&p,treg->id,offset),trap); + + /* + trap E,@catch + catch g + catch g2 + ... 
+ @:catch + + // Before haxe 5 + This is a bit hackshish : we want to detect the type of exception filtered by the catch so we check the following + sequence of HL opcodes: + + trap E,@catch + ... + @catch: + global R, _ + call _, ???(R,E) + + ??? is expected to be hl.BaseType.check + */ + hl_opcode *cat = f->ops + opCount + 1; + hl_opcode *next = f->ops + opCount + 1 + o->p2; + hl_opcode *next2 = f->ops + opCount + 2 + o->p2; + if( cat->op == OCatch || (next->op == OGetGlobal && next2->op == OCall2 && next2->p3 == next->p1 && dst->stack.id == (int)(int_val)next2->extra) ) { + int gindex = cat->op == OCatch ? cat->p1 : next->p2; + hl_type *gt = m->code->globals[gindex]; + while( gt->kind == HOBJ && gt->obj->super ) gt = gt->obj->super; + if( gt->kind == HOBJ && gt->obj->nfields && gt->obj->fields[0].t->kind == HTYPE ) { + void *addr = m->globals_data + m->globals_indexes[gindex]; +# ifdef HL_64 + op64(ctx,MOV,treg,pconst64(&p,(int_val)addr)); + op64(ctx,MOV,treg,pmem(&p,treg->id,0)); +# else + op64(ctx,MOV,treg,paddr(&p,addr)); +# endif + } else + op64(ctx,MOV,treg,pconst(&p,0)); + } else { + op64(ctx,MOV,treg,pconst(&p,0)); + } + op64(ctx,MOV,pmem(&p,Esp,(int)(int_val)&t->tcheck),treg); + + // On Win64 setjmp actually takes two arguments + // the jump buffer and the frame pointer (or the stack pointer if there is no FP) +#if defined(HL_WIN) && defined(HL_64) + size = begin_native_call(ctx, 2); + set_native_arg(ctx, REG_AT(Ebp)); +#else + size = begin_native_call(ctx, 1); +#endif + set_native_arg(ctx,trap); +#ifdef HL_MINGW + call_native(ctx,_setjmp,size); +#else + call_native(ctx,setjmp,size); +#endif + op64(ctx,TEST,PEAX,PEAX); + XJump_small(JZero,jenter); + op64(ctx,ADD,PESP,pconst(&p,trap_size)); + if( !tinf ) { + call_native(ctx, hl_get_thread, 0); + op64(ctx,MOV,PEAX,pmem(&p, Eax, (int)(int_val)&tinf->exc_value)); + } else { + op64(ctx,MOV,PEAX,pconst64(&p,(int_val)&tinf->exc_value)); + op64(ctx,MOV,PEAX,pmem(&p, Eax, 0)); + } + store(ctx,dst,PEAX,false); + + 
jtrap = do_jump(ctx,OJAlways,false); + register_jump(ctx,jtrap,(opCount + 1) + o->p2); + patch_jump(ctx,jenter); + } + break; + case OEndTrap: + { + int trap_size = (sizeof(hl_trap_ctx) + 15) & 0xFFF0; + hl_trap_ctx *tmp = NULL; + preg *addr,*r; + int offset; + if (!tinf) { + call_native(ctx, hl_get_thread, 0); + addr = PEAX; + RLOCK(addr); + offset = (int)(int_val)&tinf->trap_current; + } else { + offset = 0; + addr = alloc_reg(ctx, RCPU); + op64(ctx, MOV, addr, pconst64(&p, (int_val)&tinf->trap_current)); + } + r = alloc_reg(ctx, RCPU); + op64(ctx, MOV, r, pmem(&p,addr->id,offset)); + op64(ctx, MOV, r, pmem(&p,r->id,(int)(int_val)&tmp->prev)); + op64(ctx, MOV, pmem(&p,addr->id, offset), r); +# ifdef HL_WIN + // erase eip (prevent false positive) + { + _JUMP_BUFFER *b = NULL; +# ifdef HL_64 + op64(ctx,MOV,pmem(&p,Esp,(int)(int_val)&(b->Rip)),PEAX); +# else + op64(ctx,MOV,pmem(&p,Esp,(int)&(b->Eip)),PEAX); +# endif + } +# endif + op64(ctx,ADD,PESP,pconst(&p,trap_size)); + } + break; + case OEnumIndex: + { + preg *r = alloc_reg(ctx,RCPU); + op64(ctx,MOV,r,pmem(&p,alloc_cpu(ctx,ra,true)->id,HL_WSIZE)); + store(ctx,dst,r,true); + break; + } + break; + case OSwitch: + { + int jdefault; + int i; + preg *r = alloc_cpu(ctx, dst, true); + preg *r2 = alloc_reg(ctx, RCPU); + op32(ctx, CMP, r, pconst(&p,o->p2)); + XJump(JUGte,jdefault); + // r2 = r * 5 + eip +# ifdef HL_64 + op64(ctx, XOR, r2, r2); +# endif + op32(ctx, MOV, r2, r); + op32(ctx, SHL, r2, pconst(&p,2)); + op32(ctx, ADD, r2, r); +# ifdef HL_64 + preg *tmp = alloc_reg(ctx, RCPU); + op64(ctx, MOV, tmp, pconst64(&p,RESERVE_ADDRESS)); +# else + op64(ctx, ADD, r2, pconst64(&p,RESERVE_ADDRESS)); +# endif + { + jlist *s = (jlist*)hl_malloc(&ctx->galloc, sizeof(jlist)); + s->pos = BUF_POS() - sizeof(void*); + s->next = ctx->switchs; + ctx->switchs = s; + } +# ifdef HL_64 + op64(ctx, ADD, r2, tmp); +# endif + op64(ctx, JMP, r2, UNUSED); + for(i=0;ip2;i++) { + int j = do_jump(ctx,OJAlways,false); + 
register_jump(ctx,j,(opCount + 1) + o->extra[i]); + if( (i & 15) == 0 ) jit_buf(ctx); + } + patch_jump(ctx, jdefault); + } + break; + case OGetTID: + op32(ctx, MOV, alloc_cpu(ctx,dst,false), pmem(&p,alloc_cpu(ctx,ra,true)->id,0)); + store(ctx,dst,dst->current,false); + break; + case OAssert: + { + pad_before_call(ctx, 0); + jlist *j = (jlist*)hl_malloc(&ctx->galloc,sizeof(jlist)); + j->pos = BUF_POS(); + j->target = -2; + j->next = ctx->calls; + ctx->calls = j; + + op64(ctx,MOV,PEAX,pconst64(&p,RESERVE_ADDRESS)); + op_call(ctx,PEAX,-1); + } + break; + case ONop: + break; + case OPrefetch: + { + preg *r = alloc_cpu(ctx, dst, true); + if( o->p2 > 0 ) { + switch( dst->t->kind ) { + case HOBJ: + case HSTRUCT: + { + hl_runtime_obj *rt = hl_get_obj_rt(dst->t); + preg *r2 = alloc_reg(ctx, RCPU); + op64(ctx, LEA, r2, pmem(&p, r->id, rt->fields_indexes[o->p2-1])); + r = r2; + } + break; + default: + ASSERT(dst->t->kind); + break; + } + } + switch( o->p3 ) { + case 0: + op64(ctx, PREFETCHT0, pmem(&p,r->id,0), UNUSED); + break; + case 1: + op64(ctx, PREFETCHT1, pmem(&p,r->id,0), UNUSED); + break; + case 2: + op64(ctx, PREFETCHT2, pmem(&p,r->id,0), UNUSED); + break; + case 3: + op64(ctx, PREFETCHNTA, pmem(&p,r->id,0), UNUSED); + break; + case 4: + op64(ctx, PREFETCHW, pmem(&p,r->id,0), UNUSED); + break; + default: + ASSERT(o->p3); + break; + } + } + break; + case OAsm: + { + switch( o->p1 ) { + case 0: // byte output + B(o->p2); + break; + case 1: // scratch cpu reg + scratch(REG_AT(o->p2)); + break; + case 2: // read vm reg + rb--; + copy(ctx, REG_AT(o->p2), &rb->stack, rb->size); + scratch(REG_AT(o->p2)); + break; + case 3: // write vm reg + rb--; + copy(ctx, &rb->stack, REG_AT(o->p2), rb->size); + scratch(rb->current); + break; + case 4: + if( ctx->totalRegsSize != 0 ) + hl_fatal("Asm naked function should not have local variables"); + if( opCount != 0 ) + hl_fatal("Asm naked function should be on first opcode"); + ctx->buf.b -= BUF_POS() - ctx->functionPos; // reset to our 
function start + break; + default: + ASSERT(o->p1); + break; + } + } + break; + case OCatch: + // Only used by OTrap typing + break; + default: + jit_error(hl_op_name(o->op)); + break; + } + // we are landing at this position, assume we have lost our registers + if( ctx->opsPos[opCount+1] == -1 ) + discard_regs(ctx,true); + ctx->opsPos[opCount+1] = BUF_POS(); + + // write debug infos + size = BUF_POS() - codePos; + if( debug16 && size > 0xFF00 ) { + debug32 = malloc(sizeof(int) * (f->nops + 1)); + for(i=0;icurrentPos;i++) + debug32[i] = debug16[i]; + free(debug16); + debug16 = NULL; + } + if( debug16 ) debug16[ctx->currentPos] = (unsigned short)size; else if( debug32 ) debug32[ctx->currentPos] = size; + + } + // patch jumps + { + jlist *j = ctx->jumps; + while( j ) { + *(int*)(ctx->startBuf + j->pos) = ctx->opsPos[j->target] - (j->pos + 4); + j = j->next; + } + ctx->jumps = NULL; + } + int codeEndPos = BUF_POS(); + // add nops padding + jit_nops(ctx); + // clear regs + for(i=0;iholds = NULL; + r->lock = 0; + } + // save debug infos + if( ctx->debug ) { + int fid = (int)(f - m->code->functions); + ctx->debug[fid].start = codePos; + ctx->debug[fid].offsets = debug32 ? 
(void*)debug32 : (void*)debug16; + ctx->debug[fid].large = debug32 != NULL; + } + // unwind info +#ifdef WIN64_UNWIND_TABLES + int uw_idx = ctx->nunwind++; + ctx->unwind_table[uw_idx].BeginAddress = codePos; + ctx->unwind_table[uw_idx].EndAddress = codeEndPos; + ctx->unwind_table[uw_idx].UnwindData = ctx->unwind_offset; +#endif + // reset tmp allocator + hl_free(&ctx->falloc); + return codePos; +} + +static void *get_wrapper( hl_type *t ) { + return call_jit_hl2c; +} + +void hl_jit_patch_method( void *old_fun, void **new_fun_table ) { + // mov eax, addr + // jmp [eax] + unsigned char *b = (unsigned char*)old_fun; + unsigned long long addr = (unsigned long long)(int_val)new_fun_table; +# ifdef HL_64 + *b++ = 0x48; + *b++ = 0xB8; + *b++ = (unsigned char)addr; + *b++ = (unsigned char)(addr>>8); + *b++ = (unsigned char)(addr>>16); + *b++ = (unsigned char)(addr>>24); + *b++ = (unsigned char)(addr>>32); + *b++ = (unsigned char)(addr>>40); + *b++ = (unsigned char)(addr>>48); + *b++ = (unsigned char)(addr>>56); +# else + *b++ = 0xB8; + *b++ = (unsigned char)addr; + *b++ = (unsigned char)(addr>>8); + *b++ = (unsigned char)(addr>>16); + *b++ = (unsigned char)(addr>>24); +# endif + *b++ = 0xFF; + *b++ = 0x20; +} + +static void missing_closure() { + hl_error("Missing static closure"); +} + +void *hl_jit_code( jit_ctx *ctx, hl_module *m, int *codesize, hl_debug_infos **debug, hl_module *previous ) { + jlist *c; + int size = BUF_POS(); + unsigned char *code; + if( size & 4095 ) size += 4096 - (size&4095); + code = (unsigned char*)hl_alloc_executable_memory(size); + if( code == NULL ) return NULL; + memcpy(code,ctx->startBuf,BUF_POS()); + *codesize = size; + *debug = ctx->debug; + if( !call_jit_c2hl ) { + call_jit_c2hl = code + ctx->c2hl; + call_jit_hl2c = code + ctx->hl2c; + hl_setup.get_wrapper = get_wrapper; + hl_setup.static_call = callback_c2hl; + hl_setup.static_call_ref = true; + } +#ifdef WIN64_UNWIND_TABLES + m->unwind_table = ctx->unwind_table; + 
RtlAddFunctionTable(m->unwind_table, ctx->nunwind, (DWORD64)code); +#endif + if( !ctx->static_function_offset ) { + int i; + ctx->static_function_offset = true; + for(i=0;i<(int)(sizeof(ctx->static_functions)/sizeof(void*));i++) + ctx->static_functions[i] = (void*)(code + (int)(int_val)ctx->static_functions[i]); + } + // patch calls + c = ctx->calls; + while( c ) { + void *fabs; + if( c->target < 0 ) + fabs = ctx->static_functions[-c->target-1]; + else { + fabs = m->functions_ptrs[c->target]; + if( fabs == NULL ) { + // read absolute address from previous module + int old_idx = m->hash->functions_hashes[m->functions_indexes[c->target]]; + if( old_idx < 0 ) + return NULL; + fabs = previous->functions_ptrs[(previous->code->functions + old_idx)->findex]; + } else { + // relative + fabs = (unsigned char*)code + (int)(int_val)fabs; + } + } + if( (code[c->pos]&~3) == (IS_64?0x48:0xB8) || code[c->pos] == 0x68 ) // MOV : absolute | PUSH + *(void**)(code + c->pos + (IS_64?2:1)) = fabs; + else { + int_val delta = (int_val)fabs - (int_val)code - (c->pos + 5); + int rpos = (int)delta; + if( (int_val)rpos != delta ) { + printf("Target code too far too rebase\n"); + return NULL; + } + *(int*)(code + c->pos + 1) = rpos; + } + c = c->next; + } + // patch switchs + c = ctx->switchs; + while( c ) { + *(void**)(code + c->pos) = code + c->pos + (IS_64 ? 
14 : 6); + c = c->next; + } + // patch closures + { + vclosure *c = ctx->closure_list; + while( c ) { + vclosure *next; + int fidx = (int)(int_val)c->fun; + void *fabs = m->functions_ptrs[fidx]; + if( fabs == NULL ) { + // read absolute address from previous module + int old_idx = m->hash->functions_hashes[m->functions_indexes[fidx]]; + if( old_idx < 0 ) + fabs = missing_closure; + else + fabs = previous->functions_ptrs[(previous->code->functions + old_idx)->findex]; + } else { + // relative + fabs = (unsigned char*)code + (int)(int_val)fabs; + } + c->fun = fabs; + next = (vclosure*)c->value; + c->value = NULL; + c = next; + } + } + return code; +} + diff --git a/src/main.c b/src/main.c index a25b673cc..60c4db77e 100644 --- a/src/main.c +++ b/src/main.c @@ -20,7 +20,7 @@ * DEALINGS IN THE SOFTWARE. */ #include -#include +#include #include "hlsystem.h" #ifdef HL_WIN diff --git a/src/module.c b/src/module.c index e668b1064..e46f73f13 100644 --- a/src/module.c +++ b/src/module.c @@ -21,6 +21,7 @@ */ #include #include +#include #ifdef HL_WIN # undef _GUID @@ -718,6 +719,9 @@ int hl_module_init( hl_module *m, h_bool hot_reload ) { return 0; } m->functions_ptrs[f->findex] = (void*)(int_val)fpos; +# ifdef HL_DEBUG + if( hl_setup.sys_nargs > 0 && ucmp(hl_setup.sys_args[0],USTR("--dump")) == 0 ) hl_emit_dump(ctx); +# endif } m->jit_code = hl_jit_code(ctx, m, &m->codesize, &m->jit_debug, NULL); for(i=0;icode->nfunctions;i++) { diff --git a/src/opcodes.h b/src/opcodes.h index ab9b1fa51..9e4df7f60 100644 --- a/src/opcodes.h +++ b/src/opcodes.h @@ -67,8 +67,8 @@ OP_BEGIN OP(OIncr,R,X,X) OP(ODecr,R,X,X) - OP(OCall0,R,R,X) - OP(OCall1,R,R,R) + OP(OCall0,R,C,X) + OP(OCall1,R,C,R) OP(OCall2,R,AR,4) OP(OCall3,R,AR,5) OP(OCall4,R,AR,6) @@ -78,17 +78,17 @@ OP_BEGIN OP(OCallClosure,R,AR,VAR_ARGS) OP(OStaticClosure,R,G,X) - OP(OInstanceClosure,R,R,G) + OP(OInstanceClosure,R,C,R) OP(OVirtualClosure,R,R,G) OP(OGetGlobal,R,G,X) - OP(OSetGlobal,R_NW,G,X) - OP(OField,R,R,C) - 
OP(OSetField,R_NW,R,C) - OP(OGetThis,R,C,X) - OP(OSetThis,R_NW,R,X) + OP(OSetGlobal,G,R,X) + OP(OField,R,R,G) + OP(OSetField,R_NW,G,R) + OP(OGetThis,R,G,X) + OP(OSetThis,G,R,X) OP(ODynGet,R,R,C) - OP(ODynSet,R_NW,R,C) + OP(ODynSet,R_NW,C,R) OP(OJTrue,R_NW,J,X) OP(OJFalse,R_NW,J,X) @@ -134,7 +134,7 @@ OP_BEGIN OP(ONew,R,X,X) OP(OArraySize,R,R,X) - OP(OType,R,R,X) + OP(OType,R,G,X) OP(OGetType,R,R,X) OP(OGetTID,R,R,X)